3 年之前 · fe26e80e43
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ Builds/x64/Debug/Praxis3D.exp
 
				 Builds/x64/Debug/Praxis3D.lib
			
 
				 *.sarif
			
 
				 Praxis3D/x64/Debug/CodeAnalysisResultManifest.txt
			
 
				+.vs/
			
--- a/Builds/x64/Debug/Praxis3D.exe
+++ b/Builds/x64/Debug/Praxis3D.exe
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h
@@ -0,0 +1,38 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_BROADPHASE_CALLBACK_H
			
 
				+#define B3_BROADPHASE_CALLBACK_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+struct b3BroadphaseProxy;
			
 
				+
			
 
				+struct b3BroadphaseAabbCallback  // Abstract callback interface: subclasses implement process().
				+{
				+	virtual ~b3BroadphaseAabbCallback() {}  // virtual so derived callbacks can be destroyed through a base pointer
				+	virtual bool process(const b3BroadphaseProxy* proxy) = 0;  // NOTE(review): meaning of the returned bool is not visible in this header - confirm against the callers
				+};
			
 
				+
			
 
				+struct b3BroadphaseRayCallback : public b3BroadphaseAabbCallback  // AABB callback extended with per-ray cached values
				+{
				+	///added some cached data to accelerate ray-AABB tests
				+	b3Vector3 m_rayDirectionInverse;  // presumably the component-wise reciprocal of the ray direction - TODO confirm where it is filled in
				+	unsigned int m_signs[3];  // one entry per axis; presumably sign flags derived from the ray direction - TODO confirm
				+	b3Scalar m_lambda_max;  // presumably the largest ray parameter still of interest - TODO confirm
				+
				+	virtual ~b3BroadphaseRayCallback() {}
				+};
			
 
				+
			
 
				+#endif  //B3_BROADPHASE_CALLBACK_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp
@@ -0,0 +1,1352 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+///b3DynamicBvh implementation by Nathanael Presson
			
 
				+
			
 
				+#include "b3DynamicBvh.h"
			
 
				+
			
 
				+// Array aliases for node pointers, used by the helper functions in this file.
				+typedef b3AlignedObjectArray<b3DbvtNode*> b3NodeArray;  // array of mutable node pointers
				+typedef b3AlignedObjectArray<const b3DbvtNode*> b3ConstNodeArray;  // array of read-only node pointers
			
 
				+
			
 
				+// ICollide policy that records every node handed to Process(), in call order.
				+struct b3DbvtNodeEnumerator : b3DynamicBvh::ICollide
				+{
				+	b3ConstNodeArray nodes;  // nodes collected so far
				+	void Process(const b3DbvtNode* n) { nodes.push_back(n); }  // append the visited node
				+};
			
 
				+
			
 
				+//
			
 
				+// Returns 1 when `node` is stored in its parent's childs[1], otherwise 0.
				+// The parent pointer is dereferenced unconditionally, so `node` must have a parent.
				+static B3_DBVT_INLINE int b3IndexOf(const b3DbvtNode* node)
				+{
				+	const b3DbvtNode* const owner = node->parent;
				+	return (owner->childs[1] == node) ? 1 : 0;
				+}
			
 
				+
			
 
				+// Value-returning convenience overload: forwards to the three-argument b3Merge(a, b, res) and returns res by value.
				+static B3_DBVT_INLINE b3DbvtVolume b3Merge(const b3DbvtVolume& a,
				+										   const b3DbvtVolume& b)
				+{
				+#if (B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE)
				+	B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtAabbMm)]);  // raw aligned byte buffer used instead of a b3DbvtVolume local
				+	b3DbvtVolume& res = *(b3DbvtVolume*)locals;  // NOTE(review): buffer is sized with b3DbvtAabbMm but viewed as b3DbvtVolume - presumably the same type; confirm in the header
				+#else
				+	b3DbvtVolume res;
				+#endif
				+	b3Merge(a, b, res);
				+	return (res);
				+}
			
 
				+
			
 
				+// volume+edge lengths
			
 
				+// Size metric of a volume: the product of its three edge lengths plus
				+// each edge length, accumulated in the order product, x, y, z.
				+static B3_DBVT_INLINE b3Scalar b3Size(const b3DbvtVolume& a)
				+{
				+	const b3Vector3 extent = a.Lengths();
				+	b3Scalar total = extent.x * extent.y * extent.z;
				+	total = total + extent.x;
				+	total = total + extent.y;
				+	total = total + extent.z;
				+	return (total);
				+}
			
 
				+
			
 
				+//
			
 
				+// Recursively walks the subtree under `node` and raises `maxdepth` to the
				+// largest `depth` value seen at a non-internal node. `depth` is the depth
				+// assigned to `node` itself; children are visited with depth + 1.
				+static void b3GetMaxDepth(const b3DbvtNode* node, int depth, int& maxdepth)
				+{
				+	if (!node->isinternal())
				+	{
				+		maxdepth = b3Max(maxdepth, depth);
				+		return;
				+	}
				+	const int childDepth = depth + 1;
				+	b3GetMaxDepth(node->childs[0], childDepth, maxdepth);
				+	b3GetMaxDepth(node->childs[1], childDepth, maxdepth);
				+}
			
 
				+
			
 
				+//
			
 
				+// Retires `node`: the node previously parked in pdbvt->m_free is released
				+// with b3AlignedFree and `node` takes its place in m_free, where
				+// b3CreateNode can pick it up again.
				+static B3_DBVT_INLINE void b3DeleteNode(b3DynamicBvh* pdbvt,
				+										b3DbvtNode* node)
				+{
				+	b3DbvtNode* const parked = pdbvt->m_free;
				+	pdbvt->m_free = node;
				+	b3AlignedFree(parked);
				+}
			
 
				+
			
 
				+//
			
 
				+// Deletes the whole subtree rooted at `node`, children first. When `node`
				+// is the tree root, pdbvt->m_root is cleared before the node is retired.
				+static void b3RecurseDeleteNode(b3DynamicBvh* pdbvt,
				+								b3DbvtNode* node)
				+{
				+	if (!node->isleaf())
				+	{
				+		for (int slot = 0; slot < 2; ++slot)
				+		{
				+			b3RecurseDeleteNode(pdbvt, node->childs[slot]);
				+		}
				+	}
				+	if (pdbvt->m_root == node)
				+	{
				+		pdbvt->m_root = 0;
				+	}
				+	b3DeleteNode(pdbvt, node);
				+}
			
 
				+
			
 
				+//
			
 
				+// Produces a node with the given parent and data and childs[1] cleared.
				+// The node parked in pdbvt->m_free is reused when there is one (m_free is
				+// then emptied); otherwise a new node is constructed in 16-byte-aligned
				+// storage obtained from b3AlignedAlloc.
				+static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
				+											   b3DbvtNode* parent,
				+											   void* data)
				+{
				+	b3DbvtNode* node = pdbvt->m_free;
				+	if (node)
				+	{
				+		pdbvt->m_free = 0;
				+	}
				+	else
				+	{
				+		void* storage = b3AlignedAlloc(sizeof(b3DbvtNode), 16);
				+		node = new (storage) b3DbvtNode();
				+	}
				+	node->parent = parent;
				+	// data is written before childs[1] is cleared, as in the original order.
				+	node->data = data;
				+	node->childs[1] = 0;
				+	return (node);
				+}
			
 
				+
			
 
				+//
			
 
				+// Same as the (pdbvt, parent, data) overload, and additionally copies
				+// `volume` into the new node.
				+static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
				+											   b3DbvtNode* parent,
				+											   const b3DbvtVolume& volume,
				+											   void* data)
				+{
				+	b3DbvtNode* const created = b3CreateNode(pdbvt, parent, data);
				+	created->volume = volume;
				+	return (created);
				+}
			
 
				+
			
 
				+//
			
 
				+// Same as the (pdbvt, parent, data) overload, and additionally sets the
				+// new node's volume to the merge of `volume0` and `volume1`.
				+static B3_DBVT_INLINE b3DbvtNode* b3CreateNode(b3DynamicBvh* pdbvt,
				+											   b3DbvtNode* parent,
				+											   const b3DbvtVolume& volume0,
				+											   const b3DbvtVolume& volume1,
				+											   void* data)
				+{
				+	b3DbvtNode* const created = b3CreateNode(pdbvt, parent, data);
				+	b3Merge(volume0, volume1, created->volume);
				+	return (created);
				+}
			
 
				+
			
 
				+//
			
 
				+// Inserts `leaf` into the tree owned by `pdbvt`, starting the descent at `root`.
				+//  - Empty tree: `leaf` becomes the root and its parent is cleared.
				+//  - Otherwise: descend with b3Select until a leaf is reached, put a new
				+//    internal node in that leaf's place holding the old leaf as childs[0]
				+//    and `leaf` as childs[1], then re-merge ancestor volumes upward until
				+//    an ancestor already contains its updated child or the top is reached.
				+static void b3InsertLeaf(b3DynamicBvh* pdbvt,
				+						 b3DbvtNode* root,
				+						 b3DbvtNode* leaf)
				+{
				+	if (!pdbvt->m_root)
				+	{
				+		pdbvt->m_root = leaf;
				+		leaf->parent = 0;
				+		return;
				+	}
				+
				+	// Walk down to the existing leaf that will become the sibling.
				+	while (!root->isleaf())
				+	{
				+		root = root->childs[b3Select(leaf->volume,
				+									 root->childs[0]->volume,
				+									 root->childs[1]->volume)];
				+	}
				+
				+	b3DbvtNode* above = root->parent;
				+	b3DbvtNode* joint = b3CreateNode(pdbvt, above, leaf->volume, root->volume, 0);
				+
				+	// Hook the new internal node in before root->parent is overwritten:
				+	// b3IndexOf(root) reads root->parent.
				+	if (above)
				+		above->childs[b3IndexOf(root)] = joint;
				+	else
				+		pdbvt->m_root = joint;
				+
				+	joint->childs[0] = root;
				+	root->parent = joint;
				+	joint->childs[1] = leaf;
				+	leaf->parent = joint;
				+
				+	// Refit ancestors; stop at the first one that already encloses its child.
				+	for (; above; joint = above, above = joint->parent)
				+	{
				+		if (above->volume.Contain(joint->volume))
				+			break;
				+		b3Merge(above->childs[0]->volume, above->childs[1]->volume, above->volume);
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+// Unlinks `leaf` from the tree (the leaf itself is not deleted).
				+// The leaf's parent is retired via b3DeleteNode and the sibling takes the
				+// parent's place. Ancestor volumes are then re-merged upward until one is
				+// unchanged by the re-merge.
				+// Returns 0 when `leaf` was the root (tree becomes empty); otherwise the
				+// first ancestor whose volume did not change, or the tree root when every
				+// ancestor changed or the sibling became the root.
				+static b3DbvtNode* b3RemoveLeaf(b3DynamicBvh* pdbvt,
				+								b3DbvtNode* leaf)
				+{
				+	if (leaf == pdbvt->m_root)
				+	{
				+		pdbvt->m_root = 0;
				+		return (0);
				+	}
				+
				+	b3DbvtNode* parent = leaf->parent;
				+	b3DbvtNode* grand = parent->parent;
				+	b3DbvtNode* sibling = parent->childs[1 - b3IndexOf(leaf)];
				+
				+	if (!grand)
				+	{
				+		// The parent was the root: the sibling is promoted to root.
				+		pdbvt->m_root = sibling;
				+		sibling->parent = 0;
				+		b3DeleteNode(pdbvt, parent);
				+		return (pdbvt->m_root);
				+	}
				+
				+	grand->childs[b3IndexOf(parent)] = sibling;
				+	sibling->parent = grand;
				+	b3DeleteNode(pdbvt, parent);
				+
				+	for (b3DbvtNode* cur = grand; cur; cur = cur->parent)
				+	{
				+		const b3DbvtVolume before = cur->volume;
				+		b3Merge(cur->childs[0]->volume, cur->childs[1]->volume, cur->volume);
				+		if (!b3NotEqual(before, cur->volume))
				+		{
				+			return (cur);
				+		}
				+	}
				+	return (pdbvt->m_root);
				+}
			
 
				+
			
 
				+//
			
 
				+// Collects the nodes of the subtree under `root` into `leaves`, retiring
				+// the internal nodes it descends through via b3DeleteNode.
				+// A node is appended (and not descended into) when it is not internal or
				+// when `depth` has reached 0; `depth` shrinks by one per level, so the
				+// default of -1 never reaches 0 on the way down.
				+static void b3FetchLeaves(b3DynamicBvh* pdbvt,
				+						  b3DbvtNode* root,
				+						  b3NodeArray& leaves,
				+						  int depth = -1)
				+{
				+	const bool descend = root->isinternal() && depth;
				+	if (!descend)
				+	{
				+		leaves.push_back(root);
				+		return;
				+	}
				+	const int remaining = depth - 1;
				+	b3FetchLeaves(pdbvt, root->childs[0], leaves, remaining);
				+	b3FetchLeaves(pdbvt, root->childs[1], leaves, remaining);
				+	b3DeleteNode(pdbvt, root);
				+}
			
 
				+
			
 
				+// True when the centre of the node's volume, taken relative to `org`, has
				+// a dot product with `axis` that is zero or negative.
				+static bool b3LeftOfAxis(const b3DbvtNode* node,
				+						 const b3Vector3& org,
				+						 const b3Vector3& axis)
				+{
				+	const b3Scalar side = b3Dot(axis, node->volume.Center() - org);
				+	return side <= 0;
				+}
			
 
				+
			
 
				+// Partitions leaves such that leaves[0, n) are on the
			
 
				+// left of axis, and leaves[n, count) are on the right
			
 
				+// of axis. returns N.
			
 
				+// In-place partition of leaves[0, count) by b3LeftOfAxis(leaf, org, axis):
				+// afterwards every entry in [0, n) tests left and every entry in
				+// [n, count) does not. Returns n.
				+static int b3Split(b3DbvtNode** leaves,
				+				   int count,
				+				   const b3Vector3& org,
				+				   const b3Vector3& axis)
				+{
				+	int lo = 0;      // everything below lo is known to be left
				+	int hi = count;  // everything at or above hi is known to be right
				+	while (lo != hi)
				+	{
				+		if (b3LeftOfAxis(leaves[lo], org, axis))
				+		{
				+			++lo;
				+			continue;
				+		}
				+		if (!b3LeftOfAxis(leaves[hi - 1], org, axis))
				+		{
				+			--hi;
				+			continue;
				+		}
				+		// leaves[lo] is right and leaves[hi - 1] is left: exchange them.
				+		--hi;
				+		b3DbvtNode* misplaced = leaves[lo];
				+		leaves[lo] = leaves[hi];
				+		leaves[hi] = misplaced;
				+		++lo;
				+	}
				+	return lo;
				+}
			
 
				+
			
 
				+//
			
 
				+// Returns the merge of the volumes of leaves[0 .. count). leaves[0] is
				+// read unconditionally, so the array must hold at least one node.
				+static b3DbvtVolume b3Bounds(b3DbvtNode** leaves,
				+							 int count)
				+{
				+#if B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE
				+	// SSE build: the accumulator lives in a 16-byte-aligned raw buffer.
				+	B3_ATTRIBUTE_ALIGNED16(char locals[sizeof(b3DbvtVolume)]);
				+	b3DbvtVolume& volume = *(b3DbvtVolume*)locals;
				+	volume = leaves[0]->volume;
				+#else
				+	b3DbvtVolume volume = leaves[0]->volume;
				+#endif
				+	for (int idx = 1; idx < count; ++idx)
				+	{
				+		b3Merge(volume, leaves[idx]->volume, volume);
				+	}
				+	return (volume);
				+}
			
 
				+
			
 
				+//
			
 
				+// Agglomerative build over leaves[0 .. count): each round finds the pair
				+// whose merged volume has the smallest b3Size, joins the pair under a new
				+// parent node, stores the parent in the first slot of the pair and moves
				+// the last array entry into the second slot. Rounds continue until one
				+// entry remains.
				+static void b3BottomUp(b3DynamicBvh* pdbvt,
				+					   b3DbvtNode** leaves,
				+					   int count)
				+{
				+	for (; count > 1; --count)
				+	{
				+		b3Scalar bestSize = B3_INFINITY;
				+		int first = -1;
				+		int second = -1;
				+		for (int i = 0; i < count; ++i)
				+		{
				+			for (int j = i + 1; j < count; ++j)
				+			{
				+				const b3Scalar candidate = b3Size(b3Merge(leaves[i]->volume, leaves[j]->volume));
				+				if (candidate < bestSize)
				+				{
				+					bestSize = candidate;
				+					first = i;
				+					second = j;
				+				}
				+			}
				+		}
				+		b3DbvtNode* left = leaves[first];
				+		b3DbvtNode* right = leaves[second];
				+		b3DbvtNode* joined = b3CreateNode(pdbvt, 0, left->volume, right->volume, 0);
				+		joined->childs[0] = left;
				+		joined->childs[1] = right;
				+		left->parent = joined;
				+		right->parent = joined;
				+		leaves[first] = joined;
				+		leaves[second] = leaves[count - 1];
				+	}
				+}
			
 
				+
			
 
				+// Builds a subtree over leaves[0 .. count) and returns its root: ranges larger than bu_treshold are split in two and built recursively, smaller ranges go to b3BottomUp, and a range of at most one entry returns leaves[0] unchanged.
				+static b3DbvtNode* b3TopDown(b3DynamicBvh* pdbvt,
				+							 b3DbvtNode** leaves,
				+							 int count,
				+							 int bu_treshold)
				+{
				+	static const b3Vector3 axis[] = {b3MakeVector3(1, 0, 0),
				+									 b3MakeVector3(0, 1, 0),
				+									 b3MakeVector3(0, 0, 1)};  // candidate split directions: x, y, z
				+	b3Assert(bu_treshold > 1);
				+	if (count > 1)
				+	{
				+		if (count > bu_treshold)
				+		{
				+			const b3DbvtVolume vol = b3Bounds(leaves, count);
				+			const b3Vector3 org = vol.Center();  // split plane passes through the centre of the overall bounds
				+			int partition;
				+			int bestaxis = -1;
				+			int bestmidp = count;
				+			int splitcount[3][2] = {{0, 0}, {0, 0}, {0, 0}};  // [axis][side]; side 1 counts centres with a positive offset from org
				+			int i;
				+			for (i = 0; i < count; ++i)
				+			{
				+				const b3Vector3 x = leaves[i]->volume.Center() - org;
				+				for (int j = 0; j < 3; ++j)
				+				{
				+					++splitcount[j][b3Dot(x, axis[j]) > 0 ? 1 : 0];
				+				}
				+			}
				+			for (i = 0; i < 3; ++i)
				+			{
				+				if ((splitcount[i][0] > 0) && (splitcount[i][1] > 0))  // only axes that put at least one leaf on each side
				+				{
				+					const int midp = (int)b3Fabs(b3Scalar(splitcount[i][0] - splitcount[i][1]));  // imbalance between the two sides
				+					if (midp < bestmidp)  // keep the most balanced axis
				+					{
				+						bestaxis = i;
				+						bestmidp = midp;
				+					}
				+				}
				+			}
				+			if (bestaxis >= 0)
				+			{
				+				partition = b3Split(leaves, count, org, axis[bestaxis]);
				+				b3Assert(partition != 0 && partition != count);
				+			}
				+			else
				+			{
				+				partition = count / 2 + 1;  // no axis separates the leaves: cut the array by index instead
				+			}
				+			b3DbvtNode* node = b3CreateNode(pdbvt, 0, vol, 0);
				+			node->childs[0] = b3TopDown(pdbvt, &leaves[0], partition, bu_treshold);
				+			node->childs[1] = b3TopDown(pdbvt, &leaves[partition], count - partition, bu_treshold);
				+			node->childs[0]->parent = node;
				+			node->childs[1]->parent = node;
				+			return (node);
				+		}
				+		else
				+		{
				+			b3BottomUp(pdbvt, leaves, count);
				+			return (leaves[0]);  // b3BottomUp's last round can only pair slots 0 and 1, so the root ends up in leaves[0]
				+		}
				+	}
				+	return (leaves[0]);
				+}
			
 
				+
			
 
				+// If n's parent p compares greater than n as a pointer, exchanges the tree positions of n and p (n takes p's place, p becomes n's child in the slot n used to occupy), swaps their volumes and returns p; otherwise returns n untouched. r is set to n when p had no parent.
				+static B3_DBVT_INLINE b3DbvtNode* b3Sort(b3DbvtNode* n, b3DbvtNode*& r)
				+{
				+	b3DbvtNode* p = n->parent;
				+	b3Assert(n->isinternal());
				+	if (p > n)  // raw pointer-value comparison
				+	{
				+		const int i = b3IndexOf(n);  // slot of n inside p
				+		const int j = 1 - i;  // the other slot
				+		b3DbvtNode* s = p->childs[j];  // n's sibling
				+		b3DbvtNode* q = p->parent;  // grandparent, null when p has no parent
				+		b3Assert(n == p->childs[i]);
				+		if (q)
				+			q->childs[b3IndexOf(p)] = n;  // must run before p->parent is overwritten below, since b3IndexOf reads it
				+		else
				+			r = n;
				+		s->parent = n;
				+		p->parent = n;
				+		n->parent = q;
				+		p->childs[0] = n->childs[0];  // p inherits n's children ...
				+		p->childs[1] = n->childs[1];
				+		n->childs[0]->parent = p;  // ... read through n before n's child slots are rewritten
				+		n->childs[1]->parent = p;
				+		n->childs[i] = p;
				+		n->childs[j] = s;
				+		b3Swap(p->volume, n->volume);  // volumes follow the positions, not the node objects
				+		return (p);
				+	}
				+	return (n);
				+}
			
 
				+
			
 
				+#if 0  // disabled helper, excluded from compilation
				+static B3_DBVT_INLINE b3DbvtNode*	walkup(b3DbvtNode* n,int count)  // follows parent links from n at most count times
				+{
				+	while(n&&(count--)) n=n->parent;  // stops early when n becomes null
				+	return(n);
				+}
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Api
			
 
				+//
			
 
				+
			
 
				+//
			
 
				+// Constructs an empty tree: no root, no parked node, zero leaves,
				+// lookahead -1 and optimisation path 0.
				+b3DynamicBvh::b3DynamicBvh()
				+{
				+	m_root = m_free = 0;
				+	m_leaves = 0;
				+	m_opath = 0;
				+	m_lkhd = -1;
				+}
			
 
				+
			
 
				+// Destructor: releases the tree by delegating to clear().
				+b3DynamicBvh::~b3DynamicBvh()
				+{
				+	clear();
				+}
			
 
				+
			
 
				+//
			
 
				+// Destroys every node of the tree and returns the object to its freshly
				+// constructed state: no root, no parked node, zero leaves, lookahead -1,
				+// empty traversal stack and optimisation path 0.
				+void b3DynamicBvh::clear()
				+{
				+	if (m_root)
				+		b3RecurseDeleteNode(this, m_root);  // also resets m_root to 0
				+	// The last node retired above is still parked in m_free; release it.
				+	b3AlignedFree(m_free);
				+	m_free = 0;
				+	m_lkhd = -1;
				+	m_stkStack.clear();
				+	m_opath = 0;
				+	// The tree is empty now. Without this reset the count stayed at its old
				+	// value and was later used for reserve() sizes and as the default pass
				+	// count in optimizeIncremental().
				+	m_leaves = 0;
				+}
			
 
				+
			
 
				+//
			
 
				+// Rebuilds the whole tree with b3BottomUp: the current leaves are
				+// gathered (internal nodes are retired along the way), re-joined pairwise
				+// and the resulting top node becomes the root. No-op on an empty tree.
				+void b3DynamicBvh::optimizeBottomUp()
				+{
				+	if (!m_root)
				+	{
				+		return;
				+	}
				+	b3NodeArray gathered;
				+	gathered.reserve(m_leaves);
				+	b3FetchLeaves(this, m_root, gathered);
				+	b3BottomUp(this, &gathered[0], gathered.size());
				+	m_root = gathered[0];
				+}
			
 
				+
			
 
				+//
			
 
				+// Rebuilds the whole tree with b3TopDown: the current leaves are gathered
				+// (internal nodes are retired along the way) and the node returned by
				+// b3TopDown becomes the root. `bu_treshold` is forwarded unchanged.
				+// No-op on an empty tree.
				+void b3DynamicBvh::optimizeTopDown(int bu_treshold)
				+{
				+	if (!m_root)
				+	{
				+		return;
				+	}
				+	b3NodeArray gathered;
				+	gathered.reserve(m_leaves);
				+	b3FetchLeaves(this, m_root, gathered);
				+	m_root = b3TopDown(this, &gathered[0], gathered.size(), bu_treshold);
				+}
			
 
				+
			
 
				+//
			
 
				+// Runs `passes` incremental optimisation steps (a negative value means
				+// m_leaves steps). Each step walks from the root to a leaf, applying
				+// b3Sort to every internal node on the way and choosing the child from
				+// successive bits of m_opath, then re-inserts that leaf with update() and
				+// increments m_opath so the next step follows a different path.
				+void b3DynamicBvh::optimizeIncremental(int passes)
				+{
				+	if (passes < 0) passes = m_leaves;
				+	if (!m_root)
				+	{
				+		return;
				+	}
				+	for (; passes > 0; --passes)
				+	{
				+		b3DbvtNode* cursor = m_root;
				+		unsigned bit = 0;
				+		while (cursor->isinternal())
				+		{
				+			b3DbvtNode* sorted = b3Sort(cursor, m_root);
				+			cursor = sorted->childs[(m_opath >> bit) & 1];
				+			// wrap the bit index at the width of `unsigned`
				+			bit = (bit + 1) & (sizeof(unsigned) * 8 - 1);
				+		}
				+		update(cursor);
				+		++m_opath;
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+// Creates a leaf carrying `volume` and `data`, inserts it starting from
				+// the root, increments the leaf count and returns the new leaf.
				+b3DbvtNode* b3DynamicBvh::insert(const b3DbvtVolume& volume, void* data)
				+{
				+	b3DbvtNode* const created = b3CreateNode(this, 0, volume, data);
				+	b3InsertLeaf(this, m_root, created);
				+	m_leaves += 1;
				+	return (created);
				+}
			
 
				+
			
 
				+//
			
 
				+// Removes `leaf` and inserts it again without changing its volume.
				+// The re-insertion starts at the node returned by b3RemoveLeaf moved up
				+// by at most `lookahead` parent links, or at the tree root when
				+// `lookahead` is negative.
				+void b3DynamicBvh::update(b3DbvtNode* leaf, int lookahead)
				+{
				+	b3DbvtNode* anchor = b3RemoveLeaf(this, leaf);
				+	if (anchor)
				+	{
				+		if (lookahead < 0)
				+		{
				+			anchor = m_root;
				+		}
				+		else
				+		{
				+			int climbed = 0;
				+			while ((climbed < lookahead) && anchor->parent)
				+			{
				+				anchor = anchor->parent;
				+				++climbed;
				+			}
				+		}
				+	}
				+	b3InsertLeaf(this, anchor, leaf);
				+}
			
 
				+
			
 
				+//
			
 
				+// Removes `leaf`, assigns it `volume` and inserts it again.
				+// The re-insertion starts at the node returned by b3RemoveLeaf moved up
				+// by at most m_lkhd parent links, or at the tree root when m_lkhd is
				+// negative.
				+void b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume)
				+{
				+	b3DbvtNode* anchor = b3RemoveLeaf(this, leaf);
				+	if (anchor)
				+	{
				+		if (m_lkhd < 0)
				+		{
				+			anchor = m_root;
				+		}
				+		else
				+		{
				+			int climbed = 0;
				+			while ((climbed < m_lkhd) && anchor->parent)
				+			{
				+				anchor = anchor->parent;
				+				++climbed;
				+			}
				+		}
				+	}
				+	leaf->volume = volume;
				+	b3InsertLeaf(this, anchor, leaf);
				+}
			
 
				+
			
 
				+//
			
 
				+// Conditional update: when the leaf's current volume already contains
				+// `volume`, nothing happens and false is returned. Otherwise `volume` is
				+// modified in place (expanded by `margin` on every axis, then
				+// sign-expanded by `velocity`), the leaf is re-inserted with it and true
				+// is returned.
				+bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity, b3Scalar margin)
				+{
				+	const bool stillInside = leaf->volume.Contain(volume);
				+	if (!stillInside)
				+	{
				+		volume.Expand(b3MakeVector3(margin, margin, margin));
				+		volume.SignedExpand(velocity);
				+		update(leaf, volume);
				+	}
				+	return (!stillInside);
				+}
			
 
				+
			
 
				+//
			
 
				+// Conditional update: when the leaf's current volume already contains
				+// `volume`, nothing happens and false is returned. Otherwise `volume` is
				+// sign-expanded in place by `velocity`, the leaf is re-inserted with it
				+// and true is returned.
				+bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity)
				+{
				+	const bool stillInside = leaf->volume.Contain(volume);
				+	if (!stillInside)
				+	{
				+		volume.SignedExpand(velocity);
				+		update(leaf, volume);
				+	}
				+	return (!stillInside);
				+}
			
 
				+
			
 
				+//
			
 
				+// Conditional update: when the leaf's current volume already contains
				+// `volume`, nothing happens and false is returned. Otherwise `volume` is
				+// expanded in place by `margin` on every axis, the leaf is re-inserted
				+// with it and true is returned.
				+bool b3DynamicBvh::update(b3DbvtNode* leaf, b3DbvtVolume& volume, b3Scalar margin)
				+{
				+	const bool stillInside = leaf->volume.Contain(volume);
				+	if (!stillInside)
				+	{
				+		volume.Expand(b3MakeVector3(margin, margin, margin));
				+		update(leaf, volume);
				+	}
				+	return (!stillInside);
				+}
			
 
				+
			
 
				+//
			
 
				+// Unlinks `leaf` from the tree, retires the leaf node itself and
				+// decrements the leaf count. `leaf` must not be used afterwards.
				+void b3DynamicBvh::remove(b3DbvtNode* leaf)
				+{
				+	m_leaves -= 1;
				+	b3RemoveLeaf(this, leaf);
				+	b3DeleteNode(this, leaf);
				+}
			
 
				+
			
 
				+//
			
 
				+// Serialises the tree through `iwriter`. All nodes are first enumerated
				+// into an array; Prepare() receives the root and the node count, then
				+// each node is reported with its own array index and the index of its
				+// parent (-1 when it has none). Internal nodes go to WriteNode() together
				+// with the indices of both children, all others go to WriteLeaf().
				+// Indices are found with a linear search over the node array.
				+void b3DynamicBvh::write(IWriter* iwriter) const
				+{
				+	b3DbvtNodeEnumerator collected;
				+	collected.nodes.reserve(m_leaves * 2);
				+	enumNodes(m_root, collected);
				+	b3ConstNodeArray& list = collected.nodes;
				+	iwriter->Prepare(m_root, list.size());
				+	for (int idx = 0; idx < list.size(); ++idx)
				+	{
				+		const b3DbvtNode* cur = list[idx];
				+		int parentIdx = -1;
				+		if (cur->parent)
				+		{
				+			parentIdx = list.findLinearSearch(cur->parent);
				+		}
				+		if (!cur->isinternal())
				+		{
				+			iwriter->WriteLeaf(cur, idx, parentIdx);
				+			continue;
				+		}
				+		const int firstChild = list.findLinearSearch(cur->childs[0]);
				+		const int secondChild = list.findLinearSearch(cur->childs[1]);
				+		iwriter->WriteNode(cur, idx, parentIdx, firstChild, secondChild);
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+// Copies this tree into `dest` (which is cleared first).
				+// Every node is re-created in `dest` with the same volume and data, and
				+// each copy is stored in the same child slot its source occupies.
				+// `iclone`, when non-null, has CloneLeaf() called once per copied leaf.
				+// On return dest.m_leaves holds the number of leaves actually copied.
				+//
				+// Changes from the previous version:
				+//  - the child slot was derived from the parity of the stack index, which
				+//    swapped childs[0]/childs[1] at some depths; it now comes from the
				+//    source node's own slot via b3IndexOf;
				+//  - a null `iclone` was dereferenced; it is now skipped;
				+//  - dest.m_leaves was never set.
				+void b3DynamicBvh::clone(b3DynamicBvh& dest, IClone* iclone) const
				+{
				+	dest.clear();
				+	int clonedLeaves = 0;
				+	if (m_root != 0)
				+	{
				+		b3AlignedObjectArray<sStkCLN> stack;
				+		stack.reserve(m_leaves);
				+		stack.push_back(sStkCLN(m_root, 0));
				+		do
				+		{
				+			// Copy the entry before popping: pop_back invalidates the slot.
				+			const sStkCLN e = stack[stack.size() - 1];
				+			stack.pop_back();
				+			b3DbvtNode* n = b3CreateNode(&dest, e.parent, e.node->volume, e.node->data);
				+			if (e.parent != 0)
				+			{
				+				// e.parent is only non-null for non-root source nodes, so
				+				// e.node->parent is valid for b3IndexOf here.
				+				e.parent->childs[b3IndexOf(e.node)] = n;
				+			}
				+			else
				+			{
				+				dest.m_root = n;
				+			}
				+			if (e.node->isinternal())
				+			{
				+				stack.push_back(sStkCLN(e.node->childs[0], n));
				+				stack.push_back(sStkCLN(e.node->childs[1], n));
				+			}
				+			else
				+			{
				+				++clonedLeaves;
				+				if (iclone)
				+				{
				+					iclone->CloneLeaf(n);
				+				}
				+			}
				+		} while (stack.size() > 0);
				+	}
				+	dest.m_leaves = clonedLeaves;
				+}
			
 
				+
			
 
				+//
			
 
				+// Returns the largest depth reached under `node`, counting `node` itself
				+// as depth 1. A null `node` yields 0.
				+int b3DynamicBvh::maxdepth(const b3DbvtNode* node)
				+{
				+	if (!node)
				+	{
				+		return (0);
				+	}
				+	int deepest = 0;
				+	b3GetMaxDepth(node, 1, deepest);
				+	return (deepest);
				+}
			
 
				+
			
 
				+//
			
 
				+// Counts the non-internal nodes in the subtree under `node`.
				+// A null `node` (for example the root of an empty tree) yields 0; the
				+// previous version dereferenced it. This matches maxdepth(), which
				+// already accepts null.
				+int b3DynamicBvh::countLeaves(const b3DbvtNode* node)
				+{
				+	if (!node)
				+		return (0);
				+	if (node->isinternal())
				+		return (countLeaves(node->childs[0]) + countLeaves(node->childs[1]));
				+	else
				+		return (1);
				+}
			
 
				+
			
 
				+//
			
 
				+// Appends every non-internal node of the subtree under `node` to
				+// `leaves`, visiting childs[0] before childs[1].
				+// A null `node` (for example the root of an empty tree) appends nothing;
				+// the previous version dereferenced it.
				+void b3DynamicBvh::extractLeaves(const b3DbvtNode* node, b3AlignedObjectArray<const b3DbvtNode*>& leaves)
				+{
				+	if (!node)
				+	{
				+		return;
				+	}
				+	if (node->isinternal())
				+	{
				+		extractLeaves(node->childs[0], leaves);
				+		extractLeaves(node->childs[1], leaves);
				+	}
				+	else
				+	{
				+		leaves.push_back(node);
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+#if B3_DBVT_ENABLE_BENCHMARK
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+/*
			
 
				+q6600,2.4ghz
			
 
				+
			
 
				+/Ox /Ob2 /Oi /Ot /I "." /I "..\.." /I "..\..\src" /D "NDEBUG" /D "_LIB" /D "_WINDOWS" /D "_CRT_SECURE_NO_DEPRECATE" /D "_CRT_NONSTDC_NO_DEPRECATE" /D "WIN32"
			
 
				+/GF /FD /MT /GS- /Gy /arch:SSE2 /Zc:wchar_t- /Fp"..\..\out\release8\build\libbulletcollision\libbulletcollision.pch"
			
 
				+/Fo"..\..\out\release8\build\libbulletcollision\\"
			
 
				+/Fd"..\..\out\release8\build\libbulletcollision\bulletcollision.pdb"
			
 
				+/W3 /nologo /c /Wp64 /Zi /errorReport:prompt
			
 
				+
			
 
				+Benchmarking dbvt...
			
 
				+World scale: 100.000000
			
 
				+Extents base: 1.000000
			
 
				+Extents range: 4.000000
			
 
				+Leaves: 8192
			
 
				+sizeof(b3DbvtVolume): 32 bytes
			
 
				+sizeof(b3DbvtNode):   44 bytes
			
 
				+[1] b3DbvtVolume intersections: 3499 ms (-1%)
			
 
				+[2] b3DbvtVolume merges: 1934 ms (0%)
			
 
				+[3] b3DynamicBvh::collideTT: 5485 ms (-21%)
			
 
				+[4] b3DynamicBvh::collideTT self: 2814 ms (-20%)
			
 
				+[5] b3DynamicBvh::collideTT xform: 7379 ms (-1%)
			
 
				+[6] b3DynamicBvh::collideTT xform,self: 7270 ms (-2%)
			
 
				+[7] b3DynamicBvh::rayTest: 6314 ms (0%),(332143 r/s)
			
 
				+[8] insert/remove: 2093 ms (0%),(1001983 ir/s)
			
 
				+[9] updates (teleport): 1879 ms (-3%),(1116100 u/s)
			
 
				+[10] updates (jitter): 1244 ms (-4%),(1685813 u/s)
			
 
				+[11] optimize (incremental): 2514 ms (0%),(1668000 o/s)
			
 
				+[12] b3DbvtVolume notequal: 3659 ms (0%)
			
 
				+[13] culling(OCL+fullsort): 2218 ms (0%),(461 t/s)
			
 
				+[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
			
 
				+[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
			
 
				+[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
			
 
				+[17] b3DbvtVolume select: 3419 ms (0%)
			
 
				+*/
			
 
				+
			
 
				+// Helper policies and random-data generators for the benchmark code that
				+// is compiled only when B3_DBVT_ENABLE_BENCHMARK is set.
				+//
				+// Changes from the previous version:
				+//  - P14::Process and P15::Process filled a local Node and dropped it, so
				+//    m_nodes always stayed empty; the node is now appended to m_nodes.
				+//  - P15 called dot(); the rest of this file uses b3Dot.
				+//  - Vectors were built with b3Vector3(x, y, z); the rest of this file
				+//    builds them with b3MakeVector3.
				+struct b3DbvtBenchmark
				+{
				+	// Policy that only counts callbacks. With m_checksort set, the
				+	// depth-carrying overload also reports any depth smaller than the
				+	// largest depth seen so far.
				+	struct NilPolicy : b3DynamicBvh::ICollide
				+	{
				+		NilPolicy() : m_pcount(0), m_depth(-B3_INFINITY), m_checksort(true) {}
				+		void Process(const b3DbvtNode*, const b3DbvtNode*) { ++m_pcount; }
				+		void Process(const b3DbvtNode*) { ++m_pcount; }
				+		void Process(const b3DbvtNode*, b3Scalar depth)
				+		{
				+			++m_pcount;
				+			if (m_checksort)
				+			{
				+				if (depth >= m_depth)
				+					m_depth = depth;
				+				else
				+					printf("wrong depth: %f (should be >= %f)\r\n", depth, m_depth);
				+			}
				+		}
				+		int m_pcount;      // number of Process() calls received
				+		b3Scalar m_depth;  // largest depth seen so far
				+		bool m_checksort;  // enables the ordering check
				+	};
				+	// Policy that records each reported leaf together with the depth it
				+	// was reported at.
				+	struct P14 : b3DynamicBvh::ICollide
				+	{
				+		struct Node
				+		{
				+			const b3DbvtNode* leaf;
				+			b3Scalar depth;
				+		};
				+		void Process(const b3DbvtNode* leaf, b3Scalar depth)
				+		{
				+			Node n;
				+			n.leaf = leaf;
				+			n.depth = depth;
				+			m_nodes.push_back(n);
				+		}
				+		// Comparator: +1 when a is shallower than b, -1 when deeper, 0 when equal.
				+		static int sortfnc(const Node& a, const Node& b)
				+		{
				+			if (a.depth < b.depth) return (+1);
				+			if (a.depth > b.depth) return (-1);
				+			return (0);
				+		}
				+		b3AlignedObjectArray<Node> m_nodes;
				+	};
				+	// Policy that records each reported leaf with a depth computed as the
				+	// dot product of the leaf's volume centre and m_axis.
				+	struct P15 : b3DynamicBvh::ICollide
				+	{
				+		struct Node
				+		{
				+			const b3DbvtNode* leaf;
				+			b3Scalar depth;
				+		};
				+		void Process(const b3DbvtNode* leaf)
				+		{
				+			Node n;
				+			n.leaf = leaf;
				+			n.depth = b3Dot(leaf->volume.Center(), m_axis);
				+			m_nodes.push_back(n);
				+		}
				+		// Comparator: +1 when a is shallower than b, -1 when deeper, 0 when equal.
				+		static int sortfnc(const Node& a, const Node& b)
				+		{
				+			if (a.depth < b.depth) return (+1);
				+			if (a.depth > b.depth) return (-1);
				+			return (0);
				+		}
				+		b3AlignedObjectArray<Node> m_nodes;
				+		b3Vector3 m_axis;
				+	};
				+	// rand() scaled into [0, 1].
				+	static b3Scalar RandUnit()
				+	{
				+		return (rand() / (b3Scalar)RAND_MAX);
				+	}
				+	// Vector with each component drawn from RandUnit().
				+	static b3Vector3 RandVector3()
				+	{
				+		return (b3MakeVector3(RandUnit(), RandUnit(), RandUnit()));
				+	}
				+	// Vector with each component in [-cs / 2, cs / 2].
				+	static b3Vector3 RandVector3(b3Scalar cs)
				+	{
				+		return (RandVector3() * cs - b3MakeVector3(cs, cs, cs) / 2);
				+	}
				+	// Random volume built with FromCE: centre from RandVector3(cs), second
				+	// argument eb plus a random amount up to es per axis.
				+	static b3DbvtVolume RandVolume(b3Scalar cs, b3Scalar eb, b3Scalar es)
				+	{
				+		return (b3DbvtVolume::FromCE(RandVector3(cs), b3MakeVector3(eb, eb, eb) + RandVector3() * es));
				+	}
				+	// Random transform: origin from RandVector3(cs), rotation from three
				+	// random angles in [0, 2 * pi].
				+	static b3Transform RandTransform(b3Scalar cs)
				+	{
				+		b3Transform t;
				+		t.setOrigin(RandVector3(cs));
				+		t.setRotation(b3Quaternion(RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2, RandUnit() * B3_PI * 2).normalized());
				+		return (t);
				+	}
				+	// Clears `dbvt` and fills it with `leaves` random volumes (data = 0).
				+	static void RandTree(b3Scalar cs, b3Scalar eb, b3Scalar es, int leaves, b3DynamicBvh& dbvt)
				+	{
				+		dbvt.clear();
				+		for (int i = 0; i < leaves; ++i)
				+		{
				+			dbvt.insert(RandVolume(cs, eb, es), 0);
				+		}
				+	}
				+};
			
 
				+
			
 
				+// Signed percentage by which `timeMs` deviates from `referenceMs`, relative to `timeMs`
				+// (positive: slower than the reference). Returns 0 when `timeMs` is not positive, so a
				+// run that completes in under one millisecond cannot divide by zero.
				+static int b3DbvtBenchmarkDelta(int timeMs, int referenceMs)
				+{
				+	return (timeMs > 0) ? ((timeMs - referenceMs) * 100 / timeMs) : 0;
				+}
				+
				+// Operations per second for `count` operations that took `timeMs` milliseconds.
				+// Computed in double so that count * 1000 cannot overflow an int and no precision is
				+// lost by dividing first. Returns 0 when `timeMs` is not positive.
				+static unsigned b3DbvtBenchmarkRate(int count, int timeMs)
				+{
				+	return (timeMs > 0) ? (unsigned)(count * 1000.0 / timeMs) : 0u;
				+}
				+
				+// Runs seventeen timed micro-benchmarks and prints, for each one, the elapsed
				+// wall-clock milliseconds, the percentage deviation from a hard-coded reference time
				+// and, where applicable, an operations-per-second rate. Every benchmark reseeds rand()
				+// with the same constant before generating its data.
				+//
				+// NOTE(review): the b3DynamicBvh.h header declares b3Intersect / b3Merge / b3NotEqual /
				+// b3Select, while the loops below call the unprefixed Intersect / Merge / NotEqual /
				+// Select. Confirm those names resolve before enabling B3_DBVT_ENABLE_BENCHMARK.
				+void b3DynamicBvh::benchmark()
				+{
				+	static const b3Scalar cfgVolumeCenterScale = 100;
				+	static const b3Scalar cfgVolumeExentsBase = 1;
				+	static const b3Scalar cfgVolumeExentsScale = 4;
				+	static const int cfgLeaves = 8192;
				+	static const bool cfgEnable = true;
				+
				+	//[1] b3DbvtVolume intersections
				+	bool cfgBenchmark1_Enable = cfgEnable;
				+	static const int cfgBenchmark1_Iterations = 8;
				+	static const int cfgBenchmark1_Reference = 3499;
				+	//[2] b3DbvtVolume merges
				+	bool cfgBenchmark2_Enable = cfgEnable;
				+	static const int cfgBenchmark2_Iterations = 4;
				+	static const int cfgBenchmark2_Reference = 1945;
				+	//[3] b3DynamicBvh::collideTT
				+	bool cfgBenchmark3_Enable = cfgEnable;
				+	static const int cfgBenchmark3_Iterations = 512;
				+	static const int cfgBenchmark3_Reference = 5485;
				+	//[4] b3DynamicBvh::collideTT self
				+	bool cfgBenchmark4_Enable = cfgEnable;
				+	static const int cfgBenchmark4_Iterations = 512;
				+	static const int cfgBenchmark4_Reference = 2814;
				+	//[5] b3DynamicBvh::collideTT xform
				+	bool cfgBenchmark5_Enable = cfgEnable;
				+	static const int cfgBenchmark5_Iterations = 512;
				+	static const b3Scalar cfgBenchmark5_OffsetScale = 2;
				+	static const int cfgBenchmark5_Reference = 7379;
				+	//[6] b3DynamicBvh::collideTT xform,self
				+	bool cfgBenchmark6_Enable = cfgEnable;
				+	static const int cfgBenchmark6_Iterations = 512;
				+	static const b3Scalar cfgBenchmark6_OffsetScale = 2;
				+	static const int cfgBenchmark6_Reference = 7270;
				+	//[7] b3DynamicBvh::rayTest
				+	bool cfgBenchmark7_Enable = cfgEnable;
				+	static const int cfgBenchmark7_Passes = 32;
				+	static const int cfgBenchmark7_Iterations = 65536;
				+	static const int cfgBenchmark7_Reference = 6307;
				+	//[8] insert/remove
				+	bool cfgBenchmark8_Enable = cfgEnable;
				+	static const int cfgBenchmark8_Passes = 32;
				+	static const int cfgBenchmark8_Iterations = 65536;
				+	static const int cfgBenchmark8_Reference = 2105;
				+	//[9] updates (teleport)
				+	bool cfgBenchmark9_Enable = cfgEnable;
				+	static const int cfgBenchmark9_Passes = 32;
				+	static const int cfgBenchmark9_Iterations = 65536;
				+	static const int cfgBenchmark9_Reference = 1879;
				+	//[10] updates (jitter)
				+	bool cfgBenchmark10_Enable = cfgEnable;
				+	static const b3Scalar cfgBenchmark10_Scale = cfgVolumeCenterScale / 10000;
				+	static const int cfgBenchmark10_Passes = 32;
				+	static const int cfgBenchmark10_Iterations = 65536;
				+	static const int cfgBenchmark10_Reference = 1244;
				+	//[11] optimize (incremental)
				+	bool cfgBenchmark11_Enable = cfgEnable;
				+	static const int cfgBenchmark11_Passes = 64;
				+	static const int cfgBenchmark11_Iterations = 65536;
				+	static const int cfgBenchmark11_Reference = 2510;
				+	//[12] b3DbvtVolume notequal
				+	bool cfgBenchmark12_Enable = cfgEnable;
				+	static const int cfgBenchmark12_Iterations = 32;
				+	static const int cfgBenchmark12_Reference = 3677;
				+	//[13] culling(OCL+fullsort)
				+	bool cfgBenchmark13_Enable = cfgEnable;
				+	static const int cfgBenchmark13_Iterations = 1024;
				+	static const int cfgBenchmark13_Reference = 2231;
				+	//[14] culling(OCL+qsort)
				+	bool cfgBenchmark14_Enable = cfgEnable;
				+	static const int cfgBenchmark14_Iterations = 8192;
				+	static const int cfgBenchmark14_Reference = 3500;
				+	//[15] culling(KDOP+qsort)
				+	bool cfgBenchmark15_Enable = cfgEnable;
				+	static const int cfgBenchmark15_Iterations = 8192;
				+	static const int cfgBenchmark15_Reference = 1151;
				+	//[16] insert/remove batch
				+	bool cfgBenchmark16_Enable = cfgEnable;
				+	static const int cfgBenchmark16_BatchCount = 256;
				+	static const int cfgBenchmark16_Passes = 16384;
				+	static const int cfgBenchmark16_Reference = 5138;
				+	//[17] select
				+	bool cfgBenchmark17_Enable = cfgEnable;
				+	static const int cfgBenchmark17_Iterations = 4;
				+	static const int cfgBenchmark17_Reference = 3390;
				+
				+	b3Clock wallclock;
				+	printf("Benchmarking dbvt...\r\n");
				+	printf("\tWorld scale: %f\r\n", cfgVolumeCenterScale);
				+	printf("\tExtents base: %f\r\n", cfgVolumeExentsBase);
				+	printf("\tExtents range: %f\r\n", cfgVolumeExentsScale);
				+	printf("\tLeaves: %i\r\n", cfgLeaves);
				+	// sizeof yields size_t, which does not match %u on 64-bit targets; cast explicitly.
				+	printf("\tsizeof(b3DbvtVolume): %u bytes\r\n", (unsigned)sizeof(b3DbvtVolume));
				+	printf("\tsizeof(b3DbvtNode):   %u bytes\r\n", (unsigned)sizeof(b3DbvtNode));
				+	if (cfgBenchmark1_Enable)
				+	{  // Benchmark 1
				+		srand(380843);
				+		b3AlignedObjectArray<b3DbvtVolume> volumes;
				+		b3AlignedObjectArray<bool> results;
				+		volumes.resize(cfgLeaves);
				+		results.resize(cfgLeaves);
				+		for (int i = 0; i < cfgLeaves; ++i)
				+		{
				+			volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
				+		}
				+		printf("[1] b3DbvtVolume intersections: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark1_Iterations; ++i)
				+		{
				+			for (int j = 0; j < cfgLeaves; ++j)
				+			{
				+				for (int k = 0; k < cfgLeaves; ++k)
				+				{
				+					results[k] = Intersect(volumes[j], volumes[k]);
				+				}
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark1_Reference));
				+	}
				+	if (cfgBenchmark2_Enable)
				+	{  // Benchmark 2
				+		srand(380843);
				+		b3AlignedObjectArray<b3DbvtVolume> volumes;
				+		b3AlignedObjectArray<b3DbvtVolume> results;
				+		volumes.resize(cfgLeaves);
				+		results.resize(cfgLeaves);
				+		for (int i = 0; i < cfgLeaves; ++i)
				+		{
				+			volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
				+		}
				+		printf("[2] b3DbvtVolume merges: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark2_Iterations; ++i)
				+		{
				+			for (int j = 0; j < cfgLeaves; ++j)
				+			{
				+				for (int k = 0; k < cfgLeaves; ++k)
				+				{
				+					Merge(volumes[j], volumes[k], results[k]);
				+				}
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark2_Reference));
				+	}
				+	if (cfgBenchmark3_Enable)
				+	{  // Benchmark 3
				+		srand(380843);
				+		b3DynamicBvh dbvt[2];
				+		b3DbvtBenchmark::NilPolicy policy;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
				+		dbvt[0].optimizeTopDown();
				+		dbvt[1].optimizeTopDown();
				+		printf("[3] b3DynamicBvh::collideTT: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark3_Iterations; ++i)
				+		{
				+			b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, policy);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark3_Reference));
				+	}
				+	if (cfgBenchmark4_Enable)
				+	{  // Benchmark 4
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3DbvtBenchmark::NilPolicy policy;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[4] b3DynamicBvh::collideTT self: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark4_Iterations; ++i)
				+		{
				+			b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, policy);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark4_Reference));
				+	}
				+	if (cfgBenchmark5_Enable)
				+	{  // Benchmark 5
				+		srand(380843);
				+		b3DynamicBvh dbvt[2];
				+		b3AlignedObjectArray<b3Transform> transforms;
				+		b3DbvtBenchmark::NilPolicy policy;
				+		transforms.resize(cfgBenchmark5_Iterations);
				+		for (int i = 0; i < transforms.size(); ++i)
				+		{
				+			transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark5_OffsetScale);
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
				+		dbvt[0].optimizeTopDown();
				+		dbvt[1].optimizeTopDown();
				+		printf("[5] b3DynamicBvh::collideTT xform: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark5_Iterations; ++i)
				+		{
				+			b3DynamicBvh::collideTT(dbvt[0].m_root, dbvt[1].m_root, transforms[i], policy);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark5_Reference));
				+	}
				+	if (cfgBenchmark6_Enable)
				+	{  // Benchmark 6
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3Transform> transforms;
				+		b3DbvtBenchmark::NilPolicy policy;
				+		transforms.resize(cfgBenchmark6_Iterations);
				+		for (int i = 0; i < transforms.size(); ++i)
				+		{
				+			transforms[i] = b3DbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark6_OffsetScale);
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[6] b3DynamicBvh::collideTT xform,self: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark6_Iterations; ++i)
				+		{
				+			b3DynamicBvh::collideTT(dbvt.m_root, dbvt.m_root, transforms[i], policy);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark6_Reference));
				+	}
				+	if (cfgBenchmark7_Enable)
				+	{  // Benchmark 7
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3Vector3> rayorg;
				+		b3AlignedObjectArray<b3Vector3> raydir;
				+		b3DbvtBenchmark::NilPolicy policy;
				+		rayorg.resize(cfgBenchmark7_Iterations);
				+		raydir.resize(cfgBenchmark7_Iterations);
				+		for (int i = 0; i < rayorg.size(); ++i)
				+		{
				+			rayorg[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
				+			raydir[i] = b3DbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[7] b3DynamicBvh::rayTest: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark7_Passes; ++i)
				+		{
				+			for (int j = 0; j < cfgBenchmark7_Iterations; ++j)
				+			{
				+				b3DynamicBvh::rayTest(dbvt.m_root, rayorg[j], rayorg[j] + raydir[j], policy);
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int rays = cfgBenchmark7_Passes * cfgBenchmark7_Iterations;
				+		printf("%i ms (%i%%),(%u r/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark7_Reference), b3DbvtBenchmarkRate(rays, time));
				+	}
				+	if (cfgBenchmark8_Enable)
				+	{  // Benchmark 8
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[8] insert/remove: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark8_Passes; ++i)
				+		{
				+			for (int j = 0; j < cfgBenchmark8_Iterations; ++j)
				+			{
				+				dbvt.remove(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int ir = cfgBenchmark8_Passes * cfgBenchmark8_Iterations;
				+		printf("%i ms (%i%%),(%u ir/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark8_Reference), b3DbvtBenchmarkRate(ir, time));
				+	}
				+	if (cfgBenchmark9_Enable)
				+	{  // Benchmark 9
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<const b3DbvtNode*> leaves;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		dbvt.extractLeaves(dbvt.m_root, leaves);
				+		printf("[9] updates (teleport): ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark9_Passes; ++i)
				+		{
				+			for (int j = 0; j < cfgBenchmark9_Iterations; ++j)
				+			{
				+				dbvt.update(const_cast<b3DbvtNode*>(leaves[rand() % cfgLeaves]),
				+							b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale));
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int up = cfgBenchmark9_Passes * cfgBenchmark9_Iterations;
				+		printf("%i ms (%i%%),(%u u/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark9_Reference), b3DbvtBenchmarkRate(up, time));
				+	}
				+	if (cfgBenchmark10_Enable)
				+	{  // Benchmark 10
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<const b3DbvtNode*> leaves;
				+		b3AlignedObjectArray<b3Vector3> vectors;
				+		vectors.resize(cfgBenchmark10_Iterations);
				+		for (int i = 0; i < vectors.size(); ++i)
				+		{
				+			vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)) * cfgBenchmark10_Scale;
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		dbvt.extractLeaves(dbvt.m_root, leaves);
				+		printf("[10] updates (jitter): ");
				+		wallclock.reset();
				+
				+		for (int i = 0; i < cfgBenchmark10_Passes; ++i)
				+		{
				+			for (int j = 0; j < cfgBenchmark10_Iterations; ++j)
				+			{
				+				const b3Vector3& d = vectors[j];
				+				b3DbvtNode* l = const_cast<b3DbvtNode*>(leaves[rand() % cfgLeaves]);
				+				b3DbvtVolume v = b3DbvtVolume::FromMM(l->volume.Mins() + d, l->volume.Maxs() + d);
				+				dbvt.update(l, v);
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int up = cfgBenchmark10_Passes * cfgBenchmark10_Iterations;
				+		printf("%i ms (%i%%),(%u u/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark10_Reference), b3DbvtBenchmarkRate(up, time));
				+	}
				+	if (cfgBenchmark11_Enable)
				+	{  // Benchmark 11
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[11] optimize (incremental): ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark11_Passes; ++i)
				+		{
				+			dbvt.optimizeIncremental(cfgBenchmark11_Iterations);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int op = cfgBenchmark11_Passes * cfgBenchmark11_Iterations;
				+		// Previously op / time * 1000, which truncated the rate to a multiple of 1000.
				+		printf("%i ms (%i%%),(%u o/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark11_Reference), b3DbvtBenchmarkRate(op, time));
				+	}
				+	if (cfgBenchmark12_Enable)
				+	{  // Benchmark 12
				+		srand(380843);
				+		b3AlignedObjectArray<b3DbvtVolume> volumes;
				+		b3AlignedObjectArray<bool> results;
				+		volumes.resize(cfgLeaves);
				+		results.resize(cfgLeaves);
				+		for (int i = 0; i < cfgLeaves; ++i)
				+		{
				+			volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
				+		}
				+		printf("[12] b3DbvtVolume notequal: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark12_Iterations; ++i)
				+		{
				+			for (int j = 0; j < cfgLeaves; ++j)
				+			{
				+				for (int k = 0; k < cfgLeaves; ++k)
				+				{
				+					results[k] = NotEqual(volumes[j], volumes[k]);
				+				}
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark12_Reference));
				+	}
				+	if (cfgBenchmark13_Enable)
				+	{  // Benchmark 13
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3Vector3> vectors;
				+		b3DbvtBenchmark::NilPolicy policy;
				+		vectors.resize(cfgBenchmark13_Iterations);
				+		for (int i = 0; i < vectors.size(); ++i)
				+		{
				+			vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		printf("[13] culling(OCL+fullsort): ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark13_Iterations; ++i)
				+		{
				+			static const b3Scalar offset = 0;
				+			policy.m_depth = -B3_INFINITY;
				+			dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int t = cfgBenchmark13_Iterations;
				+		printf("%i ms (%i%%),(%u t/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark13_Reference), b3DbvtBenchmarkRate(t, time));
				+	}
				+	if (cfgBenchmark14_Enable)
				+	{  // Benchmark 14
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3Vector3> vectors;
				+		b3DbvtBenchmark::P14 policy;
				+		vectors.resize(cfgBenchmark14_Iterations);
				+		for (int i = 0; i < vectors.size(); ++i)
				+		{
				+			vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		policy.m_nodes.reserve(cfgLeaves);
				+		printf("[14] culling(OCL+qsort): ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark14_Iterations; ++i)
				+		{
				+			static const b3Scalar offset = 0;
				+			policy.m_nodes.resize(0);
				+			dbvt.collideOCL(dbvt.m_root, &vectors[i], &offset, vectors[i], 1, policy, false);
				+			policy.m_nodes.quickSort(b3DbvtBenchmark::P14::sortfnc);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int t = cfgBenchmark14_Iterations;
				+		printf("%i ms (%i%%),(%u t/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark14_Reference), b3DbvtBenchmarkRate(t, time));
				+	}
				+	if (cfgBenchmark15_Enable)
				+	{  // Benchmark 15
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3Vector3> vectors;
				+		b3DbvtBenchmark::P15 policy;
				+		vectors.resize(cfgBenchmark15_Iterations);
				+		for (int i = 0; i < vectors.size(); ++i)
				+		{
				+			vectors[i] = (b3DbvtBenchmark::RandVector3() * 2 - b3Vector3(1, 1, 1)).normalized();
				+		}
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		policy.m_nodes.reserve(cfgLeaves);
				+		printf("[15] culling(KDOP+qsort): ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark15_Iterations; ++i)
				+		{
				+			static const b3Scalar offset = 0;
				+			policy.m_nodes.resize(0);
				+			policy.m_axis = vectors[i];
				+			dbvt.collideKDOP(dbvt.m_root, &vectors[i], &offset, 1, policy);
				+			policy.m_nodes.quickSort(b3DbvtBenchmark::P15::sortfnc);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int t = cfgBenchmark15_Iterations;
				+		printf("%i ms (%i%%),(%u t/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark15_Reference), b3DbvtBenchmarkRate(t, time));
				+	}
				+	if (cfgBenchmark16_Enable)
				+	{  // Benchmark 16
				+		srand(380843);
				+		b3DynamicBvh dbvt;
				+		b3AlignedObjectArray<b3DbvtNode*> batch;
				+		b3DbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
				+		dbvt.optimizeTopDown();
				+		batch.reserve(cfgBenchmark16_BatchCount);
				+		printf("[16] insert/remove batch(%i): ", cfgBenchmark16_BatchCount);
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark16_Passes; ++i)
				+		{
				+			for (int j = 0; j < cfgBenchmark16_BatchCount; ++j)
				+			{
				+				batch.push_back(dbvt.insert(b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
				+			}
				+			for (int j = 0; j < cfgBenchmark16_BatchCount; ++j)
				+			{
				+				dbvt.remove(batch[j]);
				+			}
				+			batch.resize(0);
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		const int ir = cfgBenchmark16_Passes * cfgBenchmark16_BatchCount;
				+		printf("%i ms (%i%%),(%u bir/s)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark16_Reference), b3DbvtBenchmarkRate(ir, time));
				+	}
				+	if (cfgBenchmark17_Enable)
				+	{  // Benchmark 17
				+		srand(380843);
				+		b3AlignedObjectArray<b3DbvtVolume> volumes;
				+		b3AlignedObjectArray<int> results;
				+		b3AlignedObjectArray<int> indices;
				+		volumes.resize(cfgLeaves);
				+		results.resize(cfgLeaves);
				+		indices.resize(cfgLeaves);
				+		for (int i = 0; i < cfgLeaves; ++i)
				+		{
				+			indices[i] = i;
				+			volumes[i] = b3DbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
				+		}
				+		// Shuffle the index table so the timed loop visits volumes in a scattered order.
				+		for (int i = 0; i < cfgLeaves; ++i)
				+		{
				+			b3Swap(indices[i], indices[rand() % cfgLeaves]);
				+		}
				+		printf("[17] b3DbvtVolume select: ");
				+		wallclock.reset();
				+		for (int i = 0; i < cfgBenchmark17_Iterations; ++i)
				+		{
				+			for (int j = 0; j < cfgLeaves; ++j)
				+			{
				+				for (int k = 0; k < cfgLeaves; ++k)
				+				{
				+					const int idx = indices[k];
				+					results[idx] = Select(volumes[idx], volumes[j], volumes[k]);
				+				}
				+			}
				+		}
				+		const int time = (int)wallclock.getTimeMilliseconds();
				+		printf("%i ms (%i%%)\r\n", time, b3DbvtBenchmarkDelta(time, cfgBenchmark17_Reference));
				+	}
				+	printf("\r\n\r\n");
				+}
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
@@ -0,0 +1,1332 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+///b3DynamicBvh implementation by Nathanael Presson
			
 
				+
			
 
				+#ifndef B3_DYNAMIC_BOUNDING_VOLUME_TREE_H
			
 
				+#define B3_DYNAMIC_BOUNDING_VOLUME_TREE_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Geometry/b3AabbUtil.h"
			
 
				+
			
 
				+//
			
 
				+// Compile time configuration
			
 
				+//
			
 
				+
			
 
				+// Implementation profiles
			
 
				+#define B3_DBVT_IMPL_GENERIC 0  // Generic implementation
			
 
				+#define B3_DBVT_IMPL_SSE 1      // SSE
			
 
				+
			
 
				+// Template implementation of ICollide
			
 
				+#ifdef _WIN32
			
 
				+#if (defined(_MSC_VER) && _MSC_VER >= 1400)
			
 
				+#define B3_DBVT_USE_TEMPLATE 1
			
 
				+#else
			
 
				+#define B3_DBVT_USE_TEMPLATE 0
			
 
				+#endif
			
 
				+#else
			
 
				+#define B3_DBVT_USE_TEMPLATE 0
			
 
				+#endif
			
 
				+
			
 
				+// Use only intrinsics instead of inline asm
			
 
				+#define B3_DBVT_USE_INTRINSIC_SSE 1
			
 
				+
			
 
				+// Using memmove for collideOCL
			
 
				+#define B3_DBVT_USE_MEMMOVE 1
			
 
				+
			
 
				+// Enable benchmarking code
			
 
				+#define B3_DBVT_ENABLE_BENCHMARK 0
			
 
				+
			
 
				+// Inlining
			
 
				+#define B3_DBVT_INLINE B3_FORCE_INLINE
			
 
				+
			
 
				+// Specific methods implementation
			
 
				+
			
 
				+// SSE gives errors on MSVC 7.1
			
 
				+#if defined(B3_USE_SSE)  //&& defined (_WIN32)
			
 
				+#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_SSE
			
 
				+#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_SSE
			
 
				+#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_SSE
			
 
				+#else
			
 
				+#define B3_DBVT_SELECT_IMPL B3_DBVT_IMPL_GENERIC
			
 
				+#define B3_DBVT_MERGE_IMPL B3_DBVT_IMPL_GENERIC
			
 
				+#define B3_DBVT_INT0_IMPL B3_DBVT_IMPL_GENERIC
			
 
				+#endif
			
 
				+
			
 
				+#if (B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE) || \
			
 
				+	(B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE) ||  \
			
 
				+	(B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE)
			
 
				+#include <emmintrin.h>
			
 
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Auto config and checks
			
 
				+//
			
 
				+
			
 
				+#if B3_DBVT_USE_TEMPLATE
			
 
				+#define B3_DBVT_VIRTUAL
			
 
				+#define B3_DBVT_VIRTUAL_DTOR(a)
			
 
				+#define B3_DBVT_PREFIX template <typename T>
			
 
				+#define B3_DBVT_IPOLICY T& policy
			
 
				+#define B3_DBVT_CHECKTYPE                        \
			
 
				+	static const ICollide& typechecker = *(T*)1; \
			
 
				+	(void)typechecker;
			
 
				+#else
			
 
				+#define B3_DBVT_VIRTUAL_DTOR(a) \
			
 
				+	virtual ~a() {}
			
 
				+#define B3_DBVT_VIRTUAL virtual
			
 
				+#define B3_DBVT_PREFIX
			
 
				+#define B3_DBVT_IPOLICY ICollide& policy
			
 
				+#define B3_DBVT_CHECKTYPE
			
 
				+#endif
			
 
				+
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+#if !defined(__CELLOS_LV2__) && !defined(__MWERKS__)
			
 
				+#include <memory.h>
			
 
				+#endif
			
 
				+#include <string.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_USE_TEMPLATE
			
 
				+#error "B3_DBVT_USE_TEMPLATE undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_USE_MEMMOVE
			
 
				+#error "B3_DBVT_USE_MEMMOVE undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_ENABLE_BENCHMARK
			
 
				+#error "B3_DBVT_ENABLE_BENCHMARK undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_SELECT_IMPL
			
 
				+#error "B3_DBVT_SELECT_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_MERGE_IMPL
			
 
				+#error "B3_DBVT_MERGE_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_INT0_IMPL
			
 
				+#error "B3_DBVT_INT0_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Default volumes
			
 
				+//
			
 
				+
			
 
				+/* b3DbvtAabbMm			*/
			
 
				+// Axis-aligned bounding box stored as two corners: `mi` (minimum) and `mx` (maximum).
				+// Only the inline accessors are defined here; every other member and the friend
				+// functions are declared here and defined elsewhere.
				+struct b3DbvtAabbMm
				+{
				+	// Midpoint of the two corners.
				+	B3_DBVT_INLINE b3Vector3 Center() const { return (mi + mx) / 2; }
				+	// Full edge lengths (mx - mi).
				+	B3_DBVT_INLINE b3Vector3 Lengths() const { return mx - mi; }
				+	// Half edge lengths.
				+	B3_DBVT_INLINE b3Vector3 Extents() const { return (mx - mi) / 2; }
				+	// Read-only access to the stored corners.
				+	B3_DBVT_INLINE const b3Vector3& Mins() const { return mi; }
				+	B3_DBVT_INLINE const b3Vector3& Maxs() const { return mx; }
				+
				+	// Factory functions.
				+	static inline b3DbvtAabbMm FromCE(const b3Vector3& c, const b3Vector3& e);
				+	static inline b3DbvtAabbMm FromCR(const b3Vector3& c, b3Scalar r);
				+	static inline b3DbvtAabbMm FromMM(const b3Vector3& mi, const b3Vector3& mx);
				+	static inline b3DbvtAabbMm FromPoints(const b3Vector3* pts, int n);
				+	static inline b3DbvtAabbMm FromPoints(const b3Vector3** ppts, int n);
				+
				+	// Mutators and queries.
				+	B3_DBVT_INLINE void Expand(const b3Vector3& e);
				+	B3_DBVT_INLINE void SignedExpand(const b3Vector3& e);
				+	B3_DBVT_INLINE bool Contain(const b3DbvtAabbMm& a) const;
				+	B3_DBVT_INLINE int Classify(const b3Vector3& n, b3Scalar o, int s) const;
				+	B3_DBVT_INLINE b3Scalar ProjectMinimum(const b3Vector3& v, unsigned signs) const;
				+
				+	// Free functions granted access to the private corners.
				+	B3_DBVT_INLINE friend bool b3Intersect(const b3DbvtAabbMm& a, const b3DbvtAabbMm& b);
				+	B3_DBVT_INLINE friend bool b3Intersect(const b3DbvtAabbMm& a, const b3Vector3& b);
				+	B3_DBVT_INLINE friend b3Scalar b3Proximity(const b3DbvtAabbMm& a, const b3DbvtAabbMm& b);
				+	B3_DBVT_INLINE friend int b3Select(const b3DbvtAabbMm& o, const b3DbvtAabbMm& a, const b3DbvtAabbMm& b);
				+	B3_DBVT_INLINE friend void b3Merge(const b3DbvtAabbMm& a, const b3DbvtAabbMm& b, b3DbvtAabbMm& r);
				+	B3_DBVT_INLINE friend bool b3NotEqual(const b3DbvtAabbMm& a, const b3DbvtAabbMm& b);
				+
				+	// Writable access to the stored corners.
				+	B3_DBVT_INLINE b3Vector3& tMins() { return mi; }
				+	B3_DBVT_INLINE b3Vector3& tMaxs() { return mx; }
				+
				+private:
				+	B3_DBVT_INLINE void AddSpan(const b3Vector3& d, b3Scalar& smi, b3Scalar& smx) const;
				+
				+	b3Vector3 mi, mx;
				+};
			
 
				+
			
 
				+// Types: b3DbvtVolume is an alias for the min/max box b3DbvtAabbMm.
			
 
				+typedef b3DbvtAabbMm b3DbvtVolume;
			
 
				+
			
 
				+/* b3DbvtNode				*/
			
 
				+// Tree node: a bounding volume, a link to the parent node, and a union that holds
				+// either two child links or a payload (pointer or int).
				+struct b3DbvtNode
				+{
				+	b3DbvtVolume volume;
				+	b3DbvtNode* parent;
				+	// A node is a leaf exactly when its second child slot is null.
				+	B3_DBVT_INLINE bool isleaf() const { return childs[1] == 0; }
				+	B3_DBVT_INLINE bool isinternal() const { return !isleaf(); }
				+	// All three members share storage. Presumably leaves use data/dataAsInt and
				+	// internal nodes use childs; confirm against the tree implementation.
				+	union {
				+		b3DbvtNode* childs[2];
				+		void* data;
				+		int dataAsInt;
				+	};
				+};
			
 
				+
			
 
				+///The b3DynamicBvh class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
				+///This b3DynamicBvh is used for soft body collision detection and for the b3DynamicBvhBroadphase. It has a fast insert, remove and update of nodes.
				+///Unlike the b3QuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
				+struct b3DynamicBvh
				+{
				+	/* Stack element: a pair of nodes still to be tested against each other
				+	   (used by the collideTT* traversals). */
				+	struct sStkNN
				+	{
				+		const b3DbvtNode* a;
				+		const b3DbvtNode* b;
				+		// Leaves both pointers uninitialized; needed so arrays of sStkNN can be resized.
				+		sStkNN() {}
				+		sStkNN(const b3DbvtNode* na, const b3DbvtNode* nb) : a(na), b(nb) {}
				+	};
				+	/* Stack element: a node plus a bit mask. Note the constructor takes the
				+	   mask as unsigned but stores it in an int. */
				+	struct sStkNP
				+	{
				+		const b3DbvtNode* node;
				+		int mask;
				+		sStkNP(const b3DbvtNode* n, unsigned m) : node(n), mask(m) {}
				+	};
				+	/* Stack element: node, mask and a scalar key; `value` is the field that
				+	   nearest() compares against. */
				+	struct sStkNPS
				+	{
				+		const b3DbvtNode* node;
				+		int mask;
				+		b3Scalar value;
				+		// Leaves all members uninitialized.
				+		sStkNPS() {}
				+		sStkNPS(const b3DbvtNode* n, unsigned m, b3Scalar v) : node(n), mask(m), value(v) {}
				+	};
				+	/* Stack element: a source node together with a (mutable) parent node.
				+	   NOTE(review): presumably used by clone(); its use is not visible here. */
				+	struct sStkCLN
				+	{
				+		const b3DbvtNode* node;
				+		b3DbvtNode* parent;
				+		sStkCLN(const b3DbvtNode* n, b3DbvtNode* p) : node(n), parent(p) {}
				+	};
				+	// Policies/Interfaces
				+
				+	/* ICollide: callback interface handed to the traversal functions as the
				+	   `policy` argument. All hooks have do-nothing / return-true defaults. */
				+	struct ICollide
				+	{
				+		B3_DBVT_VIRTUAL_DTOR(ICollide)
				+		// Called with a pair of nodes (collideTT* call it for overlapping leaf pairs).
				+		B3_DBVT_VIRTUAL void Process(const b3DbvtNode*, const b3DbvtNode*) {}
				+		// Called with a single node (enumNodes/enumLeaves/collideTV/rayTest*).
				+		B3_DBVT_VIRTUAL void Process(const b3DbvtNode*) {}
				+		// Node plus scalar; the default ignores the scalar and forwards to Process(n).
				+		B3_DBVT_VIRTUAL void Process(const b3DbvtNode* n, b3Scalar) { Process(n); }
				+		B3_DBVT_VIRTUAL bool Descent(const b3DbvtNode*) { return (true); }
				+		B3_DBVT_VIRTUAL bool AllLeaves(const b3DbvtNode*) { return (true); }
				+	};
				+	/* IWriter: pure-virtual sink interface accepted by write(). */
				+	struct IWriter
				+	{
				+		virtual ~IWriter() {}
				+		virtual void Prepare(const b3DbvtNode* root, int numnodes) = 0;
				+		virtual void WriteNode(const b3DbvtNode*, int index, int parent, int child0, int child1) = 0;
				+		virtual void WriteLeaf(const b3DbvtNode*, int index, int parent) = 0;
				+	};
				+	/* IClone: optional hook accepted by clone(); the default CloneLeaf does nothing. */
				+	struct IClone
				+	{
				+		virtual ~IClone() {}
				+		virtual void CloneLeaf(b3DbvtNode*) {}
				+	};
				+
				+	// Constants: initial sizes of the traversal stacks used by the inline
				+	// traversals in this header (they grow on demand).
				+	enum
				+	{
				+		B3_SIMPLE_STACKSIZE = 64,
				+		B3_DOUBLE_STACKSIZE = B3_SIMPLE_STACKSIZE * 2
				+	};
				+
				+	// Fields
				+	b3DbvtNode* m_root;  // tree root; empty() reports true when this is null
				+	b3DbvtNode* m_free;  // NOTE(review): presumably a cached spare node - confirm in the .cpp
				+	int m_lkhd;          // NOTE(review): presumably the default lookahead for update() - confirm
				+	int m_leaves;        // NOTE(review): presumably the current leaf count - confirm
				+	unsigned m_opath;    // NOTE(review): presumably state for optimizeIncremental() - confirm
				+
				+	// Persistent pair stack reused by collideTTpersistentStack().
				+	b3AlignedObjectArray<sStkNN> m_stkStack;
				+	// Persistent node stack reused by rayTestInternal(); mutable because that method is const.
				+	mutable b3AlignedObjectArray<const b3DbvtNode*> m_rayTestStack;
				+
				+	// Methods (defined out of line unless a body is given here)
				+	b3DynamicBvh();
				+	~b3DynamicBvh();
				+	void clear();
				+	// True when the tree has no root node.
				+	bool empty() const { return (0 == m_root); }
				+	void optimizeBottomUp();
				+	void optimizeTopDown(int bu_treshold = 128);
				+	void optimizeIncremental(int passes);
				+	b3DbvtNode* insert(const b3DbvtVolume& box, void* data);
				+	void update(b3DbvtNode* leaf, int lookahead = -1);
				+	void update(b3DbvtNode* leaf, b3DbvtVolume& volume);
				+	bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity, b3Scalar margin);
				+	bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, const b3Vector3& velocity);
				+	bool update(b3DbvtNode* leaf, b3DbvtVolume& volume, b3Scalar margin);
				+	void remove(b3DbvtNode* leaf);
				+	void write(IWriter* iwriter) const;
				+	void clone(b3DynamicBvh& dest, IClone* iclone = 0) const;
				+	static int maxdepth(const b3DbvtNode* node);
				+	static int countLeaves(const b3DbvtNode* node);
				+	static void extractLeaves(const b3DbvtNode* node, b3AlignedObjectArray<const b3DbvtNode*>& leaves);
				+	// benchmark() is only implemented out of line when B3_DBVT_ENABLE_BENCHMARK
				+	// is set; otherwise it is an empty inline stub.
				+#if B3_DBVT_ENABLE_BENCHMARK
				+	static void benchmark();
				+#else
				+	static void benchmark()
				+	{
				+	}
				+#endif
				+	// B3_DBVT_IPOLICY must support ICollide policy/interface
				+	// Visits every node under root (pre-order), calling policy.Process(node).
				+	B3_DBVT_PREFIX
				+	static void enumNodes(const b3DbvtNode* root,
				+						  B3_DBVT_IPOLICY);
				+	// Visits only the leaves under root, calling policy.Process(leaf).
				+	B3_DBVT_PREFIX
				+	static void enumLeaves(const b3DbvtNode* root,
				+						   B3_DBVT_IPOLICY);
				+	// Tree-vs-tree overlap test using a locally allocated stack.
				+	B3_DBVT_PREFIX
				+	void collideTT(const b3DbvtNode* root0,
				+				   const b3DbvtNode* root1,
				+				   B3_DBVT_IPOLICY);
				+
				+	// Same traversal as collideTT but reusing m_stkStack between calls.
				+	B3_DBVT_PREFIX
				+	void collideTTpersistentStack(const b3DbvtNode* root0,
				+								  const b3DbvtNode* root1,
				+								  B3_DBVT_IPOLICY);
				+	// The transform-aware overloads below are compiled out.
				+#if 0
				+	B3_DBVT_PREFIX
				+		void		collideTT(	const b3DbvtNode* root0,
				+		const b3DbvtNode* root1,
				+		const b3Transform& xform,
				+		B3_DBVT_IPOLICY);
				+	B3_DBVT_PREFIX
				+		void		collideTT(	const b3DbvtNode* root0,
				+		const b3Transform& xform0,
				+		const b3DbvtNode* root1,
				+		const b3Transform& xform1,
				+		B3_DBVT_IPOLICY);
				+#endif
				+
				+	// Tree-vs-volume overlap test; reports every leaf whose box intersects `volume`.
				+	B3_DBVT_PREFIX
				+	void collideTV(const b3DbvtNode* root,
				+				   const b3DbvtVolume& volume,
				+				   B3_DBVT_IPOLICY) const;
				+	///rayTest is a re-entrant ray test, and can be called in parallel as long as the b3AlignedAlloc is thread-safe (uses locking etc)
				+	///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time
				+	B3_DBVT_PREFIX
				+	static void rayTest(const b3DbvtNode* root,
				+						const b3Vector3& rayFrom,
				+						const b3Vector3& rayTo,
				+						B3_DBVT_IPOLICY);
				+	///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections
				+	///rayTestInternal is used by b3DynamicBvhBroadphase to accelerate world ray casts
				+	B3_DBVT_PREFIX
				+	void rayTestInternal(const b3DbvtNode* root,
				+						 const b3Vector3& rayFrom,
				+						 const b3Vector3& rayTo,
				+						 const b3Vector3& rayDirectionInverse,
				+						 unsigned int signs[3],
				+						 b3Scalar lambda_max,
				+						 const b3Vector3& aabbMin,
				+						 const b3Vector3& aabbMax,
				+						 B3_DBVT_IPOLICY) const;
				+
				+	// Plane-set (k-DOP) culling: `normals`/`offsets` hold `count` planes.
				+	B3_DBVT_PREFIX
				+	static void collideKDOP(const b3DbvtNode* root,
				+							const b3Vector3* normals,
				+							const b3Scalar* offsets,
				+							int count,
				+							B3_DBVT_IPOLICY);
				+	// NOTE(review): like collideKDOP but with a sort axis; the definition is
				+	// outside the reviewed portion, so the ordering semantics are unconfirmed.
				+	B3_DBVT_PREFIX
				+	static void collideOCL(const b3DbvtNode* root,
				+						   const b3Vector3* normals,
				+						   const b3Scalar* offsets,
				+						   const b3Vector3& sortaxis,
				+						   int count,
				+						   B3_DBVT_IPOLICY,
				+						   bool fullsort = true);
				+	B3_DBVT_PREFIX
				+	static void collideTU(const b3DbvtNode* root,
				+						  B3_DBVT_IPOLICY);
				+	// Helpers
				+	// Binary search over the index range [l, h): `i` maps positions to entries
				+	// of `a`. Positions whose value is >= v are skipped to the right, so the
				+	// result is the first position whose value is < v (or the initial h).
				+	// Assumes the values are ordered non-increasing along `i` - TODO confirm with callers.
				+	static B3_DBVT_INLINE int nearest(const int* i, const b3DynamicBvh::sStkNPS* a, b3Scalar v, int l, int h)
				+	{
				+		int m = 0;
				+		while (l < h)
				+		{
				+			m = (l + h) >> 1;
				+			if (a[i[m]].value >= v)
				+				l = m + 1;
				+			else
				+				h = m;
				+		}
				+		return (h);
				+	}
				+	// Stores `value` in `stock` and returns its slot index. A slot index is
				+	// taken from the back of `ifree` when one is available; otherwise the
				+	// value is appended to `stock`.
				+	static B3_DBVT_INLINE int allocate(b3AlignedObjectArray<int>& ifree,
				+									   b3AlignedObjectArray<sStkNPS>& stock,
				+									   const sStkNPS& value)
				+	{
				+		int i;
				+		if (ifree.size() > 0)
				+		{
				+			i = ifree[ifree.size() - 1];
				+			ifree.pop_back();
				+			stock[i] = value;
				+		}
				+		else
				+		{
				+			i = stock.size();
				+			stock.push_back(value);
				+		}
				+		return (i);
				+	}
				+	//
				+private:
				+	// Private copy constructor with an empty body: code outside the class
				+	// cannot copy a b3DynamicBvh (use clone() to duplicate a tree).
				+	b3DynamicBvh(const b3DynamicBvh&) {}
				+};
			
 
				+
			
 
				+//
			
 
				+// Inline's
			
 
				+//
			
 
				+
			
 
				+//
			
 
				+/// Builds a box from a center `c` and per-axis half extents `e`:
				+/// the minimum corner is c - e and the maximum corner is c + e.
				+inline b3DbvtAabbMm b3DbvtAabbMm::FromCE(const b3Vector3& c, const b3Vector3& e)
				+{
				+	b3DbvtAabbMm result;
				+	result.mx = c + e;
				+	result.mi = c - e;
				+	return result;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Builds a cube-shaped box around center `c` using `r` as the half extent
				+/// on all three axes (delegates to FromCE).
				+inline b3DbvtAabbMm b3DbvtAabbMm::FromCR(const b3Vector3& c, b3Scalar r)
				+{
				+	const b3Vector3 halfExtents = b3MakeVector3(r, r, r);
				+	return FromCE(c, halfExtents);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Builds a box directly from its minimum (`mi`) and maximum (`mx`) corners.
				+/// The corners are copied as given; no ordering check is performed.
				+inline b3DbvtAabbMm b3DbvtAabbMm::FromMM(const b3Vector3& mi, const b3Vector3& mx)
				+{
				+	b3DbvtAabbMm result;
				+	result.mx = mx;
				+	result.mi = mi;
				+	return result;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Builds the tightest box enclosing the `n` points in the array `pts`.
				+/// pts[0] is always read, so the array must hold at least one point.
				+inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3* pts, int n)
				+{
				+	b3DbvtAabbMm bounds;
				+	// Seed both corners with the first point, then widen with the rest.
				+	bounds.mx = pts[0];
				+	bounds.mi = bounds.mx;
				+	int idx = 1;
				+	while (idx < n)
				+	{
				+		const b3Vector3& point = pts[idx];
				+		bounds.mi.setMin(point);
				+		bounds.mx.setMax(point);
				+		++idx;
				+	}
				+	return bounds;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Builds the tightest box enclosing the `n` points referenced by the
				+/// pointer array `ppts`. *ppts[0] is always read, so at least one valid
				+/// pointer is required.
				+inline b3DbvtAabbMm b3DbvtAabbMm::FromPoints(const b3Vector3** ppts, int n)
				+{
				+	b3DbvtAabbMm bounds;
				+	// Seed both corners with the first point, then widen with the rest.
				+	bounds.mx = *ppts[0];
				+	bounds.mi = bounds.mx;
				+	int idx = 1;
				+	while (idx < n)
				+	{
				+		const b3Vector3& point = *ppts[idx];
				+		bounds.mi.setMin(point);
				+		bounds.mx.setMax(point);
				+		++idx;
				+	}
				+	return bounds;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Grows the box symmetrically: the maximum corner moves by +e and the
				+/// minimum corner by -e.
				+B3_DBVT_INLINE void b3DbvtAabbMm::Expand(const b3Vector3& e)
				+{
				+	mx += e;
				+	mi -= e;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Grows the box in the direction of `e`, one axis at a time: a strictly
				+/// positive component is added to the maximum corner, any other component
				+/// (zero or negative) is added to the minimum corner.
				+B3_DBVT_INLINE void b3DbvtAabbMm::SignedExpand(const b3Vector3& e)
				+{
				+	b3Vector3& cornerX = (e.x > 0) ? mx : mi;
				+	cornerX.setX(cornerX.x + e.x);
				+
				+	b3Vector3& cornerY = (e.y > 0) ? mx : mi;
				+	cornerY.setY(cornerY.y + e.y);
				+
				+	b3Vector3& cornerZ = (e.z > 0) ? mx : mi;
				+	cornerZ.setZ(cornerZ.z + e.z);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Returns true when box `a` lies entirely inside this box (touching
				+/// faces count as contained).
				+B3_DBVT_INLINE bool b3DbvtAabbMm::Contain(const b3DbvtAabbMm& a) const
				+{
				+	const bool minCornerInside = (mi.x <= a.mi.x) && (mi.y <= a.mi.y) && (mi.z <= a.mi.z);
				+	const bool maxCornerInside = (mx.x >= a.mx.x) && (mx.y >= a.mx.y) && (mx.z >= a.mx.z);
				+	return minCornerInside && maxCornerInside;
				+}
			
 
				+
			
 
				+//
			
 
				+/// Classifies this box against the plane dot(n, p) + o = 0.
				+///
				+/// `s` is a 3-bit selector (bit 0 = x, bit 1 = y, bit 2 = z). For each axis a
				+/// set bit picks the maximum coordinate for corner `px` and the minimum
				+/// coordinate for the opposite corner `pi`; a clear bit does the reverse.
				+///
				+/// Returns -1 when px is on the negative side of the plane, +1 when pi is on
				+/// the non-negative side, and 0 otherwise.
				+///
				+/// Only the low three bits of `s` are used. The previous switch had no
				+/// default case, so a selector outside 0..7 left both corners uninitialized
				+/// before they were read; deriving the corners from the bits gives the same
				+/// result for 0..7 and removes that uninitialized read.
				+B3_DBVT_INLINE int b3DbvtAabbMm::Classify(const b3Vector3& n, b3Scalar o, int s) const
				+{
				+	const bool maxX = (s & 1) != 0;
				+	const bool maxY = (s & 2) != 0;
				+	const bool maxZ = (s & 4) != 0;
				+
				+	// Corner selected by the bits, and the diagonally opposite corner.
				+	const b3Vector3 px = b3MakeVector3(maxX ? mx.x : mi.x,
				+									   maxY ? mx.y : mi.y,
				+									   maxZ ? mx.z : mi.z);
				+	const b3Vector3 pi = b3MakeVector3(maxX ? mi.x : mx.x,
				+									   maxY ? mi.y : mx.y,
				+									   maxZ ? mi.z : mx.z);
				+
				+	if ((b3Dot(n, px) + o) < 0) return (-1);
				+	if ((b3Dot(n, pi) + o) >= 0) return (+1);
				+	return (0);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Projects one corner of the box onto `v` and returns the dot product.
				+/// Bits 0, 1 and 2 of `signs` choose the x, y and z coordinate of that
				+/// corner: a clear bit takes the maximum, a set bit takes the minimum.
				+B3_DBVT_INLINE b3Scalar b3DbvtAabbMm::ProjectMinimum(const b3Vector3& v, unsigned signs) const
				+{
				+	const b3Scalar cornerX = (signs & 1u) ? mi.x : mx.x;
				+	const b3Scalar cornerY = (signs & 2u) ? mi.y : mx.y;
				+	const b3Scalar cornerZ = (signs & 4u) ? mi.z : mx.z;
				+	const b3Vector3 corner = b3MakeVector3(cornerX, cornerY, cornerZ);
				+	return b3Dot(corner, v);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Accumulates the projection interval of the box along direction `d`
				+/// into `smi` (lower end) and `smx` (upper end). For every axis the corner
				+/// coordinate that yields the smaller product goes to `smi` and the other
				+/// to `smx`; the accumulators are added to, not reset.
				+B3_DBVT_INLINE void b3DbvtAabbMm::AddSpan(const b3Vector3& d, b3Scalar& smi, b3Scalar& smx) const
				+{
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		// A negative direction component swaps which corner gives the low end.
				+		const bool flipped = d[axis] < 0;
				+		smi += (flipped ? mx[axis] : mi[axis]) * d[axis];
				+		smx += (flipped ? mi[axis] : mx[axis]) * d[axis];
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+/// Box-vs-box overlap test: true when `a` and `b` overlap (or touch) on
				+/// all three axes. The SSE variant flags separation per lane and reports
				+/// overlap when none of the first three lanes is set.
				+B3_DBVT_INLINE bool b3Intersect(const b3DbvtAabbMm& a,
				+								const b3DbvtAabbMm& b)
				+{
				+#if B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE
				+	const __m128 separated(_mm_or_ps(_mm_cmplt_ps(_mm_load_ps(b.mx), _mm_load_ps(a.mi)),
				+									 _mm_cmplt_ps(_mm_load_ps(a.mx), _mm_load_ps(b.mi))));
				+#if defined(_WIN32)
				+	const __int32* lanes((const __int32*)&separated);
				+#else
				+	const int* lanes((const int*)&separated);
				+#endif
				+	return ((lanes[0] | lanes[1] | lanes[2]) == 0);
				+#else
				+	const bool overlapX = (a.mi.x <= b.mx.x) && (a.mx.x >= b.mi.x);
				+	const bool overlapY = (a.mi.y <= b.mx.y) && (a.mx.y >= b.mi.y);
				+	const bool overlapZ = (a.mi.z <= b.mx.z) && (a.mx.z >= b.mi.z);
				+	return overlapX && overlapY && overlapZ;
				+#endif
				+}
			
 
				+
			
 
				+//
			
 
				+/// Box-vs-point test: true when point `b` lies inside box `a`, with the
				+/// box faces included.
				+B3_DBVT_INLINE bool b3Intersect(const b3DbvtAabbMm& a,
				+								const b3Vector3& b)
				+{
				+	const bool notBelowMin = (b.x >= a.mi.x) && (b.y >= a.mi.y) && (b.z >= a.mi.z);
				+	const bool notAboveMax = (b.x <= a.mx.x) && (b.y <= a.mx.y) && (b.z <= a.mx.z);
				+	return notBelowMin && notAboveMax;
				+}
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+
			
 
				+//
			
 
				+/// Distance-like measure between two boxes: the sum of absolute
				+/// per-axis differences between (a.mi + a.mx) and (b.mi + b.mx). The corner
				+/// sums are not halved, so this is twice the Manhattan distance between the
				+/// box centers; it is used for comparisons only (see b3Select).
				+B3_DBVT_INLINE b3Scalar b3Proximity(const b3DbvtAabbMm& a,
				+									const b3DbvtAabbMm& b)
				+{
				+	const b3Vector3 cornerSumA = a.mi + a.mx;
				+	const b3Vector3 cornerSumB = b.mi + b.mx;
				+	const b3Vector3 delta = cornerSumA - cornerSumB;
				+	return b3Fabs(delta.x) + b3Fabs(delta.y) + b3Fabs(delta.z);
				+}
			
 
				+
			
 
				+//
			
 
				+// Chooses which of the boxes `a` and `b` is nearer to `o` in the sense of
				+// b3Proximity: returns 0 when `a` is strictly nearer, otherwise 1.
				+// Three implementations are selected by the preprocessor: SSE intrinsics,
				+// MSVC-style inline assembly, and the portable fallback on the last line.
				+B3_DBVT_INLINE int b3Select(const b3DbvtAabbMm& o,
				+							const b3DbvtAabbMm& a,
				+							const b3DbvtAabbMm& b)
				+{
				+#if B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE
				+
				+	// Mask that clears the float sign bit (absolute value) per lane.
				+	// NOTE(review): the _WIN32 mask keeps the 4th lane while the other mask
				+	// zeroes it. Both SSE paths below fold lane 3 into the horizontal sum, so
				+	// with the _WIN32 mask the result depends on the 4th component of the
				+	// vectors - confirm that component is always zero, or align the masks.
				+#if defined(_WIN32)
				+	static B3_ATTRIBUTE_ALIGNED16(const unsigned __int32) mask[] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
				+#else
				+	static B3_ATTRIBUTE_ALIGNED16(const unsigned int) mask[] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x00000000 /*0x7fffffff*/};
				+#endif
				+	///@todo: the intrinsic version is 11% slower
				+#if B3_DBVT_USE_INTRINSIC_SSE
				+
				+	union b3SSEUnion  ///NOTE: if we use more intrinsics, move b3SSEUnion into the LinearMath directory
				+	{
				+		__m128 ssereg;
				+		float floats[4];
				+		int ints[4];
				+	};
				+
				+	// omi = o.mi + o.mx
				+	__m128 omi(_mm_load_ps(o.mi));
				+	omi = _mm_add_ps(omi, _mm_load_ps(o.mx));
				+	// ami = |(a.mi + a.mx) - omi| per lane
				+	__m128 ami(_mm_load_ps(a.mi));
				+	ami = _mm_add_ps(ami, _mm_load_ps(a.mx));
				+	ami = _mm_sub_ps(ami, omi);
				+	ami = _mm_and_ps(ami, _mm_load_ps((const float*)mask));
				+	// bmi = |(b.mi + b.mx) - omi| per lane
				+	__m128 bmi(_mm_load_ps(b.mi));
				+	bmi = _mm_add_ps(bmi, _mm_load_ps(b.mx));
				+	bmi = _mm_sub_ps(bmi, omi);
				+	bmi = _mm_and_ps(bmi, _mm_load_ps((const float*)mask));
				+	// Horizontal sums: lane 0 ends up holding the sum of all four lanes.
				+	__m128 t0(_mm_movehl_ps(ami, ami));
				+	ami = _mm_add_ps(ami, t0);
				+	ami = _mm_add_ss(ami, _mm_shuffle_ps(ami, ami, 1));
				+	__m128 t1(_mm_movehl_ps(bmi, bmi));
				+	bmi = _mm_add_ps(bmi, t1);
				+	bmi = _mm_add_ss(bmi, _mm_shuffle_ps(bmi, bmi, 1));
				+
				+	// The compare sets lane 0 to all ones when sum(b) <= sum(a); bit 0 is the answer.
				+	b3SSEUnion tmp;
				+	tmp.ssereg = _mm_cmple_ss(bmi, ami);
				+	return tmp.ints[0] & 1;
				+
				+#else
				+	// Inline-assembly version of the same computation (32-bit registers,
				+	// __asm block syntax).
				+	B3_ATTRIBUTE_ALIGNED16(__int32 r[1]);
				+	__asm
				+	{
				+		mov		eax,o
				+			mov		ecx,a
				+			mov		edx,b
				+			movaps	xmm0,[eax]
				+		movaps	xmm5,mask
				+			addps	xmm0,[eax+16]	
				+		movaps	xmm1,[ecx]
				+		movaps	xmm2,[edx]
				+		addps	xmm1,[ecx+16]
				+		addps	xmm2,[edx+16]
				+		subps	xmm1,xmm0
				+			subps	xmm2,xmm0
				+			andps	xmm1,xmm5
				+			andps	xmm2,xmm5
				+			movhlps	xmm3,xmm1
				+			movhlps	xmm4,xmm2
				+			addps	xmm1,xmm3
				+			addps	xmm2,xmm4
				+			pshufd	xmm3,xmm1,1
				+			pshufd	xmm4,xmm2,1
				+			addss	xmm1,xmm3
				+			addss	xmm2,xmm4
				+			cmpless	xmm2,xmm1
				+			movss	r,xmm2
				+	}
				+	return (r[0] & 1);
				+#endif
				+#else
				+	// Portable fallback: strict comparison, so ties select index 1.
				+	return (b3Proximity(o, a) < b3Proximity(o, b) ? 0 : 1);
				+#endif
				+}
			
 
				+
			
 
				+//
			
 
				+/// Writes into `r` the smallest box enclosing both `a` and `b`
				+/// (component-wise minimum of the min corners, maximum of the max corners).
				+/// `r` may be the same object as `a` or `b`.
				+B3_DBVT_INLINE void b3Merge(const b3DbvtAabbMm& a,
				+							const b3DbvtAabbMm& b,
				+							b3DbvtAabbMm& r)
				+{
				+#if B3_DBVT_MERGE_IMPL == B3_DBVT_IMPL_SSE
				+	// Load all four corners before storing, so aliasing with r is harmless.
				+	__m128 lowA(_mm_load_ps(a.mi));
				+	__m128 highA(_mm_load_ps(a.mx));
				+	__m128 lowB(_mm_load_ps(b.mi));
				+	__m128 highB(_mm_load_ps(b.mx));
				+	lowA = _mm_min_ps(lowA, lowB);
				+	highA = _mm_max_ps(highA, highB);
				+	_mm_store_ps(r.mi, lowA);
				+	_mm_store_ps(r.mx, highA);
				+#else
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		r.mi[axis] = (a.mi[axis] < b.mi[axis]) ? a.mi[axis] : b.mi[axis];
				+		r.mx[axis] = (a.mx[axis] > b.mx[axis]) ? a.mx[axis] : b.mx[axis];
				+	}
				+#endif
				+}
			
 
				+
			
 
				+//
			
 
				+/// Returns true when the two boxes differ in any x/y/z coordinate of
				+/// either corner (exact floating-point comparison).
				+B3_DBVT_INLINE bool b3NotEqual(const b3DbvtAabbMm& a,
				+							   const b3DbvtAabbMm& b)
				+{
				+	const bool sameMin = (a.mi.x == b.mi.x) && (a.mi.y == b.mi.y) && (a.mi.z == b.mi.z);
				+	const bool sameMax = (a.mx.x == b.mx.x) && (a.mx.y == b.mx.y) && (a.mx.z == b.mx.z);
				+	return !(sameMin && sameMax);
				+}
			
 
				+
			
 
				+//
			
 
				+// Inline's
			
 
				+//
			
 
				+
			
 
				+//
			
 
				+/// Pre-order walk of the subtree at `root`: policy.Process is called on
				+/// every node, parent first, then the childs[0] subtree, then childs[1].
				+/// `root` is dereferenced without a null check.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::enumNodes(const b3DbvtNode* root,
				+									B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	policy.Process(root);
				+	if (root->isleaf())
				+	{
				+		return;
				+	}
				+	enumNodes(root->childs[0], policy);
				+	enumNodes(root->childs[1], policy);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Walks the subtree at `root` and calls policy.Process on leaves only,
				+/// visiting the childs[0] subtree before childs[1].
				+/// `root` is dereferenced without a null check.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::enumLeaves(const b3DbvtNode* root,
				+									 B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	if (root->isleaf())
				+	{
				+		policy.Process(root);
				+		return;
				+	}
				+	enumLeaves(root->childs[0], policy);
				+	enumLeaves(root->childs[1], policy);
				+}
			
 
				+
			
 
				+//
			
 
				+/// Tree-vs-tree overlap traversal. Starting from the pair (root0, root1),
				+/// pairs whose volumes intersect are expanded until both sides are leaves,
				+/// at which point policy.Process(leafA, leafB) is called. A pair made of the
				+/// same node is expanded into its three child combinations without an
				+/// intersection test. Does nothing if either root is null.
				+/// The work stack is allocated locally on every call.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::collideTT(const b3DbvtNode* root0,
				+									const b3DbvtNode* root1,
				+									B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	if (!(root0 && root1))
				+	{
				+		return;
				+	}
				+
				+	b3AlignedObjectArray<sStkNN> pairs;
				+	pairs.resize(B3_DOUBLE_STACKSIZE);
				+	pairs[0] = sStkNN(root0, root1);
				+	int top = 1;
				+	// Up to four pairs are pushed per iteration, so grow once fewer than
				+	// four free slots could remain.
				+	int growAt = B3_DOUBLE_STACKSIZE - 4;
				+
				+	while (top != 0)
				+	{
				+		const sStkNN cur = pairs[--top];
				+		if (top > growAt)
				+		{
				+			pairs.resize(pairs.size() * 2);
				+			growAt = pairs.size() - 4;
				+		}
				+
				+		const b3DbvtNode* const na = cur.a;
				+		const b3DbvtNode* const nb = cur.b;
				+
				+		if (na == nb)
				+		{
				+			// Self pair: descend into (c0,c0), (c1,c1) and the cross pair (c0,c1).
				+			if (na->isinternal())
				+			{
				+				pairs[top++] = sStkNN(na->childs[0], na->childs[0]);
				+				pairs[top++] = sStkNN(na->childs[1], na->childs[1]);
				+				pairs[top++] = sStkNN(na->childs[0], na->childs[1]);
				+			}
				+			continue;
				+		}
				+		if (!b3Intersect(na->volume, nb->volume))
				+		{
				+			continue;
				+		}
				+
				+		const bool aInternal = na->isinternal();
				+		const bool bInternal = nb->isinternal();
				+		if (aInternal && bInternal)
				+		{
				+			pairs[top++] = sStkNN(na->childs[0], nb->childs[0]);
				+			pairs[top++] = sStkNN(na->childs[1], nb->childs[0]);
				+			pairs[top++] = sStkNN(na->childs[0], nb->childs[1]);
				+			pairs[top++] = sStkNN(na->childs[1], nb->childs[1]);
				+		}
				+		else if (aInternal)
				+		{
				+			pairs[top++] = sStkNN(na->childs[0], nb);
				+			pairs[top++] = sStkNN(na->childs[1], nb);
				+		}
				+		else if (bInternal)
				+		{
				+			pairs[top++] = sStkNN(na, nb->childs[0]);
				+			pairs[top++] = sStkNN(na, nb->childs[1]);
				+		}
				+		else
				+		{
				+			policy.Process(na, nb);
				+		}
				+	}
				+}
			
 
				+
			
 
				+/// Same tree-vs-tree traversal as collideTT, but the pair stack is the
				+/// member m_stkStack, which is resized and reused across calls instead of
				+/// being allocated locally. Because it mutates that member, concurrent calls
				+/// on the same object would share the stack.
				+/// Does nothing if either root is null.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::collideTTpersistentStack(const b3DbvtNode* root0,
				+												   const b3DbvtNode* root1,
				+												   B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	if (!(root0 && root1))
				+	{
				+		return;
				+	}
				+
				+	m_stkStack.resize(B3_DOUBLE_STACKSIZE);
				+	m_stkStack[0] = sStkNN(root0, root1);
				+	int top = 1;
				+	// Up to four pairs are pushed per iteration, so grow once fewer than
				+	// four free slots could remain.
				+	int growAt = B3_DOUBLE_STACKSIZE - 4;
				+
				+	while (top != 0)
				+	{
				+		const sStkNN cur = m_stkStack[--top];
				+		if (top > growAt)
				+		{
				+			m_stkStack.resize(m_stkStack.size() * 2);
				+			growAt = m_stkStack.size() - 4;
				+		}
				+
				+		const b3DbvtNode* const na = cur.a;
				+		const b3DbvtNode* const nb = cur.b;
				+
				+		if (na == nb)
				+		{
				+			// Self pair: descend into (c0,c0), (c1,c1) and the cross pair (c0,c1).
				+			if (na->isinternal())
				+			{
				+				m_stkStack[top++] = sStkNN(na->childs[0], na->childs[0]);
				+				m_stkStack[top++] = sStkNN(na->childs[1], na->childs[1]);
				+				m_stkStack[top++] = sStkNN(na->childs[0], na->childs[1]);
				+			}
				+			continue;
				+		}
				+		if (!b3Intersect(na->volume, nb->volume))
				+		{
				+			continue;
				+		}
				+
				+		const bool aInternal = na->isinternal();
				+		const bool bInternal = nb->isinternal();
				+		if (aInternal && bInternal)
				+		{
				+			m_stkStack[top++] = sStkNN(na->childs[0], nb->childs[0]);
				+			m_stkStack[top++] = sStkNN(na->childs[1], nb->childs[0]);
				+			m_stkStack[top++] = sStkNN(na->childs[0], nb->childs[1]);
				+			m_stkStack[top++] = sStkNN(na->childs[1], nb->childs[1]);
				+		}
				+		else if (aInternal)
				+		{
				+			m_stkStack[top++] = sStkNN(na->childs[0], nb);
				+			m_stkStack[top++] = sStkNN(na->childs[1], nb);
				+		}
				+		else if (bInternal)
				+		{
				+			m_stkStack[top++] = sStkNN(na, nb->childs[0]);
				+			m_stkStack[top++] = sStkNN(na, nb->childs[1]);
				+		}
				+		else
				+		{
				+			policy.Process(na, nb);
				+		}
				+	}
				+}
			
 
				+
			
 
				+#if 0
				+// Everything up to the matching #endif is compiled out.
				+// It holds two transform-aware collideTT overloads: the first tests node
				+// volumes through a relative transform `xform`, the second derives that
				+// transform from two world transforms and forwards to the first.
				+// NOTE(review): the three-argument b3Intersect(volume, volume, xform) used
				+// below is not declared in the portion of the file reviewed here - confirm
				+// it exists before re-enabling this code.
				+//
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::collideTT(	const b3DbvtNode* root0,
				+								  const b3DbvtNode* root1,
				+								  const b3Transform& xform,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+		if(root0&&root1)
				+		{
				+			int								depth=1;
				+			int								treshold=B3_DOUBLE_STACKSIZE-4;
				+			b3AlignedObjectArray<sStkNN>	stkStack;
				+			stkStack.resize(B3_DOUBLE_STACKSIZE);
				+			stkStack[0]=sStkNN(root0,root1);
				+			do	{
				+				sStkNN	p=stkStack[--depth];
				+				if(b3Intersect(p.a->volume,p.b->volume,xform))
				+				{
				+					// Grow the stack before pushing up to four new pairs.
				+					if(depth>treshold)
				+					{
				+						stkStack.resize(stkStack.size()*2);
				+						treshold=stkStack.size()-4;
				+					}
				+					if(p.a->isinternal())
				+					{
				+						if(p.b->isinternal())
				+						{					
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
				+						}
				+						else
				+						{
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
				+						}
				+					}
				+					else
				+					{
				+						if(p.b->isinternal())
				+						{
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
				+						}
				+						else
				+						{
				+							// Both nodes are leaves: report the pair.
				+							policy.Process(p.a,p.b);
				+						}
				+					}
				+				}
				+			} while(depth);
				+		}
				+}
				+// Convenience overload: builds xform0^-1 * xform1 and forwards to the
				+// single-transform overload above.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::collideTT(	const b3DbvtNode* root0,
				+								  const b3Transform& xform0,
				+								  const b3DbvtNode* root1,
				+								  const b3Transform& xform1,
				+								  B3_DBVT_IPOLICY)
				+{
				+	const b3Transform	xform=xform0.inverse()*xform1;
				+	collideTT(root0,root1,xform,policy);
				+}
				+#endif
			
 
				+
			
 
				+//
			
 
				+/// Tree-vs-volume query: calls policy.Process(leaf) for every leaf under
				+/// `root` whose volume intersects `vol`. Subtrees whose volume does not
				+/// intersect are skipped. Does nothing when `root` is null.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::collideTV(const b3DbvtNode* root,
				+									const b3DbvtVolume& vol,
				+									B3_DBVT_IPOLICY) const
				+{
				+	B3_DBVT_CHECKTYPE
				+	if (!root)
				+	{
				+		return;
				+	}
				+
				+	// 16-byte aligned local copy of the query volume.
				+	B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)
				+	query(vol);
				+	b3AlignedObjectArray<const b3DbvtNode*> pending;
				+	pending.resize(0);
				+	pending.reserve(B3_SIMPLE_STACKSIZE);
				+	pending.push_back(root);
				+
				+	while (pending.size() > 0)
				+	{
				+		const b3DbvtNode* node = pending[pending.size() - 1];
				+		pending.pop_back();
				+		if (!b3Intersect(node->volume, query))
				+		{
				+			continue;
				+		}
				+		if (node->isinternal())
				+		{
				+			pending.push_back(node->childs[0]);
				+			pending.push_back(node->childs[1]);
				+		}
				+		else
				+		{
				+			policy.Process(node);
				+		}
				+	}
				+}
			
 
				+
			
 
				+/// Ray query with caller-supplied precomputed data (inverse direction,
				+/// sign flags, lambda_max). Each node box is enlarged by the extents
				+/// [aabbMin, aabbMax] before the b3RayAabb2 test (node min minus aabbMax,
				+/// node max minus aabbMin); leaves that pass are reported through
				+/// policy.Process(leaf). The traversal stack is the mutable member
				+/// m_rayTestStack, so it is reused between calls. `rayTo` is unused.
				+/// Does nothing when `root` is null.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::rayTestInternal(const b3DbvtNode* root,
				+										  const b3Vector3& rayFrom,
				+										  const b3Vector3& rayTo,
				+										  const b3Vector3& rayDirectionInverse,
				+										  unsigned int signs[3],
				+										  b3Scalar lambda_max,
				+										  const b3Vector3& aabbMin,
				+										  const b3Vector3& aabbMax,
				+										  B3_DBVT_IPOLICY) const
				+{
				+	(void)rayTo;
				+	B3_DBVT_CHECKTYPE
				+	if (!root)
				+	{
				+		return;
				+	}
				+
				+	b3AlignedObjectArray<const b3DbvtNode*>& pending = m_rayTestStack;
				+	pending.resize(B3_DOUBLE_STACKSIZE);
				+	pending[0] = root;
				+	int top = 1;
				+	// Two children are pushed per internal node; grow before running out.
				+	int growAt = B3_DOUBLE_STACKSIZE - 2;
				+	b3Vector3 bounds[2];
				+
				+	while (top != 0)
				+	{
				+		const b3DbvtNode* node = pending[--top];
				+		bounds[0] = node->volume.Mins() - aabbMax;
				+		bounds[1] = node->volume.Maxs() - aabbMin;
				+		b3Scalar tmin = 1.f, lambda_min = 0.f;
				+		const unsigned int hit = b3RayAabb2(rayFrom, rayDirectionInverse, signs, bounds, tmin, lambda_min, lambda_max);
				+		if (!hit)
				+		{
				+			continue;
				+		}
				+		if (!node->isinternal())
				+		{
				+			policy.Process(node);
				+			continue;
				+		}
				+		if (top > growAt)
				+		{
				+			pending.resize(pending.size() * 2);
				+			growAt = pending.size() - 2;
				+		}
				+		pending[top++] = node->childs[0];
				+		pending[top++] = node->childs[1];
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+/// Ray query from `rayFrom` towards `rayTo`: calls policy.Process(leaf) for
				+/// every leaf under `root` whose box passes the b3RayAabb2 test. The
				+/// normalized direction, its per-axis inverse, the sign flags and
				+/// lambda_max are recomputed on every call, and the traversal stack is
				+/// allocated locally. Does nothing when `root` is null.
				+B3_DBVT_PREFIX
				+inline void b3DynamicBvh::rayTest(const b3DbvtNode* root,
				+								  const b3Vector3& rayFrom,
				+								  const b3Vector3& rayTo,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	if (!root)
				+	{
				+		return;
				+	}
				+
				+	b3Vector3 dir = (rayTo - rayFrom);
				+	dir.normalize();
				+
				+	// A zero direction component would divide by zero, so its inverse is
				+	// replaced by B3_LARGE_FLOAT. signs[axis] is 1 for a negative inverse.
				+	b3Vector3 invDir;
				+	unsigned int signs[3];
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		invDir[axis] = dir[axis] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / dir[axis];
				+		signs[axis] = invDir[axis] < 0.0;
				+	}
				+
				+	// Length of the segment measured along the normalized direction.
				+	b3Scalar lambda_max = dir.dot(rayTo - rayFrom);
				+#ifdef COMPARE_BTRAY_AABB2
				+	b3Vector3 resultNormal;
				+#endif  //COMPARE_BTRAY_AABB2
				+
				+	b3AlignedObjectArray<const b3DbvtNode*> pending;
				+	pending.resize(B3_DOUBLE_STACKSIZE);
				+	pending[0] = root;
				+	int top = 1;
				+	// Two children are pushed per internal node; grow before running out.
				+	int growAt = B3_DOUBLE_STACKSIZE - 2;
				+	b3Vector3 bounds[2];
				+
				+	while (top != 0)
				+	{
				+		const b3DbvtNode* node = pending[--top];
				+
				+		bounds[0] = node->volume.Mins();
				+		bounds[1] = node->volume.Maxs();
				+
				+		b3Scalar tmin = 1.f, lambda_min = 0.f;
				+		unsigned int hit = b3RayAabb2(rayFrom, invDir, signs, bounds, tmin, lambda_min, lambda_max);
				+
				+#ifdef COMPARE_BTRAY_AABB2
				+		// Debug cross-check against the reference ray/box routine.
				+		b3Scalar param = 1.f;
				+		bool result2 = b3RayAabb(rayFrom, rayTo, node->volume.Mins(), node->volume.Maxs(), param, resultNormal);
				+		b3Assert(hit == result2);
				+#endif  //COMPARE_BTRAY_AABB2
				+
				+		if (!hit)
				+		{
				+			continue;
				+		}
				+		if (!node->isinternal())
				+		{
				+			policy.Process(node);
				+			continue;
				+		}
				+		if (top > growAt)
				+		{
				+			pending.resize(pending.size() * 2);
				+			growAt = pending.size() - 2;
				+		}
				+		pending[top++] = node->childs[0];
				+		pending[top++] = node->childs[1];
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void b3DynamicBvh::collideKDOP(const b3DbvtNode* root,
			
 
				+									  const b3Vector3* normals,
			
 
				+									  const b3Scalar* offsets,
			
 
				+									  int count,
			
 
				+									  B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+	if (root)
			
 
				+	{
			
 
				+		const int inside = (1 << count) - 1;
			
 
				+		b3AlignedObjectArray<sStkNP> stack;
			
 
				+		int signs[sizeof(unsigned) * 8];
			
 
				+		b3Assert(count < int(sizeof(signs) / sizeof(signs[0])));
			
 
				+		for (int i = 0; i < count; ++i)
			
 
				+		{
			
 
				+			signs[i] = ((normals[i].x >= 0) ? 1 : 0) +
			
 
				+					   ((normals[i].y >= 0) ? 2 : 0) +
			
 
				+					   ((normals[i].z >= 0) ? 4 : 0);
			
 
				+		}
			
 
				+		stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+		stack.push_back(sStkNP(root, 0));
			
 
				+		do
			
 
				+		{
			
 
				+			sStkNP se = stack[stack.size() - 1];
			
 
				+			bool out = false;
			
 
				+			stack.pop_back();
			
 
				+			for (int i = 0, j = 1; (!out) && (i < count); ++i, j <<= 1)
			
 
				+			{
			
 
				+				if (0 == (se.mask & j))
			
 
				+				{
			
 
				+					const int side = se.node->volume.Classify(normals[i], offsets[i], signs[i]);
			
 
				+					switch (side)
			
 
				+					{
			
 
				+						case -1:
			
 
				+							out = true;
			
 
				+							break;
			
 
				+						case +1:
			
 
				+							se.mask |= j;
			
 
				+							break;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+			if (!out)
			
 
				+			{
			
 
				+				if ((se.mask != inside) && (se.node->isinternal()))
			
 
				+				{
			
 
				+					stack.push_back(sStkNP(se.node->childs[0], se.mask));
			
 
				+					stack.push_back(sStkNP(se.node->childs[1], se.mask));
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					if (policy.AllLeaves(se.node)) enumLeaves(se.node, policy);
			
 
				+				}
			
 
				+			}
			
 
				+		} while (stack.size());
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void b3DynamicBvh::collideOCL(const b3DbvtNode* root,
			
 
				+									 const b3Vector3* normals,
			
 
				+									 const b3Scalar* offsets,
			
 
				+									 const b3Vector3& sortaxis,
			
 
				+									 int count,
			
 
				+									 B3_DBVT_IPOLICY,
			
 
				+									 bool fsort)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+	if (root)
			
 
				+	{
			
 
				+		const unsigned srtsgns = (sortaxis[0] >= 0 ? 1 : 0) +
			
 
				+								 (sortaxis[1] >= 0 ? 2 : 0) +
			
 
				+								 (sortaxis[2] >= 0 ? 4 : 0);
			
 
				+		const int inside = (1 << count) - 1;
			
 
				+		b3AlignedObjectArray<sStkNPS> stock;
			
 
				+		b3AlignedObjectArray<int> ifree;
			
 
				+		b3AlignedObjectArray<int> stack;
			
 
				+		int signs[sizeof(unsigned) * 8];
			
 
				+		b3Assert(count < int(sizeof(signs) / sizeof(signs[0])));
			
 
				+		for (int i = 0; i < count; ++i)
			
 
				+		{
			
 
				+			signs[i] = ((normals[i].x >= 0) ? 1 : 0) +
			
 
				+					   ((normals[i].y >= 0) ? 2 : 0) +
			
 
				+					   ((normals[i].z >= 0) ? 4 : 0);
			
 
				+		}
			
 
				+		stock.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+		stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+		ifree.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+		stack.push_back(allocate(ifree, stock, sStkNPS(root, 0, root->volume.ProjectMinimum(sortaxis, srtsgns))));
			
 
				+		do
			
 
				+		{
			
 
				+			const int id = stack[stack.size() - 1];
			
 
				+			sStkNPS se = stock[id];
			
 
				+			stack.pop_back();
			
 
				+			ifree.push_back(id);
			
 
				+			if (se.mask != inside)
			
 
				+			{
			
 
				+				bool out = false;
			
 
				+				for (int i = 0, j = 1; (!out) && (i < count); ++i, j <<= 1)
			
 
				+				{
			
 
				+					if (0 == (se.mask & j))
			
 
				+					{
			
 
				+						const int side = se.node->volume.Classify(normals[i], offsets[i], signs[i]);
			
 
				+						switch (side)
			
 
				+						{
			
 
				+							case -1:
			
 
				+								out = true;
			
 
				+								break;
			
 
				+							case +1:
			
 
				+								se.mask |= j;
			
 
				+								break;
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+				if (out) continue;
			
 
				+			}
			
 
				+			if (policy.Descent(se.node))
			
 
				+			{
			
 
				+				if (se.node->isinternal())
			
 
				+				{
			
 
				+					const b3DbvtNode* pns[] = {se.node->childs[0], se.node->childs[1]};
			
 
				+					sStkNPS nes[] = {sStkNPS(pns[0], se.mask, pns[0]->volume.ProjectMinimum(sortaxis, srtsgns)),
			
 
				+									 sStkNPS(pns[1], se.mask, pns[1]->volume.ProjectMinimum(sortaxis, srtsgns))};
			
 
				+					const int q = nes[0].value < nes[1].value ? 1 : 0;
			
 
				+					int j = stack.size();
			
 
				+					if (fsort && (j > 0))
			
 
				+					{
			
 
				+						/* Insert 0	*/
			
 
				+						j = nearest(&stack[0], &stock[0], nes[q].value, 0, stack.size());
			
 
				+						stack.push_back(0);
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+						memmove(&stack[j + 1], &stack[j], sizeof(int) * (stack.size() - j - 1));
			
 
				+#else
			
 
				+						for (int k = stack.size() - 1; k > j; --k) stack[k] = stack[k - 1];
			
 
				+#endif
			
 
				+						stack[j] = allocate(ifree, stock, nes[q]);
			
 
				+						/* Insert 1	*/
			
 
				+						j = nearest(&stack[0], &stock[0], nes[1 - q].value, j, stack.size());
			
 
				+						stack.push_back(0);
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+						memmove(&stack[j + 1], &stack[j], sizeof(int) * (stack.size() - j - 1));
			
 
				+#else
			
 
				+						for (int k = stack.size() - 1; k > j; --k) stack[k] = stack[k - 1];
			
 
				+#endif
			
 
				+						stack[j] = allocate(ifree, stock, nes[1 - q]);
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						stack.push_back(allocate(ifree, stock, nes[q]));
			
 
				+						stack.push_back(allocate(ifree, stock, nes[1 - q]));
			
 
				+					}
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					policy.Process(se.node, se.value);
			
 
				+				}
			
 
				+			}
			
 
				+		} while (stack.size());
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void b3DynamicBvh::collideTU(const b3DbvtNode* root,
			
 
				+									B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+	if (root)
			
 
				+	{
			
 
				+		b3AlignedObjectArray<const b3DbvtNode*> stack;
			
 
				+		stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+		stack.push_back(root);
			
 
				+		do
			
 
				+		{
			
 
				+			const b3DbvtNode* n = stack[stack.size() - 1];
			
 
				+			stack.pop_back();
			
 
				+			if (policy.Descent(n))
			
 
				+			{
			
 
				+				if (n->isinternal())
			
 
				+				{
			
 
				+					stack.push_back(n->childs[0]);
			
 
				+					stack.push_back(n->childs[1]);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					policy.Process(n);
			
 
				+				}
			
 
				+			}
			
 
				+		} while (stack.size() > 0);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+// PP Cleanup
			
 
				+//
			
 
				+
			
 
				+#undef B3_DBVT_USE_MEMMOVE
			
 
				+#undef B3_DBVT_USE_TEMPLATE
			
 
				+#undef B3_DBVT_VIRTUAL_DTOR
			
 
				+#undef B3_DBVT_VIRTUAL
			
 
				+#undef B3_DBVT_PREFIX
			
 
				+#undef B3_DBVT_IPOLICY
			
 
				+#undef B3_DBVT_CHECKTYPE
			
 
				+#undef B3_DBVT_IMPL_GENERIC
			
 
				+#undef B3_DBVT_IMPL_SSE
			
 
				+#undef B3_DBVT_USE_INTRINSIC_SSE
			
 
				+#undef B3_DBVT_SELECT_IMPL
			
 
				+#undef B3_DBVT_MERGE_IMPL
			
 
				+#undef B3_DBVT_INT0_IMPL
			
 
				+
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp
@@ -0,0 +1,808 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+///b3DynamicBvhBroadphase implementation by Nathanael Presson
			
 
				+
			
 
				+#include "b3DynamicBvhBroadphase.h"
			
 
				+#include "b3OverlappingPair.h"
			
 
				+
			
 
				+//
			
 
				+// Profiling
			
 
				+//
			
 
				+
			
 
				+#if B3_DBVT_BP_PROFILE || B3_DBVT_BP_ENABLE_BENCHMARK
			
 
				+#include <stdio.h>
			
 
				+#endif
			
 
				+
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+struct b3ProfileScope
			
 
				+{
			
 
				+	__forceinline b3ProfileScope(b3Clock& clock, unsigned long& value) : m_clock(&clock), m_value(&value), m_base(clock.getTimeMicroseconds())
			
 
				+	{
			
 
				+	}
			
 
				+	__forceinline ~b3ProfileScope()
			
 
				+	{
			
 
				+		(*m_value) += m_clock->getTimeMicroseconds() - m_base;
			
 
				+	}
			
 
				+	b3Clock* m_clock;
			
 
				+	unsigned long* m_value;
			
 
				+	unsigned long m_base;
			
 
				+};
			
 
				+#define b3SPC(_value_) b3ProfileScope spc_scope(m_clock, _value_)
			
 
				+#else
			
 
				+#define b3SPC(_value_)
			
 
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Helpers
			
 
				+//
			
 
				+
			
 
				+//
			
 
				+template <typename T>
			
 
				+static inline void b3ListAppend(T* item, T*& list)
			
 
				+{
			
 
				+	item->links[0] = 0;
			
 
				+	item->links[1] = list;
			
 
				+	if (list) list->links[0] = item;
			
 
				+	list = item;
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+template <typename T>
			
 
				+static inline void b3ListRemove(T* item, T*& list)
			
 
				+{
			
 
				+	if (item->links[0])
			
 
				+		item->links[0]->links[1] = item->links[1];
			
 
				+	else
			
 
				+		list = item->links[1];
			
 
				+	if (item->links[1]) item->links[1]->links[0] = item->links[0];
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+template <typename T>
			
 
				+static inline int b3ListCount(T* root)
			
 
				+{
			
 
				+	int n = 0;
			
 
				+	while (root)
			
 
				+	{
			
 
				+		++n;
			
 
				+		root = root->links[1];
			
 
				+	}
			
 
				+	return (n);
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+template <typename T>
			
 
				+static inline void b3Clear(T& value)
			
 
				+{
			
 
				+	static const struct ZeroDummy : T
			
 
				+	{
			
 
				+	} zerodummy;
			
 
				+	value = zerodummy;
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+// Colliders
			
 
				+//
			
 
				+
			
 
				+/* Tree collider	*/
			
 
				+struct b3DbvtTreeCollider : b3DynamicBvh::ICollide
			
 
				+{
			
 
				+	b3DynamicBvhBroadphase* pbp;
			
 
				+	b3DbvtProxy* proxy;
			
 
				+	b3DbvtTreeCollider(b3DynamicBvhBroadphase* p) : pbp(p) {}
			
 
				+	void Process(const b3DbvtNode* na, const b3DbvtNode* nb)
			
 
				+	{
			
 
				+		if (na != nb)
			
 
				+		{
			
 
				+			b3DbvtProxy* pa = (b3DbvtProxy*)na->data;
			
 
				+			b3DbvtProxy* pb = (b3DbvtProxy*)nb->data;
			
 
				+#if B3_DBVT_BP_SORTPAIRS
			
 
				+			if (pa->m_uniqueId > pb->m_uniqueId)
			
 
				+				b3Swap(pa, pb);
			
 
				+#endif
			
 
				+			pbp->m_paircache->addOverlappingPair(pa->getUid(), pb->getUid());
			
 
				+			++pbp->m_newpairs;
			
 
				+		}
			
 
				+	}
			
 
				+	void Process(const b3DbvtNode* n)
			
 
				+	{
			
 
				+		Process(n, proxy->leaf);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+//
			
 
				+// b3DynamicBvhBroadphase
			
 
				+//
			
 
				+
			
 
				+//
			
 
				+b3DynamicBvhBroadphase::b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache)
			
 
				+{
			
 
				+	m_deferedcollide = false;
			
 
				+	m_needcleanup = true;
			
 
				+	m_releasepaircache = (paircache != 0) ? false : true;
			
 
				+	m_prediction = 0;
			
 
				+	m_stageCurrent = 0;
			
 
				+	m_fixedleft = 0;
			
 
				+	m_fupdates = 1;
			
 
				+	m_dupdates = 0;
			
 
				+	m_cupdates = 10;
			
 
				+	m_newpairs = 1;
			
 
				+	m_updates_call = 0;
			
 
				+	m_updates_done = 0;
			
 
				+	m_updates_ratio = 0;
			
 
				+	m_paircache = paircache ? paircache : new (b3AlignedAlloc(sizeof(b3HashedOverlappingPairCache), 16)) b3HashedOverlappingPairCache();
			
 
				+
			
 
				+	m_pid = 0;
			
 
				+	m_cid = 0;
			
 
				+	for (int i = 0; i <= STAGECOUNT; ++i)
			
 
				+	{
			
 
				+		m_stageRoots[i] = 0;
			
 
				+	}
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+	b3Clear(m_profiling);
			
 
				+#endif
			
 
				+	m_proxies.resize(proxyCapacity);
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+b3DynamicBvhBroadphase::~b3DynamicBvhBroadphase()
			
 
				+{
			
 
				+	if (m_releasepaircache)
			
 
				+	{
			
 
				+		m_paircache->~b3OverlappingPairCache();
			
 
				+		b3AlignedFree(m_paircache);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+b3BroadphaseProxy* b3DynamicBvhBroadphase::createProxy(const b3Vector3& aabbMin,
			
 
				+													   const b3Vector3& aabbMax,
			
 
				+													   int objectId,
			
 
				+													   void* userPtr,
			
 
				+													   int collisionFilterGroup,
			
 
				+													   int collisionFilterMask)
			
 
				+{
			
 
				+	b3DbvtProxy* mem = &m_proxies[objectId];
			
 
				+	b3DbvtProxy* proxy = new (mem) b3DbvtProxy(aabbMin, aabbMax, userPtr,
			
 
				+											   collisionFilterGroup,
			
 
				+											   collisionFilterMask);
			
 
				+
			
 
				+	b3DbvtAabbMm aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax);
			
 
				+
			
 
				+	//bproxy->aabb			=	b3DbvtVolume::FromMM(aabbMin,aabbMax);
			
 
				+	proxy->stage = m_stageCurrent;
			
 
				+	proxy->m_uniqueId = objectId;
			
 
				+	proxy->leaf = m_sets[0].insert(aabb, proxy);
			
 
				+	b3ListAppend(proxy, m_stageRoots[m_stageCurrent]);
			
 
				+	if (!m_deferedcollide)
			
 
				+	{
			
 
				+		b3DbvtTreeCollider collider(this);
			
 
				+		collider.proxy = proxy;
			
 
				+		m_sets[0].collideTV(m_sets[0].m_root, aabb, collider);
			
 
				+		m_sets[1].collideTV(m_sets[1].m_root, aabb, collider);
			
 
				+	}
			
 
				+	return (proxy);
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+void b3DynamicBvhBroadphase::destroyProxy(b3BroadphaseProxy* absproxy,
			
 
				+										  b3Dispatcher* dispatcher)
			
 
				+{
			
 
				+	b3DbvtProxy* proxy = (b3DbvtProxy*)absproxy;
			
 
				+	if (proxy->stage == STAGECOUNT)
			
 
				+		m_sets[1].remove(proxy->leaf);
			
 
				+	else
			
 
				+		m_sets[0].remove(proxy->leaf);
			
 
				+	b3ListRemove(proxy, m_stageRoots[proxy->stage]);
			
 
				+	m_paircache->removeOverlappingPairsContainingProxy(proxy->getUid(), dispatcher);
			
 
				+
			
 
				+	m_needcleanup = true;
			
 
				+}
			
 
				+
			
 
				+void b3DynamicBvhBroadphase::getAabb(int objectId, b3Vector3& aabbMin, b3Vector3& aabbMax) const
			
 
				+{
			
 
				+	const b3DbvtProxy* proxy = &m_proxies[objectId];
			
 
				+	aabbMin = proxy->m_aabbMin;
			
 
				+	aabbMax = proxy->m_aabbMax;
			
 
				+}
			
 
				+/*
			
 
				+void	b3DynamicBvhBroadphase::getAabb(b3BroadphaseProxy* absproxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const
			
 
				+{
			
 
				+	b3DbvtProxy*						proxy=(b3DbvtProxy*)absproxy;
			
 
				+	aabbMin = proxy->m_aabbMin;
			
 
				+	aabbMax = proxy->m_aabbMax;
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+struct BroadphaseRayTester : b3DynamicBvh::ICollide
			
 
				+{
			
 
				+	b3BroadphaseRayCallback& m_rayCallback;
			
 
				+	BroadphaseRayTester(b3BroadphaseRayCallback& orgCallback)
			
 
				+		: m_rayCallback(orgCallback)
			
 
				+	{
			
 
				+	}
			
 
				+	void Process(const b3DbvtNode* leaf)
			
 
				+	{
			
 
				+		b3DbvtProxy* proxy = (b3DbvtProxy*)leaf->data;
			
 
				+		m_rayCallback.process(proxy);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+void b3DynamicBvhBroadphase::rayTest(const b3Vector3& rayFrom, const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax)
			
 
				+{
			
 
				+	BroadphaseRayTester callback(rayCallback);
			
 
				+
			
 
				+	m_sets[0].rayTestInternal(m_sets[0].m_root,
			
 
				+							  rayFrom,
			
 
				+							  rayTo,
			
 
				+							  rayCallback.m_rayDirectionInverse,
			
 
				+							  rayCallback.m_signs,
			
 
				+							  rayCallback.m_lambda_max,
			
 
				+							  aabbMin,
			
 
				+							  aabbMax,
			
 
				+							  callback);
			
 
				+
			
 
				+	m_sets[1].rayTestInternal(m_sets[1].m_root,
			
 
				+							  rayFrom,
			
 
				+							  rayTo,
			
 
				+							  rayCallback.m_rayDirectionInverse,
			
 
				+							  rayCallback.m_signs,
			
 
				+							  rayCallback.m_lambda_max,
			
 
				+							  aabbMin,
			
 
				+							  aabbMax,
			
 
				+							  callback);
			
 
				+}
			
 
				+
			
 
				+struct BroadphaseAabbTester : b3DynamicBvh::ICollide
			
 
				+{
			
 
				+	b3BroadphaseAabbCallback& m_aabbCallback;
			
 
				+	BroadphaseAabbTester(b3BroadphaseAabbCallback& orgCallback)
			
 
				+		: m_aabbCallback(orgCallback)
			
 
				+	{
			
 
				+	}
			
 
				+	void Process(const b3DbvtNode* leaf)
			
 
				+	{
			
 
				+		b3DbvtProxy* proxy = (b3DbvtProxy*)leaf->data;
			
 
				+		m_aabbCallback.process(proxy);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+void b3DynamicBvhBroadphase::aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& aabbCallback)
			
 
				+{
			
 
				+	BroadphaseAabbTester callback(aabbCallback);
			
 
				+
			
 
				+	const B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) bounds = b3DbvtVolume::FromMM(aabbMin, aabbMax);
			
 
				+	//process all children, that overlap with  the given AABB bounds
			
 
				+	m_sets[0].collideTV(m_sets[0].m_root, bounds, callback);
			
 
				+	m_sets[1].collideTV(m_sets[1].m_root, bounds, callback);
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+void b3DynamicBvhBroadphase::setAabb(int objectId,
			
 
				+									 const b3Vector3& aabbMin,
			
 
				+									 const b3Vector3& aabbMax,
			
 
				+									 b3Dispatcher* /*dispatcher*/)
			
 
				+{
			
 
				+	b3DbvtProxy* proxy = &m_proxies[objectId];
			
 
				+	//	b3DbvtProxy*						proxy=(b3DbvtProxy*)absproxy;
			
 
				+	B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)
			
 
				+	aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax);
			
 
				+#if B3_DBVT_BP_PREVENTFALSEUPDATE
			
 
				+	if (b3NotEqual(aabb, proxy->leaf->volume))
			
 
				+#endif
			
 
				+	{
			
 
				+		bool docollide = false;
			
 
				+		if (proxy->stage == STAGECOUNT)
			
 
				+		{ /* fixed -> dynamic set	*/
			
 
				+			m_sets[1].remove(proxy->leaf);
			
 
				+			proxy->leaf = m_sets[0].insert(aabb, proxy);
			
 
				+			docollide = true;
			
 
				+		}
			
 
				+		else
			
 
				+		{ /* dynamic set				*/
			
 
				+			++m_updates_call;
			
 
				+			if (b3Intersect(proxy->leaf->volume, aabb))
			
 
				+			{ /* Moving				*/
			
 
				+
			
 
				+				const b3Vector3 delta = aabbMin - proxy->m_aabbMin;
			
 
				+				b3Vector3 velocity(((proxy->m_aabbMax - proxy->m_aabbMin) / 2) * m_prediction);
			
 
				+				if (delta[0] < 0) velocity[0] = -velocity[0];
			
 
				+				if (delta[1] < 0) velocity[1] = -velocity[1];
			
 
				+				if (delta[2] < 0) velocity[2] = -velocity[2];
			
 
				+				if (
			
 
				+#ifdef B3_DBVT_BP_MARGIN
			
 
				+					m_sets[0].update(proxy->leaf, aabb, velocity, B3_DBVT_BP_MARGIN)
			
 
				+#else
			
 
				+					m_sets[0].update(proxy->leaf, aabb, velocity)
			
 
				+#endif
			
 
				+				)
			
 
				+				{
			
 
				+					++m_updates_done;
			
 
				+					docollide = true;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{ /* Teleporting			*/
			
 
				+				m_sets[0].update(proxy->leaf, aabb);
			
 
				+				++m_updates_done;
			
 
				+				docollide = true;
			
 
				+			}
			
 
				+		}
			
 
				+		b3ListRemove(proxy, m_stageRoots[proxy->stage]);
			
 
				+		proxy->m_aabbMin = aabbMin;
			
 
				+		proxy->m_aabbMax = aabbMax;
			
 
				+		proxy->stage = m_stageCurrent;
			
 
				+		b3ListAppend(proxy, m_stageRoots[m_stageCurrent]);
			
 
				+		if (docollide)
			
 
				+		{
			
 
				+			m_needcleanup = true;
			
 
				+			if (!m_deferedcollide)
			
 
				+			{
			
 
				+				b3DbvtTreeCollider collider(this);
			
 
				+				m_sets[1].collideTTpersistentStack(m_sets[1].m_root, proxy->leaf, collider);
			
 
				+				m_sets[0].collideTTpersistentStack(m_sets[0].m_root, proxy->leaf, collider);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+void b3DynamicBvhBroadphase::setAabbForceUpdate(b3BroadphaseProxy* absproxy,
			
 
				+												const b3Vector3& aabbMin,
			
 
				+												const b3Vector3& aabbMax,
			
 
				+												b3Dispatcher* /*dispatcher*/)
			
 
				+{
			
 
				+	b3DbvtProxy* proxy = (b3DbvtProxy*)absproxy;
			
 
				+	B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)
			
 
				+	aabb = b3DbvtVolume::FromMM(aabbMin, aabbMax);
			
 
				+	bool docollide = false;
			
 
				+	if (proxy->stage == STAGECOUNT)
			
 
				+	{ /* fixed -> dynamic set	*/
			
 
				+		m_sets[1].remove(proxy->leaf);
			
 
				+		proxy->leaf = m_sets[0].insert(aabb, proxy);
			
 
				+		docollide = true;
			
 
				+	}
			
 
				+	else
			
 
				+	{ /* dynamic set				*/
			
 
				+		++m_updates_call;
			
 
				+		/* Teleporting			*/
			
 
				+		m_sets[0].update(proxy->leaf, aabb);
			
 
				+		++m_updates_done;
			
 
				+		docollide = true;
			
 
				+	}
			
 
				+	b3ListRemove(proxy, m_stageRoots[proxy->stage]);
			
 
				+	proxy->m_aabbMin = aabbMin;
			
 
				+	proxy->m_aabbMax = aabbMax;
			
 
				+	proxy->stage = m_stageCurrent;
			
 
				+	b3ListAppend(proxy, m_stageRoots[m_stageCurrent]);
			
 
				+	if (docollide)
			
 
				+	{
			
 
				+		m_needcleanup = true;
			
 
				+		if (!m_deferedcollide)
			
 
				+		{
			
 
				+			b3DbvtTreeCollider collider(this);
			
 
				+			m_sets[1].collideTTpersistentStack(m_sets[1].m_root, proxy->leaf, collider);
			
 
				+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root, proxy->leaf, collider);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+void b3DynamicBvhBroadphase::calculateOverlappingPairs(b3Dispatcher* dispatcher)
			
 
				+{
			
 
				+	collide(dispatcher);
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+	if (0 == (m_pid % B3_DBVT_BP_PROFILING_RATE))
			
 
				+	{
			
 
				+		printf("fixed(%u) dynamics(%u) pairs(%u)\r\n", m_sets[1].m_leaves, m_sets[0].m_leaves, m_paircache->getNumOverlappingPairs());
			
 
				+		unsigned int total = m_profiling.m_total;
			
 
				+		if (total <= 0) total = 1;
			
 
				+		printf("ddcollide: %u%% (%uus)\r\n", (50 + m_profiling.m_ddcollide * 100) / total, m_profiling.m_ddcollide / B3_DBVT_BP_PROFILING_RATE);
			
 
				+		printf("fdcollide: %u%% (%uus)\r\n", (50 + m_profiling.m_fdcollide * 100) / total, m_profiling.m_fdcollide / B3_DBVT_BP_PROFILING_RATE);
			
 
				+		printf("cleanup:   %u%% (%uus)\r\n", (50 + m_profiling.m_cleanup * 100) / total, m_profiling.m_cleanup / B3_DBVT_BP_PROFILING_RATE);
			
 
				+		printf("total:     %uus\r\n", total / B3_DBVT_BP_PROFILING_RATE);
			
 
				+		const unsigned long sum = m_profiling.m_ddcollide +
			
 
				+								  m_profiling.m_fdcollide +
			
 
				+								  m_profiling.m_cleanup;
			
 
				+		printf("leaked: %u%% (%uus)\r\n", 100 - ((50 + sum * 100) / total), (total - sum) / B3_DBVT_BP_PROFILING_RATE);
			
 
				+		printf("job counts: %u%%\r\n", (m_profiling.m_jobcount * 100) / ((m_sets[0].m_leaves + m_sets[1].m_leaves) * B3_DBVT_BP_PROFILING_RATE));
			
 
				+		b3Clear(m_profiling);
			
 
				+		m_clock.reset();
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+	performDeferredRemoval(dispatcher);
			
 
				+}
			
 
				+
			
 
				+void b3DynamicBvhBroadphase::performDeferredRemoval(b3Dispatcher* dispatcher)
			
 
				+{
			
 
				+	if (m_paircache->hasDeferredRemoval())
			
 
				+	{
			
 
				+		b3BroadphasePairArray& overlappingPairArray = m_paircache->getOverlappingPairArray();
			
 
				+
			
 
				+		//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
			
 
				+		overlappingPairArray.quickSort(b3BroadphasePairSortPredicate());
			
 
				+
			
 
				+		int invalidPair = 0;
			
 
				+
			
 
				+		int i;
			
 
				+
			
 
				+		b3BroadphasePair previousPair = b3MakeBroadphasePair(-1, -1);
			
 
				+
			
 
				+		for (i = 0; i < overlappingPairArray.size(); i++)
			
 
				+		{
			
 
				+			b3BroadphasePair& pair = overlappingPairArray[i];
			
 
				+
			
 
				+			bool isDuplicate = (pair == previousPair);
			
 
				+
			
 
				+			previousPair = pair;
			
 
				+
			
 
				+			bool needsRemoval = false;
			
 
				+
			
 
				+			if (!isDuplicate)
			
 
				+			{
			
 
				+				//important to perform AABB check that is consistent with the broadphase
			
 
				+				b3DbvtProxy* pa = &m_proxies[pair.x];
			
 
				+				b3DbvtProxy* pb = &m_proxies[pair.y];
			
 
				+				bool hasOverlap = b3Intersect(pa->leaf->volume, pb->leaf->volume);
			
 
				+
			
 
				+				if (hasOverlap)
			
 
				+				{
			
 
				+					needsRemoval = false;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					needsRemoval = true;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				//remove duplicate
			
 
				+				needsRemoval = true;
			
 
				+				//should have no algorithm
			
 
				+			}
			
 
				+
			
 
				+			if (needsRemoval)
			
 
				+			{
			
 
				+				m_paircache->cleanOverlappingPair(pair, dispatcher);
			
 
				+
			
 
				+				pair.x = -1;
			
 
				+				pair.y = -1;
			
 
				+				invalidPair++;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		//perform a sort, to sort 'invalid' pairs to the end
			
 
				+		overlappingPairArray.quickSort(b3BroadphasePairSortPredicate());
			
 
				+		overlappingPairArray.resize(overlappingPairArray.size() - invalidPair);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+void b3DynamicBvhBroadphase::collide(b3Dispatcher* dispatcher)
			
 
				+{
			
 
				+	/*printf("---------------------------------------------------------\n");
			
 
				+	printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves);
			
 
				+	printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves);
			
 
				+	printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs());
			
 
				+	{
			
 
				+		int i;
			
 
				+		for (i=0;i<getOverlappingPairCache()->getNumOverlappingPairs();i++)
			
 
				+		{
			
 
				+			printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(),
			
 
				+				getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid());
			
 
				+		}
			
 
				+		printf("\n");
			
 
				+	}
			
 
				+*/
			
 
				+
			
 
				+	b3SPC(m_profiling.m_total);
			
 
				+	/* optimize				*/
			
 
				+	m_sets[0].optimizeIncremental(1 + (m_sets[0].m_leaves * m_dupdates) / 100);
			
 
				+	if (m_fixedleft)
			
 
				+	{
			
 
				+		const int count = 1 + (m_sets[1].m_leaves * m_fupdates) / 100;
			
 
				+		m_sets[1].optimizeIncremental(1 + (m_sets[1].m_leaves * m_fupdates) / 100);
			
 
				+		m_fixedleft = b3Max<int>(0, m_fixedleft - count);
			
 
				+	}
			
 
				+	/* dynamic -> fixed set	*/
			
 
				+	m_stageCurrent = (m_stageCurrent + 1) % STAGECOUNT;
			
 
				+	b3DbvtProxy* current = m_stageRoots[m_stageCurrent];
			
 
				+	if (current)
			
 
				+	{
			
 
				+		b3DbvtTreeCollider collider(this);
			
 
				+		do
			
 
				+		{
			
 
				+			b3DbvtProxy* next = current->links[1];
			
 
				+			b3ListRemove(current, m_stageRoots[current->stage]);
			
 
				+			b3ListAppend(current, m_stageRoots[STAGECOUNT]);
			
 
				+#if B3_DBVT_BP_ACCURATESLEEPING
			
 
				+			m_paircache->removeOverlappingPairsContainingProxy(current, dispatcher);
			
 
				+			collider.proxy = current;
			
 
				+			b3DynamicBvh::collideTV(m_sets[0].m_root, current->aabb, collider);
			
 
				+			b3DynamicBvh::collideTV(m_sets[1].m_root, current->aabb, collider);
			
 
				+#endif
			
 
				+			m_sets[0].remove(current->leaf);
			
 
				+			B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)
			
 
				+			curAabb = b3DbvtVolume::FromMM(current->m_aabbMin, current->m_aabbMax);
			
 
				+			current->leaf = m_sets[1].insert(curAabb, current);
			
 
				+			current->stage = STAGECOUNT;
			
 
				+			current = next;
			
 
				+		} while (current);
			
 
				+		m_fixedleft = m_sets[1].m_leaves;
			
 
				+		m_needcleanup = true;
			
 
				+	}
			
 
				+	/* collide dynamics		*/
			
 
				+	{
			
 
				+		b3DbvtTreeCollider collider(this);
			
 
				+		if (m_deferedcollide)
			
 
				+		{
			
 
				+			b3SPC(m_profiling.m_fdcollide);
			
 
				+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[1].m_root, collider);
			
 
				+		}
			
 
				+		if (m_deferedcollide)
			
 
				+		{
			
 
				+			b3SPC(m_profiling.m_ddcollide);
			
 
				+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[0].m_root, collider);
			
 
				+		}
			
 
				+	}
			
 
				+	/* clean up				*/
			
 
				+	if (m_needcleanup)
			
 
				+	{
			
 
				+		b3SPC(m_profiling.m_cleanup);
			
 
				+		b3BroadphasePairArray& pairs = m_paircache->getOverlappingPairArray();
			
 
				+		if (pairs.size() > 0)
			
 
				+		{
			
 
				+			int ni = b3Min(pairs.size(), b3Max<int>(m_newpairs, (pairs.size() * m_cupdates) / 100));
			
 
				+			for (int i = 0; i < ni; ++i)
			
 
				+			{
			
 
				+				b3BroadphasePair& p = pairs[(m_cid + i) % pairs.size()];
			
 
				+				b3DbvtProxy* pa = &m_proxies[p.x];
			
 
				+				b3DbvtProxy* pb = &m_proxies[p.y];
			
 
				+				if (!b3Intersect(pa->leaf->volume, pb->leaf->volume))
			
 
				+				{
			
 
				+#if B3_DBVT_BP_SORTPAIRS
			
 
				+					if (pa->m_uniqueId > pb->m_uniqueId)
			
 
				+						b3Swap(pa, pb);
			
 
				+#endif
			
 
				+					m_paircache->removeOverlappingPair(pa->getUid(), pb->getUid(), dispatcher);
			
 
				+					--ni;
			
 
				+					--i;
			
 
				+				}
			
 
				+			}
			
 
				+			if (pairs.size() > 0)
			
 
				+				m_cid = (m_cid + ni) % pairs.size();
			
 
				+			else
			
 
				+				m_cid = 0;
			
 
				+		}
			
 
				+	}
			
 
				+	++m_pid;
			
 
				+	m_newpairs = 1;
			
 
				+	m_needcleanup = false;
			
 
				+	if (m_updates_call > 0)
			
 
				+	{
			
 
				+		m_updates_ratio = m_updates_done / (b3Scalar)m_updates_call;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		m_updates_ratio = 0;
			
 
				+	}
			
 
				+	m_updates_done /= 2;
			
 
				+	m_updates_call /= 2;
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+// Calls optimizeTopDown() on each of the two trees, m_sets[0] first and then m_sets[1].
				+void b3DynamicBvhBroadphase::optimize()
				+{
				+	for (int setIndex = 0; setIndex < 2; ++setIndex)
				+	{
				+		m_sets[setIndex].optimizeTopDown();
				+	}
				+}
			
 
				+
			
 
				+//
			
 
				+// Returns the pair cache pointer stored in m_paircache.
				+b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache()
				+{
				+	return m_paircache;
				+}
			
 
				+
			
 
				+//
			
 
				+// Const overload: returns the pair cache pointer stored in m_paircache as a pointer-to-const.
				+const b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache() const
				+{
				+	return m_paircache;
				+}
			
 
				+
			
 
				+//
			
 
				+// Writes the overall bounds of the broadphase into aabbMin / aabbMax.
				+//  - both trees non-empty: the two root volumes merged with b3Merge
				+//  - exactly one tree non-empty: that tree's root volume
				+//  - both trees empty: the volume returned by b3DbvtVolume::FromCR(origin, 0)
				+void b3DynamicBvhBroadphase::getBroadphaseAabb(b3Vector3& aabbMin, b3Vector3& aabbMax) const
				+{
				+	B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)
				+	bounds;
				+
				+	const bool hasSet0 = !m_sets[0].empty();
				+	const bool hasSet1 = !m_sets[1].empty();
				+
				+	if (hasSet0 && hasSet1)
				+	{
				+		b3Merge(m_sets[0].m_root->volume,
				+				m_sets[1].m_root->volume, bounds);
				+	}
				+	else if (hasSet0)
				+	{
				+		bounds = m_sets[0].m_root->volume;
				+	}
				+	else if (hasSet1)
				+	{
				+		bounds = m_sets[1].m_root->volume;
				+	}
				+	else
				+	{
				+		bounds = b3DbvtVolume::FromCR(b3MakeVector3(0, 0, 0), 0);
				+	}
				+
				+	aabbMin = bounds.Mins();
				+	aabbMax = bounds.Maxs();
				+}
			
 
				+
			
 
				+// Clears both trees and resets the bookkeeping fields, but only when the two
				+// trees together hold no leaves; otherwise the call changes nothing.
				+// The dispatcher argument is not used by this function.
				+void b3DynamicBvhBroadphase::resetPool(b3Dispatcher* dispatcher)
				+{
				+	const int liveLeaves = m_sets[0].m_leaves + m_sets[1].m_leaves;
				+	if (liveLeaves != 0)
				+	{
				+		return;
				+	}
				+
				+	// Reset the internal dynamic tree data structures.
				+	m_sets[0].clear();
				+	m_sets[1].clear();
				+
				+	// Flags and stage tracking.
				+	m_deferedcollide = false;
				+	m_needcleanup = true;
				+	m_stageCurrent = 0;
				+
				+	// Update budgets and counters.
				+	m_fixedleft = 0;
				+	m_fupdates = 1;
				+	m_dupdates = 0;
				+	m_cupdates = 10;
				+	m_newpairs = 1;
				+	m_updates_call = 0;
				+	m_updates_done = 0;
				+	m_updates_ratio = 0;
				+
				+	// Parse id and cleanup cursor.
				+	m_pid = 0;
				+	m_cid = 0;
				+
				+	// m_stageRoots has STAGECOUNT + 1 entries, hence the inclusive bound.
				+	for (int stage = 0; stage <= STAGECOUNT; ++stage)
				+	{
				+		m_stageRoots[stage] = 0;
				+	}
				+}
			
 
				+
			
 
				+// No-op: the body is empty, so calling this prints nothing.
				+void b3DynamicBvhBroadphase::printStats()
				+{
				+}
			
 
				+
			
 
				+//
			
 
				+#if B3_DBVT_BP_ENABLE_BENCHMARK
			
 
				+
			
 
				+// Helper types and utilities used by the optional broadphase benchmark
				+// (this code sits inside #if B3_DBVT_BP_ENABLE_BENCHMARK).
				+struct b3BroadphaseBenchmark
				+{
				+	// Parameters describing a single benchmark run.
				+	struct Experiment
				+	{
				+		const char* name;
				+		int object_count;
				+		int update_count;
				+		int spawn_count;
				+		int iterations;
				+		b3Scalar speed;
				+		b3Scalar amplitude;
				+	};
				+	// One test object: an AABB given by center/extents, its proxy and a time value.
				+	struct Object
				+	{
				+		b3Vector3 center;
				+		b3Vector3 extents;
				+		b3BroadphaseProxy* proxy;
				+		b3Scalar time;
				+		// Advances `time` by `speed`, recomputes `center` from sine/cosine terms
				+		// scaled by `amplitude`, then hands the new AABB to pbi->setAabb().
				+		void update(b3Scalar speed, b3Scalar amplitude, b3BroadphaseInterface* pbi)
				+		{
				+			time += speed;
				+			center[0] = b3Cos(time * (b3Scalar)2.17) * amplitude +
				+						b3Sin(time) * amplitude / 2;
				+			center[1] = b3Cos(time * (b3Scalar)1.38) * amplitude +
				+						b3Sin(time) * amplitude;
				+			center[2] = b3Sin(time * (b3Scalar)0.777) * amplitude;
				+			pbi->setAabb(proxy, center - extents, center + extents, 0);
				+		}
				+	};
				+	// Returns rand() reduced modulo (range + 1).
				+	static int UnsignedRand(int range = RAND_MAX - 1) { return (rand() % (range + 1)); }
				+	// Returns UnsignedRand(16384) divided by 16384.
				+	static b3Scalar UnitRand() { return (UnsignedRand(16384) / (b3Scalar)16384); }
				+	// Prints the time read from `c` in microseconds and in milliseconds (rounded
				+	// to nearest); when `count` is non-zero it also prints count per second.
				+	static void OutputTime(const char* name, b3Clock& c, unsigned count = 0)
				+	{
				+		const unsigned long us = c.getTimeMicroseconds();
				+		const unsigned long ms = (us + 500) / 1000;
				+		const b3Scalar sec = us / (b3Scalar)(1000 * 1000);
				+		// us and ms are unsigned long, so the matching conversion is %lu;
				+		// %u expects unsigned int and mismatches where the two types differ in size.
				+		if (count > 0)
				+			printf("%s : %lu us (%lu ms), %.2f/s\r\n", name, us, ms, count / sec);
				+		else
				+			printf("%s : %lu us (%lu ms)\r\n", name, us, ms);
				+	}
				+};
			
 
				+
			
 
				+// Runs each entry of the built-in experiment table against `pbi`:
				+//  1. creates object_count objects with pseudo-random boxes (fixed seed),
				+//  2. updates every object once,
				+//  3. repeats `iterations` times: update the first update_count objects, then
				+//     call pbi->calculateOverlappingPairs(0),
				+//  4. destroys all proxies and frees the objects.
				+// The elapsed time of each phase is printed through OutputTime().
				+void b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface* pbi)
				+{
				+	static const b3BroadphaseBenchmark::Experiment experiments[] =
				+		{
				+			{"1024o.10%", 1024, 10, 0, 8192, (b3Scalar)0.005, (b3Scalar)100},
				+			/*{"4096o.10%",4096,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},
				+		{"8192o.10%",8192,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},*/
				+		};
				+	static const int nexperiments = sizeof(experiments) / sizeof(experiments[0]);
				+	b3AlignedObjectArray<b3BroadphaseBenchmark::Object*> objects;
				+	b3Clock wallclock;
				+	/* Begin			*/
				+	for (int iexp = 0; iexp < nexperiments; ++iexp)
				+	{
				+		const b3BroadphaseBenchmark::Experiment& experiment = experiments[iexp];
				+		const int object_count = experiment.object_count;
				+		// update_count / spawn_count in the table are percentages of object_count.
				+		const int update_count = (object_count * experiment.update_count) / 100;
				+		const int spawn_count = (object_count * experiment.spawn_count) / 100;
				+		const b3Scalar speed = experiment.speed;
				+		const b3Scalar amplitude = experiment.amplitude;
				+		// These values are signed ints, so they are printed with %d rather than %u.
				+		printf("Experiment #%d '%s':\r\n", iexp, experiment.name);
				+		printf("\tObjects: %d\r\n", object_count);
				+		printf("\tUpdate: %d\r\n", update_count);
				+		printf("\tSpawn: %d\r\n", spawn_count);
				+		printf("\tSpeed: %f\r\n", speed);
				+		printf("\tAmplitude: %f\r\n", amplitude);
				+		// Fixed seed so every run generates the same objects.
				+		srand(180673);
				+		/* Create objects	*/
				+		wallclock.reset();
				+		objects.reserve(object_count);
				+		for (int i = 0; i < object_count; ++i)
				+		{
				+			b3BroadphaseBenchmark::Object* po = new b3BroadphaseBenchmark::Object();
				+			po->center[0] = b3BroadphaseBenchmark::UnitRand() * 50;
				+			po->center[1] = b3BroadphaseBenchmark::UnitRand() * 50;
				+			po->center[2] = b3BroadphaseBenchmark::UnitRand() * 50;
				+			po->extents[0] = b3BroadphaseBenchmark::UnitRand() * 2 + 2;
				+			po->extents[1] = b3BroadphaseBenchmark::UnitRand() * 2 + 2;
				+			po->extents[2] = b3BroadphaseBenchmark::UnitRand() * 2 + 2;
				+			po->time = b3BroadphaseBenchmark::UnitRand() * 2000;
				+			po->proxy = pbi->createProxy(po->center - po->extents, po->center + po->extents, 0, po, 1, 1, 0, 0);
				+			objects.push_back(po);
				+		}
				+		b3BroadphaseBenchmark::OutputTime("\tInitialization", wallclock);
				+		/* First update		*/
				+		wallclock.reset();
				+		for (int i = 0; i < objects.size(); ++i)
				+		{
				+			objects[i]->update(speed, amplitude, pbi);
				+		}
				+		b3BroadphaseBenchmark::OutputTime("\tFirst update", wallclock);
				+		/* Updates			*/
				+		wallclock.reset();
				+		for (int i = 0; i < experiment.iterations; ++i)
				+		{
				+			for (int j = 0; j < update_count; ++j)
				+			{
				+				objects[j]->update(speed, amplitude, pbi);
				+			}
				+			pbi->calculateOverlappingPairs(0);
				+		}
				+		b3BroadphaseBenchmark::OutputTime("\tUpdate", wallclock, experiment.iterations);
				+		/* Clean up			*/
				+		wallclock.reset();
				+		for (int i = 0; i < objects.size(); ++i)
				+		{
				+			pbi->destroyProxy(objects[i]->proxy, 0);
				+			delete objects[i];
				+		}
				+		objects.resize(0);
				+		b3BroadphaseBenchmark::OutputTime("\tRelease", wallclock);
				+	}
				+}
			
 
				+#else
			
 
				+/*void							b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface*)
			
 
				+{}
			
 
				+*/
			
 
				+#endif
			
 
				+
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+#undef b3SPC
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h
@@ -0,0 +1,197 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+///b3DynamicBvhBroadphase implementation by Nathanael Presson
			
 
				+#ifndef B3_DBVT_BROADPHASE_H
			
 
				+#define B3_DBVT_BROADPHASE_H
			
 
				+
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+#include "b3BroadphaseCallback.h"
			
 
				+
			
 
				+//
			
 
				+// Compile time config
			
 
				+//
			
 
				+
			
 
				+#define B3_DBVT_BP_PROFILE 0
			
 
				+//#define B3_DBVT_BP_SORTPAIRS				1
			
 
				+#define B3_DBVT_BP_PREVENTFALSEUPDATE 0
			
 
				+#define B3_DBVT_BP_ACCURATESLEEPING 0
			
 
				+#define B3_DBVT_BP_ENABLE_BENCHMARK 0
			
 
				+#define B3_DBVT_BP_MARGIN (b3Scalar)0.05
			
 
				+
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+#define B3_DBVT_BP_PROFILING_RATE 256
			
 
				+
			
 
				+#endif
			
 
				+/* Base proxy record: a client pointer, collision filter group/mask, a unique id and an AABB.
				+ * Declared through B3_ATTRIBUTE_ALIGNED16 together with B3_DECLARE_ALIGNED_ALLOCATOR. */
				+B3_ATTRIBUTE_ALIGNED16(struct)
				+b3BroadphaseProxy
				+{
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	///Optional filtering to cull potential collisions; each named group below is a distinct single bit.
				+	enum CollisionFilterGroups
				+	{
				+		DefaultFilter = 1,
				+		StaticFilter = 2,
				+		KinematicFilter = 4,
				+		DebrisFilter = 8,
				+		SensorTrigger = 16,
				+		CharacterFilter = 32,
				+		AllFilter = -1  //all bits set, so it covers every group listed above (CharacterFilter included)
				+	};
				+
				+	//Usually the client b3CollisionObject or Rigidbody class
				+	void* m_clientObject;
				+	int m_collisionFilterGroup;
				+	int m_collisionFilterMask;
				+	int m_uniqueId;  //m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.
				+
				+	b3Vector3 m_aabbMin;
				+	b3Vector3 m_aabbMax;
				+
				+	B3_FORCE_INLINE int getUid() const
				+	{
				+		return m_uniqueId;
				+	}
				+
				+	//Used for memory pools: only m_clientObject appears in the initializer list (set to 0).
				+	b3BroadphaseProxy() : m_clientObject(0)
				+	{
				+	}
				+
				+	// Initializes the client pointer, filter group/mask and both AABB corners; m_uniqueId is not in the initializer list.
				+	b3BroadphaseProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, void* userPtr, int collisionFilterGroup, int collisionFilterMask)
				+		: m_clientObject(userPtr),
				+		  m_collisionFilterGroup(collisionFilterGroup),
				+		  m_collisionFilterMask(collisionFilterMask),
				+		  m_aabbMin(aabbMin),
				+		  m_aabbMax(aabbMax)
				+	{
				+	}
				+};
			
 
				+
			
 
				+//
			
 
				+// b3DbvtProxy
			
 
				+//
			
 
				+// Broadphase proxy extended with the per-proxy state used by b3DynamicBvhBroadphase.
				+// Both constructors give leaf, links and stage a defined value (null / 0) so a
				+// freshly constructed proxy never exposes indeterminate pointers.
				+struct b3DbvtProxy : b3BroadphaseProxy
				+{
				+	/* Fields		*/
				+	//b3DbvtAabbMm	aabb;
				+	b3DbvtNode* leaf;       // tree node for this proxy; its volume is read during pair cleanup
				+	b3DbvtProxy* links[2];  // presumably the neighbours in a stage list - TODO confirm against the .cpp
				+	int stage;              // presumably the index of the stage list holding this proxy - TODO confirm
				+	/* ctor			*/
				+
				+	explicit b3DbvtProxy() : leaf(0), stage(0)
				+	{
				+		links[0] = links[1] = 0;
				+	}
				+	b3DbvtProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, void* userPtr, int collisionFilterGroup, int collisionFilterMask) : b3BroadphaseProxy(aabbMin, aabbMax, userPtr, collisionFilterGroup, collisionFilterMask), leaf(0), stage(0)
				+	{
				+		links[0] = links[1] = 0;
				+	}
				+};
			
 
				+
			
 
				+typedef b3AlignedObjectArray<b3DbvtProxy*> b3DbvtProxyArray;
			
 
				+
			
 
				+///The b3DynamicBvhBroadphase implements a broadphase using two dynamic AABB bounding volume hierarchies/trees (see b3DynamicBvh).
			
 
				+///One tree is used for static/non-moving objects, and another tree is used for dynamic objects. Objects can move from one tree to the other.
			
 
				+///This is a very fast broadphase, especially for very dynamic worlds where many objects are moving. Its insert/add and remove of objects is generally faster than the sweep and prune broadphases b3AxisSweep3 and b332BitAxisSweep3.
			
 
				+struct b3DynamicBvhBroadphase
				+{
				+	/* Config		*/
				+	enum
				+	{
				+		DYNAMIC_SET = 0, /* Index of the dynamic tree in m_sets	*/
				+		FIXED_SET = 1,   /* Index of the fixed tree in m_sets		*/
				+		STAGECOUNT = 2   /* Number of stages		*/
				+	};
				+	/* Fields		*/
				+	b3DynamicBvh m_sets[2];                     // The two trees, indexed by DYNAMIC_SET / FIXED_SET
				+	b3DbvtProxy* m_stageRoots[STAGECOUNT + 1];  // Stage list heads; one more entry than STAGECOUNT
				+
				+	b3AlignedObjectArray<b3DbvtProxy> m_proxies;
				+	b3OverlappingPairCache* m_paircache;  // Pair cache
				+	b3Scalar m_prediction;                // Velocity prediction (see set/getVelocityPrediction)
				+	int m_stageCurrent;                   // Current stage
				+	int m_fupdates;                       // % of fixed updates per frame
				+	int m_dupdates;                       // % of dynamic updates per frame
				+	int m_cupdates;                       // % of cached pairs re-checked by the cleanup pass of collide()
				+	int m_newpairs;                       // Number of pairs created; collide() sets it back to 1
				+	int m_fixedleft;                      // Fixed optimization left
				+	unsigned m_updates_call;              // Number of update calls; halved at the end of collide()
				+	unsigned m_updates_done;              // Number of updates done; halved at the end of collide()
				+	b3Scalar m_updates_ratio;             // m_updates_done/m_updates_call, or 0 when there were no calls
				+	int m_pid;                            // Parse id; incremented once per collide()
				+	int m_cid;                            // Cleanup index: where the next cleanup pass starts in the pair array
				+	bool m_releasepaircache;              // Release pair cache on delete
				+	bool m_deferedcollide;                // Defer dynamic/static collision to the collide call
				+	bool m_needcleanup;                   // Need to run cleanup? Cleared at the end of collide()
				+#if B3_DBVT_BP_PROFILE
				+	b3Clock m_clock;
				+	struct
				+	{
				+		unsigned long m_total;
				+		unsigned long m_ddcollide;
				+		unsigned long m_fdcollide;
				+		unsigned long m_cleanup;
				+		unsigned long m_jobcount;
				+	} m_profiling;
				+#endif
				+	/* Methods		*/
				+	b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache = 0);
				+	virtual ~b3DynamicBvhBroadphase();
				+	void collide(b3Dispatcher* dispatcher);
				+	void optimize();
				+
				+	/* b3BroadphaseInterface Implementation	*/
				+	b3BroadphaseProxy* createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int objectIndex, void* userPtr, int collisionFilterGroup, int collisionFilterMask);
				+	virtual void destroyProxy(b3BroadphaseProxy* proxy, b3Dispatcher* dispatcher);
				+	virtual void setAabb(int objectId, const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3Dispatcher* dispatcher);
				+	virtual void rayTest(const b3Vector3& rayFrom, const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin = b3MakeVector3(0, 0, 0), const b3Vector3& aabbMax = b3MakeVector3(0, 0, 0));
				+	virtual void aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& callback);
				+
				+	//virtual void					getAabb(b3BroadphaseProxy* proxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const;
				+	virtual void getAabb(int objectId, b3Vector3& aabbMin, b3Vector3& aabbMax) const;
				+	virtual void calculateOverlappingPairs(b3Dispatcher* dispatcher = 0);
				+	virtual b3OverlappingPairCache* getOverlappingPairCache();
				+	virtual const b3OverlappingPairCache* getOverlappingPairCache() const;
				+	virtual void getBroadphaseAabb(b3Vector3& aabbMin, b3Vector3& aabbMax) const;
				+	virtual void printStats();
				+
				+	///Resets the broadphase internal structures, to ensure determinism/reproducibility. Only takes effect when both trees hold no leaves.
				+	virtual void resetPool(b3Dispatcher* dispatcher);
				+
				+	void performDeferredRemoval(b3Dispatcher* dispatcher);
				+
				+	void setVelocityPrediction(b3Scalar prediction)
				+	{
				+		m_prediction = prediction;
				+	}
				+	b3Scalar getVelocityPrediction() const
				+	{
				+		return m_prediction;
				+	}
				+
				+	///setAabbForceUpdate is similar to setAabb but always forces the AABB update.
				+	///It is not part of the b3BroadphaseInterface but specific to b3DynamicBvhBroadphase.
				+	///It bypasses certain optimizations that prevent AABB updates (when the AABB shrinks), see
				+	///http://code.google.com/p/bullet/issues/detail?id=223
				+	void setAabbForceUpdate(b3BroadphaseProxy* absproxy, const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3Dispatcher* /*dispatcher*/);
				+
				+	//static void						benchmark(b3BroadphaseInterface*);
				+};
			
 
				+
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h
@@ -0,0 +1,70 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OVERLAPPING_PAIR_H
			
 
				+#define B3_OVERLAPPING_PAIR_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+
			
 
				+#define B3_NEW_PAIR_MARKER -1
			
 
				+#define B3_REMOVED_PAIR_MARKER -2
			
 
				+
			
 
				+typedef b3Int4 b3BroadphasePair;
			
 
				+
			
 
				+// Builds a pair whose x holds the smaller and y the larger of the two ids;
				+// z and w are both set to B3_NEW_PAIR_MARKER.
				+inline b3Int4 b3MakeBroadphasePair(int xx, int yy)
				+{
				+	const bool alreadyOrdered = xx < yy;
				+
				+	b3Int4 result;
				+	result.x = alreadyOrdered ? xx : yy;
				+	result.y = alreadyOrdered ? yy : xx;
				+	result.z = B3_NEW_PAIR_MARKER;
				+	result.w = B3_NEW_PAIR_MARKER;
				+	return result;
				+}
			
 
				+
			
 
				+/*struct b3BroadphasePair : public b3Int4
			
 
				+{
			
 
				+	explicit b3BroadphasePair(){}
			
 
				+	
			
 
				+};
			
 
				+*/
			
 
				+
			
 
				+// Ordering predicate for pairs: true when `a` sorts before `b`, which is when
				+// a.x is greater than b.x, or the x ids are equal and a.y is greater than b.y.
				+class b3BroadphasePairSortPredicate
				+{
				+public:
				+	bool operator()(const b3BroadphasePair& a, const b3BroadphasePair& b) const
				+	{
				+		if (a.x != b.x)
				+		{
				+			return a.x > b.x;
				+		}
				+		return a.y > b.y;
				+	}
				+};
			
 
				+
			
 
				+// Two pairs compare equal when both their x and their y ids match; z and w are ignored.
				+B3_FORCE_INLINE bool operator==(const b3BroadphasePair& a, const b3BroadphasePair& b)
				+{
				+	if (a.x != b.x)
				+	{
				+		return false;
				+	}
				+	return a.y == b.y;
				+}
			
 
				+
			
 
				+#endif  //B3_OVERLAPPING_PAIR_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp
@@ -0,0 +1,559 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include "b3OverlappingPairCache.h"
			
 
				+
			
 
				+//#include "b3Dispatcher.h"
			
 
				+//#include "b3CollisionAlgorithm.h"
			
 
				+#include "Bullet3Geometry/b3AabbUtil.h"
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+int b3g_overlappingPairs = 0;  // NOTE(review): not modified in the part of this file reviewed; purpose to be confirmed
				+int b3g_removePairs = 0;  // incremented on every b3HashedOverlappingPairCache::removeOverlappingPair() call
				+int b3g_addedPairs = 0;  // NOTE(review): not modified in the part of this file reviewed; purpose to be confirmed
				+int b3g_findPairs = 0;  // incremented on every b3HashedOverlappingPairCache::findPair() call
			
 
				+
			
 
				+// Starts with no overlap filter callback, reserves room for two pairs and
				+// lets growTables() size the hash tables to the pair array's capacity.
				+b3HashedOverlappingPairCache::b3HashedOverlappingPairCache() : m_overlapFilterCallback(0)
				+{
				+	m_overlappingPairArray.reserve(2);
				+	growTables();
				+}
			
 
				+// Empty destructor body: nothing is released explicitly here.
				+b3HashedOverlappingPairCache::~b3HashedOverlappingPairCache()
				+{
				+}
			
 
				+
			
 
				+void b3HashedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher)
				+{
				+	/* No-op: the whole body is commented out, so neither `pair` nor `dispatcher` is touched.
				+	   Disabled code kept for reference:
				+	if (pair.m_algorithm)
				+	{
				+		{
				+			pair.m_algorithm->~b3CollisionAlgorithm();
				+			dispatcher->freeCollisionAlgorithm(pair.m_algorithm);
				+			pair.m_algorithm=0;
				+		}
				+	}
				+	*/
				+}
			
 
				+
			
 
				+// Passes every cached pair through a callback that calls cleanOverlappingPair()
				+// on the pairs whose x or y equals `proxy`. The callback returns false for
				+// every pair.
				+void b3HashedOverlappingPairCache::cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher)
				+{
				+	class PairCleaner : public b3OverlapCallback
				+	{
				+		int m_targetProxy;
				+		b3OverlappingPairCache* m_cache;
				+		b3Dispatcher* m_dispatch;
				+
				+	public:
				+		PairCleaner(int targetProxy, b3OverlappingPairCache* cache, b3Dispatcher* dispatch)
				+			: m_targetProxy(targetProxy),
				+			  m_cache(cache),
				+			  m_dispatch(dispatch)
				+		{
				+		}
				+
				+		virtual bool processOverlap(b3BroadphasePair& pair)
				+		{
				+			const bool referencesTarget = (pair.x == m_targetProxy) || (pair.y == m_targetProxy);
				+			if (referencesTarget)
				+			{
				+				m_cache->cleanOverlappingPair(pair, m_dispatch);
				+			}
				+			return false;
				+		}
				+	};
				+
				+	PairCleaner cleaner(proxy, this, dispatcher);
				+	processAllOverlappingPairs(&cleaner, dispatcher);
				+}
			
 
				+
			
 
				+// Passes every cached pair through a callback that returns true for pairs whose
				+// x or y equals `proxy` and false for all others; what happens to the flagged
				+// pairs is decided by processAllOverlappingPairs().
				+void b3HashedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher)
				+{
				+	class ObsoleteProxyMatcher : public b3OverlapCallback
				+	{
				+		int m_proxyToDrop;
				+
				+	public:
				+		ObsoleteProxyMatcher(int proxyToDrop)
				+			: m_proxyToDrop(proxyToDrop)
				+		{
				+		}
				+
				+		virtual bool processOverlap(b3BroadphasePair& pair)
				+		{
				+			if (pair.x == m_proxyToDrop)
				+			{
				+				return true;
				+			}
				+			return pair.y == m_proxyToDrop;
				+		}
				+	};
				+
				+	ObsoleteProxyMatcher matcher(proxy);
				+	processAllOverlappingPairs(&matcher, dispatcher);
				+}
			
 
				+
			
 
				+// Looks up the cached pair for the two proxy ids (in either order).
				+// Returns a pointer into m_overlappingPairArray, or NULL when the bucket index
				+// falls outside the hash table or no entry in the bucket's chain matches.
				+// Every call increments b3g_findPairs.
				+b3BroadphasePair* b3HashedOverlappingPairCache::findPair(int proxy0, int proxy1)
				+{
				+	++b3g_findPairs;
				+
				+	// Order the ids so the smaller one comes first.
				+	if (proxy0 > proxy1)
				+	{
				+		b3Swap(proxy0, proxy1);
				+	}
				+	const int lowId = proxy0;
				+	const int highId = proxy1;
				+
				+	const int bucket = static_cast<int>(getHash(static_cast<unsigned int>(lowId), static_cast<unsigned int>(highId)) & (m_overlappingPairArray.capacity() - 1));
				+	if (bucket >= m_hashTable.size())
				+	{
				+		return NULL;
				+	}
				+
				+	// Walk the bucket's chain until a matching pair or the end marker is reached.
				+	for (int cursor = m_hashTable[bucket]; cursor != B3_NULL_PAIR; cursor = m_next[cursor])
				+	{
				+		if (!(equalsPair(m_overlappingPairArray[cursor], lowId, highId) == false))
				+		{
				+			b3Assert(cursor < m_overlappingPairArray.size());
				+			return &m_overlappingPairArray[cursor];
				+		}
				+	}
				+
				+	return NULL;
				+}
			
 
				+
			
 
				+//#include <stdio.h>
			
 
				+
			
 
				+// Brings m_hashTable and m_next up to the pair array's current capacity.
				+// Does nothing when the hash table is already at least that large. Otherwise
				+// both tables are resized, every slot is reset to B3_NULL_PAIR, and the pairs
				+// at indices [0, previous table size) are re-linked into their buckets under
				+// the new capacity mask.
				+void b3HashedOverlappingPairCache::growTables()
				+{
				+	const int newCapacity = m_overlappingPairArray.capacity();
				+	const int previousTableSize = m_hashTable.size();
				+	if (!(previousTableSize < newCapacity))
				+	{
				+		return;
				+	}
				+
				+	m_hashTable.resize(newCapacity);
				+	m_next.resize(newCapacity);
				+
				+	// Start from empty buckets and empty chains.
				+	for (int slot = 0; slot < newCapacity; ++slot)
				+	{
				+		m_hashTable[slot] = B3_NULL_PAIR;
				+		m_next[slot] = B3_NULL_PAIR;
				+	}
				+
				+	// Re-insert each existing pair at the head of its bucket's chain.
				+	for (int pairIndex = 0; pairIndex < previousTableSize; ++pairIndex)
				+	{
				+		const b3BroadphasePair& existing = m_overlappingPairArray[pairIndex];
				+		const int lowId = existing.x;
				+		const int highId = existing.y;
				+		const int bucket = static_cast<int>(getHash(static_cast<unsigned int>(lowId), static_cast<unsigned int>(highId)) & (m_overlappingPairArray.capacity() - 1));
				+		m_next[pairIndex] = m_hashTable[bucket];
				+		m_hashTable[bucket] = pairIndex;
				+	}
				+}
			
 
				+/* Returns the pair that internalFindPair() locates for the two ids; when there is none,
				+ * appends a new pair to m_overlappingPairArray, links it at the head of its hash
				+ * bucket and returns a pointer to it. The ids are ordered (smaller first) before hashing. */
				+b3BroadphasePair* b3HashedOverlappingPairCache::internalAddPair(int proxy0, int proxy1)
				+{
				+	if (proxy0 > proxy1)
				+		b3Swap(proxy0, proxy1);
				+	int proxyId1 = proxy0;
				+	int proxyId2 = proxy1;
				+
				+	/*if (proxyId1 > proxyId2) 
				+		b3Swap(proxyId1, proxyId2);*/
				+
				+	int hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1), static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity() - 1));  // Bucket index: hash masked with (capacity - 1)
				+
				+	b3BroadphasePair* pair = internalFindPair(proxy0, proxy1, hash);
				+	if (pair != NULL)
				+	{
				+		return pair;
				+	}
				+	/*for(int i=0;i<m_overlappingPairArray.size();++i)
				+		{
				+		if(	(m_overlappingPairArray[i].m_pProxy0==proxy0)&&
				+			(m_overlappingPairArray[i].m_pProxy1==proxy1))
				+			{
				+			printf("Adding duplicated %u<>%u\r\n",proxyId1,proxyId2);
				+			internalFindPair(proxy0, proxy1, hash);
				+			}
				+		}*/
				+	int count = m_overlappingPairArray.size();  // Size before the append; used below as the new pair's index
				+	int oldCapacity = m_overlappingPairArray.capacity();
				+	pair = &m_overlappingPairArray.expandNonInitializing();
				+
				+	//this is where we add an actual pair, so also call the 'ghost'
				+	//	if (m_ghostPairCallback)
				+	//		m_ghostPairCallback->addOverlappingPair(proxy0,proxy1);
				+
				+	int newCapacity = m_overlappingPairArray.capacity();
				+
				+	if (oldCapacity < newCapacity)
				+	{
				+		growTables();
				+		//the mask depends on the capacity, so the bucket index is recomputed after the array grew
				+		hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1), static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity() - 1));
				+	}
				+
				+	*pair = b3MakeBroadphasePair(proxy0, proxy1);
				+
				+	//	pair->m_pProxy0 = proxy0;
				+	//	pair->m_pProxy1 = proxy1;
				+	//pair->m_algorithm = 0;
				+	//pair->m_internalTmpValue = 0;
				+
				+	m_next[count] = m_hashTable[hash];  // New pair points at the previous head of the bucket
				+	m_hashTable[hash] = count;  // ...and becomes the new head
				+
				+	return pair;
				+}
			
 
				+
			
 
				+// Detaches entry 'target' from the singly linked chain rooted at hashTable[bucket],
				+// patching either the bucket head or the predecessor's link in 'next'.
				+// 'target' is expected to be present in that chain.
				+static void b3UnlinkPairFromBucket(b3AlignedObjectArray<int>& hashTable, b3AlignedObjectArray<int>& next, int bucket, int target)
				+{
				+	int cursor = hashTable[bucket];
				+	b3Assert(cursor != B3_NULL_PAIR);
				+
				+	int prior = B3_NULL_PAIR;
				+	for (; cursor != target; cursor = next[cursor])
				+	{
				+		prior = cursor;
				+	}
				+
				+	if (prior == B3_NULL_PAIR)
				+	{
				+		// 'target' was the head of the chain.
				+		hashTable[bucket] = next[target];
				+		return;
				+	}
				+
				+	b3Assert(next[prior] == target);
				+	next[prior] = next[target];
				+}
				+
				+// Removes the pair (proxy0, proxy1) from the hashed cache, if present.
				+// The ids are ordered (smaller first) before hashing and lookup.
				+// The vacated array slot is refilled with the last stored pair so the pair
				+// array stays dense; both hash chains involved are re-linked accordingly.
				+// Always returns 0.
				+void* b3HashedOverlappingPairCache::removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher)
				+{
				+	b3g_removePairs++;
				+
				+	if (proxy0 > proxy1)
				+		b3Swap(proxy0, proxy1);
				+
				+	const int bucket = static_cast<int>(getHash(static_cast<unsigned int>(proxy0), static_cast<unsigned int>(proxy1)) & (m_overlappingPairArray.capacity() - 1));
				+
				+	b3BroadphasePair* doomed = internalFindPair(proxy0, proxy1, bucket);
				+	if (doomed == NULL)
				+		return 0;
				+
				+	cleanOverlappingPair(*doomed, dispatcher);
				+
				+	const int doomedIndex = int(doomed - &m_overlappingPairArray[0]);
				+	b3Assert(doomedIndex < m_overlappingPairArray.size());
				+
				+	// Step 1: take the removed pair out of its hash chain.
				+	b3UnlinkPairFromBucket(m_hashTable, m_next, bucket, doomedIndex);
				+
				+	// Step 2: if it already occupies the last slot, dropping that slot is enough.
				+	const int tailIndex = m_overlappingPairArray.size() - 1;
				+	if (tailIndex == doomedIndex)
				+	{
				+		m_overlappingPairArray.pop_back();
				+		return 0;
				+	}
				+
				+	// Step 3: take the last pair out of its own chain (it is about to change index).
				+	const b3BroadphasePair& tail = m_overlappingPairArray[tailIndex];
				+	const int tailBucket = static_cast<int>(getHash(static_cast<unsigned int>(tail.x), static_cast<unsigned int>(tail.y)) & (m_overlappingPairArray.capacity() - 1));
				+	b3UnlinkPairFromBucket(m_hashTable, m_next, tailBucket, tailIndex);
				+
				+	// Step 4: move the last pair into the hole and re-insert it at the head of its chain.
				+	m_overlappingPairArray[doomedIndex] = m_overlappingPairArray[tailIndex];
				+	m_next[doomedIndex] = m_hashTable[tailBucket];
				+	m_hashTable[tailBucket] = doomedIndex;
				+
				+	m_overlappingPairArray.pop_back();
				+	return 0;
				+}
			
 
				+//#include <stdio.h>
			
 
				+
			
 
				+// Feeds every stored pair to 'callback'. A pair for which the callback returns
				+// true is removed from the cache; the cursor is not advanced in that case
				+// because removal moves another pair into the current slot.
				+void b3HashedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback, b3Dispatcher* dispatcher)
				+{
				+	int cursor = 0;
				+	while (cursor < m_overlappingPairArray.size())
				+	{
				+		b3BroadphasePair& current = m_overlappingPairArray[cursor];
				+		if (!callback->processOverlap(current))
				+		{
				+			++cursor;
				+			continue;
				+		}
				+
				+		removeOverlappingPair(current.x, current.y, dispatcher);
				+		b3g_overlappingPairs--;
				+	}
				+}
			
 
				+
			
 
				+// Sorts the stored pairs. The hash chains index into the pair array, so the
				+// cache is rebuilt: copy all pairs out, remove them one by one, reset the
				+// chain links, sort the copy, then add the pairs back in sorted order.
				+void b3HashedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher)
				+{
				+	b3BroadphasePairArray snapshot;
				+	for (int n = 0; n < m_overlappingPairArray.size(); ++n)
				+	{
				+		snapshot.push_back(m_overlappingPairArray[n]);
				+	}
				+
				+	for (int n = 0; n < snapshot.size(); ++n)
				+	{
				+		removeOverlappingPair(snapshot[n].x, snapshot[n].y, dispatcher);
				+	}
				+
				+	for (int n = 0; n < m_next.size(); ++n)
				+	{
				+		m_next[n] = B3_NULL_PAIR;
				+	}
				+
				+	snapshot.quickSort(b3BroadphasePairSortPredicate());
				+
				+	for (int n = 0; n < snapshot.size(); ++n)
				+	{
				+		addOverlappingPair(snapshot[n].x, snapshot[n].y);
				+	}
				+}
			
 
				+
			
 
				+// Removes the pair (proxy0, proxy1) from the sorted cache.
				+// Does nothing when deferred removal is enabled (hasDeferredRemoval() is true).
				+// Otherwise the pair is located with a linear search, cleaned, swapped into the
				+// last occupied slot and popped. Always returns 0.
				+void* b3SortedOverlappingPairCache::removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher)
				+{
				+	if (!hasDeferredRemoval())
				+	{
				+		b3BroadphasePair findPair = b3MakeBroadphasePair(proxy0, proxy1);
				+
				+		// An index below size() means the pair was found.
				+		int findIndex = m_overlappingPairArray.findLinearSearch(findPair);
				+		if (findIndex < m_overlappingPairArray.size())
				+		{
				+			b3g_overlappingPairs--;
				+			b3BroadphasePair& pair = m_overlappingPairArray[findIndex];
				+
				+			cleanOverlappingPair(pair, dispatcher);
				+
				+			// Swap with the last *stored* element. pop_back() drops index size() - 1,
				+			// so swapping with capacity() - 1 would touch unused storage and discard
				+			// the wrong pair whenever capacity() != size().
				+			m_overlappingPairArray.swap(findIndex, m_overlappingPairArray.size() - 1);
				+			m_overlappingPairArray.pop_back();
				+			return 0;
				+		}
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+// Appends the pair (proxy0, proxy1) to the cache and returns a pointer to the
				+// stored entry, or 0 when the overlap filter rejects the pair.
				+// A proxy must not be paired with itself (asserted).
				+b3BroadphasePair* b3SortedOverlappingPairCache::addOverlappingPair(int proxy0, int proxy1)
				+{
				+	b3Assert(proxy0 != proxy1);
				+
				+	if (!needsBroadphaseCollision(proxy0, proxy1))
				+	{
				+		return 0;
				+	}
				+
				+	b3BroadphasePair& slot = m_overlappingPairArray.expandNonInitializing();
				+	slot = b3MakeBroadphasePair(proxy0, proxy1);
				+
				+	++b3g_overlappingPairs;
				+	++b3g_addedPairs;
				+
				+	return &slot;
				+}
			
 
				+
			
 
				+///this findPair becomes really slow. Either sort the list to speedup the query, or
			
 
				+///use a different solution. It is mainly used for Removing overlapping pairs. Removal could be delayed.
			
 
				+///we could keep a linked list in each proxy, and store pair in one of the proxies (with lowest memory address)
			
 
				+///Also we can use a 2D bitmap, which can be useful for a future GPU implementation
			
 
				+b3BroadphasePair* b3SortedOverlappingPairCache::findPair(int proxy0, int proxy1)
				+{
				+	// Pairs rejected by the overlap filter are reported as absent.
				+	if (!needsBroadphaseCollision(proxy0, proxy1))
				+	{
				+		return 0;
				+	}
				+
				+	const b3BroadphasePair wanted = b3MakeBroadphasePair(proxy0, proxy1);
				+	const int where = m_overlappingPairArray.findLinearSearch(wanted);
				+
				+	// An index at or past size() means the linear search found nothing.
				+	if (where >= m_overlappingPairArray.size())
				+	{
				+		return 0;
				+	}
				+
				+	return &m_overlappingPairArray[where];
				+}
			
 
				+
			
 
				+//#include <stdio.h>
			
 
				+
			
 
				+// Feeds every stored pair to 'callback'. A pair for which the callback returns
				+// true is cleaned, invalidated (ids set to -1), swapped to the back and popped;
				+// the cursor stays put so the element swapped in is visited next.
				+void b3SortedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback, b3Dispatcher* dispatcher)
				+{
				+	int cursor = 0;
				+	while (cursor < m_overlappingPairArray.size())
				+	{
				+		b3BroadphasePair& current = m_overlappingPairArray[cursor];
				+		if (!callback->processOverlap(current))
				+		{
				+			++cursor;
				+			continue;
				+		}
				+
				+		cleanOverlappingPair(current, dispatcher);
				+		current.x = -1;
				+		current.y = -1;
				+		m_overlappingPairArray.swap(cursor, m_overlappingPairArray.size() - 1);
				+		m_overlappingPairArray.pop_back();
				+		b3g_overlappingPairs--;
				+	}
				+}
			
 
				+
			
 
				+// Starts unblocked, with deferred removal enabled and no overlap filter installed.
				+b3SortedOverlappingPairCache::b3SortedOverlappingPairCache()
				+	: m_blockedForChanges(false),
				+	  m_hasDeferredRemoval(true),
				+	  m_overlapFilterCallback(0)
				+{
				+	// Reserve storage for two pairs up front.
				+	m_overlappingPairArray.reserve(2);
				+}
			
 
				+
			
 
				+// Nothing to release explicitly; members clean up after themselves.
				+b3SortedOverlappingPairCache::~b3SortedOverlappingPairCache() {}
			
 
				+
			
 
				+// Hook invoked before a pair is discarded. Currently a no-op: the per-pair
				+// collision-algorithm teardown this hook used to perform is disabled.
				+void b3SortedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher)
				+{
				+	(void)pair;
				+	(void)dispatcher;
				+}
			
 
				+
			
 
				+// Runs cleanOverlappingPair() on every stored pair that references 'proxy'.
				+// No pair is removed: the visitor always answers "keep".
				+void b3SortedOverlappingPairCache::cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher)
				+{
				+	struct ProxyCleaner : public b3OverlapCallback
				+	{
				+		int m_target;
				+		b3OverlappingPairCache* m_cache;
				+		b3Dispatcher* m_dispatch;
				+
				+		ProxyCleaner(int target, b3OverlappingPairCache* cache, b3Dispatcher* dispatch)
				+			: m_target(target),
				+			  m_cache(cache),
				+			  m_dispatch(dispatch)
				+		{
				+		}
				+
				+		virtual bool processOverlap(b3BroadphasePair& pair)
				+		{
				+			const bool touchesTarget = (pair.x == m_target) || (pair.y == m_target);
				+			if (touchesTarget)
				+			{
				+				m_cache->cleanOverlappingPair(pair, m_dispatch);
				+			}
				+			return false;
				+		}
				+	};
				+
				+	ProxyCleaner cleaner(proxy, this, dispatcher);
				+	processAllOverlappingPairs(&cleaner, dispatcher);
				+}
			
 
				+
			
 
				+// Removes every stored pair in which 'proxy' appears as either member.
				+void b3SortedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher)
				+{
				+	struct ProxyPairRemover : public b3OverlapCallback
				+	{
				+		int m_gone;
				+
				+		explicit ProxyPairRemover(int gone) : m_gone(gone) {}
				+
				+		// Answering true asks the traversal to delete the pair.
				+		virtual bool processOverlap(b3BroadphasePair& pair)
				+		{
				+			if (pair.x == m_gone)
				+			{
				+				return true;
				+			}
				+			return pair.y == m_gone;
				+		}
				+	};
				+
				+	ProxyPairRemover remover(proxy);
				+	processAllOverlappingPairs(&remover, dispatcher);
				+}
			
 
				+
			
 
				+// Intentionally empty: the pairs should already be sorted.
				+void b3SortedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher)
				+{
				+	(void)dispatcher;
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h
@@ -0,0 +1,427 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OVERLAPPING_PAIR_CACHE_H
			
 
				+#define B3_OVERLAPPING_PAIR_CACHE_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+class b3Dispatcher;
			
 
				+#include "b3OverlappingPair.h"
			
 
				+
			
 
				+typedef b3AlignedObjectArray<b3BroadphasePair> b3BroadphasePairArray;
			
 
				+
			
 
				+/// Visitor applied to each pair by processAllOverlappingPairs().
				+struct b3OverlapCallback
				+{
				+	virtual ~b3OverlapCallback() {}
				+
				+	/// Return true to have the visited pair deleted, false to keep it.
				+	virtual bool processOverlap(b3BroadphasePair& pair) = 0;
				+};
			
 
				+
			
 
				+/// User-supplied predicate that decides whether two proxies may form a pair.
				+struct b3OverlapFilterCallback
				+{
				+	virtual ~b3OverlapFilterCallback() {}
				+
				+	/// Return true when the pair (proxy0, proxy1) needs collision handling.
				+	virtual bool needBroadphaseCollision(int proxy0, int proxy1) const = 0;
				+};
			
 
				+
			
 
				+extern int b3g_removePairs;
			
 
				+extern int b3g_addedPairs;
			
 
				+extern int b3g_findPairs;
			
 
				+
			
 
				+/// Sentinel index meaning "no pair": it terminates the hash chains walked by
				+/// b3HashedOverlappingPairCache (bucket heads and m_next links are compared against it).
				+/// NOTE(review): 0xffffffff does not fit in a signed int; presumably this becomes -1 on the supported compilers - confirm.
				+const int B3_NULL_PAIR = 0xffffffff;
			
 
				+
			
 
				+///The b3OverlappingPairCache provides an interface for overlapping pair management (add, remove, storage), used by the b3BroadphaseInterface broadphases.
			
 
				+///The b3HashedOverlappingPairCache and b3SortedOverlappingPairCache classes are two implementations.
			
 
				+class b3OverlappingPairCache
				+{
				+public:
				+	/// Virtual so that deleting through a base pointer reaches the derived destructor.
				+	virtual ~b3OverlappingPairCache() {}
				+
				+	// --- access to the stored pairs ---
				+	virtual b3BroadphasePair* getOverlappingPairArrayPtr() = 0;
				+	virtual const b3BroadphasePair* getOverlappingPairArrayPtr() const = 0;
				+	virtual b3BroadphasePairArray& getOverlappingPairArray() = 0;
				+
				+	/// Hook run on a pair before it is discarded.
				+	virtual void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher) = 0;
				+
				+	virtual int getNumOverlappingPairs() const = 0;
				+
				+	/// Runs the clean hook on the pairs that reference 'proxy'.
				+	virtual void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher) = 0;
				+
				+	virtual void setOverlapFilterCallback(b3OverlapFilterCallback* callback) = 0;
				+
				+	/// Visits every pair; a true result from the callback requests deletion of that pair.
				+	virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher) = 0;
				+
				+	virtual b3BroadphasePair* findPair(int proxy0, int proxy1) = 0;
				+
				+	virtual bool hasDeferredRemoval() = 0;
				+
				+	// --- pair bookkeeping ---
				+	virtual b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1) = 0;
				+	virtual void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher) = 0;
				+	virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/, b3Dispatcher* /*dispatcher*/) = 0;
				+
				+	virtual void sortOverlappingPairs(b3Dispatcher* dispatcher) = 0;
				+};
			
 
				+
			
 
				+/// Hash-space based Pair Cache, thanks to Erin Catto, Box2D, http://www.box2d.org, and Pierre Terdiman, Codercorner, http://codercorner.com
			
 
				+class b3HashedOverlappingPairCache : public b3OverlappingPairCache
				+{
				+	/// Densely packed pairs; hash chains refer to entries by their index in this array.
				+	b3BroadphasePairArray m_overlappingPairArray;
				+	/// Optional user filter; when null every pair is accepted.
				+	b3OverlapFilterCallback* m_overlapFilterCallback;
				+
				+public:
				+	b3HashedOverlappingPairCache();
				+	virtual ~b3HashedOverlappingPairCache();
				+
				+	virtual void removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher);
				+
				+	virtual void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher);
				+
				+	/// True when the pair should be tracked: defers to the filter callback if one
				+	/// is installed, otherwise accepts everything.
				+	B3_FORCE_INLINE bool needsBroadphaseCollision(int proxy0, int proxy1) const
				+	{
				+		if (m_overlapFilterCallback)
				+			return m_overlapFilterCallback->needBroadphaseCollision(proxy0, proxy1);
				+
				+		return true;
				+	}
				+
				+	/// Adds a pair and returns it. If the pair already exists, no new pair is
				+	/// created and the old one is returned. Returns 0 when the filter rejects the pair.
				+	virtual b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1)
				+	{
				+		b3g_addedPairs++;
				+
				+		if (!needsBroadphaseCollision(proxy0, proxy1))
				+			return 0;
				+
				+		return internalAddPair(proxy0, proxy1);
				+	}
				+
				+	void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher);
				+
				+	virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher);
				+
				+	virtual b3BroadphasePair* getOverlappingPairArrayPtr()
				+	{
				+		return &m_overlappingPairArray[0];
				+	}
				+
				+	const b3BroadphasePair* getOverlappingPairArrayPtr() const
				+	{
				+		return &m_overlappingPairArray[0];
				+	}
				+
				+	b3BroadphasePairArray& getOverlappingPairArray()
				+	{
				+		return m_overlappingPairArray;
				+	}
				+
				+	const b3BroadphasePairArray& getOverlappingPairArray() const
				+	{
				+		return m_overlappingPairArray;
				+	}
				+
				+	void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher);
				+
				+	b3BroadphasePair* findPair(int proxy0, int proxy1);
				+
				+	/// Number of stored pairs (same value as getNumOverlappingPairs()).
				+	int GetCount() const { return m_overlappingPairArray.size(); }
				+
				+	b3OverlapFilterCallback* getOverlapFilterCallback()
				+	{
				+		return m_overlapFilterCallback;
				+	}
				+
				+	void setOverlapFilterCallback(b3OverlapFilterCallback* callback)
				+	{
				+		m_overlapFilterCallback = callback;
				+	}
				+
				+	int getNumOverlappingPairs() const
				+	{
				+		return m_overlappingPairArray.size();
				+	}
				+
				+private:
				+	b3BroadphasePair* internalAddPair(int proxy0, int proxy1);
				+
				+	void growTables();
				+
				+	/// Exact, order-sensitive comparison of a stored pair against two ids.
				+	B3_FORCE_INLINE bool equalsPair(const b3BroadphasePair& pair, int proxyId1, int proxyId2)
				+	{
				+		return pair.x == proxyId1 && pair.y == proxyId2;
				+	}
				+
				+	/// Thomas Wang style integer hash of the two proxy ids.
				+	/// The mixing is done on an unsigned value: the shifts and additions are
				+	/// expected to wrap, which is well defined for unsigned arithmetic but
				+	/// undefined behaviour on a signed int. The result is only used inside this
				+	/// class, where it is masked down to a bucket index.
				+	B3_FORCE_INLINE unsigned int getHash(unsigned int proxyId1, unsigned int proxyId2)
				+	{
				+		unsigned int key = proxyId1 | (proxyId2 << 16);
				+
				+		key += ~(key << 15);
				+		key ^= (key >> 10);
				+		key += (key << 3);
				+		key ^= (key >> 6);
				+		key += ~(key << 11);
				+		key ^= (key >> 16);
				+		return key;
				+	}
				+
				+	/// Walks the chain of bucket 'hash' looking for the pair (proxy0, proxy1).
				+	/// The ids are compared exactly as given (no reordering here), so the caller
				+	/// must pass them in the same order that was used when the pair was stored.
				+	/// Returns NULL when the chain holds no such pair.
				+	B3_FORCE_INLINE b3BroadphasePair* internalFindPair(int proxy0, int proxy1, int hash)
				+	{
				+		int index = m_hashTable[hash];
				+
				+		while (index != B3_NULL_PAIR && !equalsPair(m_overlappingPairArray[index], proxy0, proxy1))
				+		{
				+			index = m_next[index];
				+		}
				+
				+		if (index == B3_NULL_PAIR)
				+		{
				+			return NULL;
				+		}
				+
				+		b3Assert(index < m_overlappingPairArray.size());
				+
				+		return &m_overlappingPairArray[index];
				+	}
				+
				+	/// This cache removes pairs immediately.
				+	virtual bool hasDeferredRemoval()
				+	{
				+		return false;
				+	}
				+
				+	virtual void sortOverlappingPairs(b3Dispatcher* dispatcher);
				+
				+protected:
				+	/// Bucket heads: index of the first pair in each chain, or B3_NULL_PAIR.
				+	b3AlignedObjectArray<int> m_hashTable;
				+	/// Per-pair link to the next pair in the same chain, or B3_NULL_PAIR.
				+	b3AlignedObjectArray<int> m_next;
				+};
			
 
				+
			
 
				+///b3SortedOverlappingPairCache maintains the objects with overlapping AABB
			
 
				+///Typically managed by the Broadphase, Axis3Sweep or b3SimpleBroadphase
			
 
				+class b3SortedOverlappingPairCache : public b3OverlappingPairCache
				+{
				+protected:
				+	/// Storage for the pairs; lookups are linear searches over this array.
				+	b3BroadphasePairArray m_overlappingPairArray;
				+
				+	/// Meant to flag that proxies must not be created/destroyed during dispatch.
				+	bool m_blockedForChanges;
				+
				+	/// When true, removeOverlappingPair() is a no-op and removal happens during pair traversal.
				+	bool m_hasDeferredRemoval;
				+
				+	/// Optional user filter consulted by needsBroadphaseCollision().
				+	b3OverlapFilterCallback* m_overlapFilterCallback;
				+
				+public:
				+	b3SortedOverlappingPairCache();
				+	virtual ~b3SortedOverlappingPairCache();
				+
				+	virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* dispatcher);
				+
				+	void* removeOverlappingPair(int proxy0, int proxy1, b3Dispatcher* dispatcher);
				+
				+	void cleanOverlappingPair(b3BroadphasePair& pair, b3Dispatcher* dispatcher);
				+
				+	b3BroadphasePair* addOverlappingPair(int proxy0, int proxy1);
				+
				+	b3BroadphasePair* findPair(int proxy0, int proxy1);
				+
				+	void cleanProxyFromPairs(int proxy, b3Dispatcher* dispatcher);
				+
				+	virtual void removeOverlappingPairsContainingProxy(int proxy, b3Dispatcher* dispatcher);
				+
				+	/// Defers to the filter callback when one is installed; accepts every pair otherwise.
				+	inline bool needsBroadphaseCollision(int proxy0, int proxy1) const
				+	{
				+		return m_overlapFilterCallback
				+				   ? m_overlapFilterCallback->needBroadphaseCollision(proxy0, proxy1)
				+				   : true;
				+	}
				+
				+	b3BroadphasePairArray& getOverlappingPairArray() { return m_overlappingPairArray; }
				+
				+	const b3BroadphasePairArray& getOverlappingPairArray() const { return m_overlappingPairArray; }
				+
				+	b3BroadphasePair* getOverlappingPairArrayPtr() { return &m_overlappingPairArray[0]; }
				+
				+	const b3BroadphasePair* getOverlappingPairArrayPtr() const { return &m_overlappingPairArray[0]; }
				+
				+	int getNumOverlappingPairs() const { return m_overlappingPairArray.size(); }
				+
				+	b3OverlapFilterCallback* getOverlapFilterCallback() { return m_overlapFilterCallback; }
				+
				+	void setOverlapFilterCallback(b3OverlapFilterCallback* callback) { m_overlapFilterCallback = callback; }
				+
				+	virtual bool hasDeferredRemoval() { return m_hasDeferredRemoval; }
				+
				+	virtual void sortOverlappingPairs(b3Dispatcher* dispatcher);
				+};
			
 
				+
			
 
				+///b3NullPairCache skips add/removal of overlapping pairs. Useful for benchmarking and unit testing.
			
 
				+class b3NullPairCache : public b3OverlappingPairCache
				+{
				+	/// Never populated by this class; exists only to back the array accessors.
				+	b3BroadphasePairArray m_overlappingPairArray;
				+
				+public:
				+	virtual b3BroadphasePair* getOverlappingPairArrayPtr() { return &m_overlappingPairArray[0]; }
				+
				+	const b3BroadphasePair* getOverlappingPairArrayPtr() const { return &m_overlappingPairArray[0]; }
				+
				+	b3BroadphasePairArray& getOverlappingPairArray() { return m_overlappingPairArray; }
				+
				+	// Every operation below is a deliberate no-op or reports "nothing stored".
				+
				+	virtual void cleanOverlappingPair(b3BroadphasePair& /*pair*/, b3Dispatcher* /*dispatcher*/) {}
				+
				+	virtual int getNumOverlappingPairs() const { return 0; }
				+
				+	virtual void cleanProxyFromPairs(int /*proxy*/, b3Dispatcher* /*dispatcher*/) {}
				+
				+	virtual void setOverlapFilterCallback(b3OverlapFilterCallback* /*callback*/) {}
				+
				+	virtual void processAllOverlappingPairs(b3OverlapCallback*, b3Dispatcher* /*dispatcher*/) {}
				+
				+	virtual b3BroadphasePair* findPair(int /*proxy0*/, int /*proxy1*/) { return 0; }
				+
				+	virtual bool hasDeferredRemoval() { return true; }
				+
				+	virtual b3BroadphasePair* addOverlappingPair(int /*proxy0*/, int /*proxy1*/) { return 0; }
				+
				+	virtual void* removeOverlappingPair(int /*proxy0*/, int /*proxy1*/, b3Dispatcher* /*dispatcher*/) { return 0; }
				+
				+	virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/, b3Dispatcher* /*dispatcher*/) {}
				+
				+	virtual void sortOverlappingPairs(b3Dispatcher* /*dispatcher*/) {}
				+};
			
 
				+
			
 
				+#endif  //B3_OVERLAPPING_PAIR_CACHE_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h
@@ -0,0 +1,56 @@
 
				+
			
 
				+#ifndef B3_AABB_H
			
 
				+#define B3_AABB_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+typedef struct b3Aabb b3Aabb_t;
			
 
				+
			
 
				+/// Axis-aligned bounding box stored as a minimum and a maximum corner.
				+/// Each corner is a union exposing the same four 32-bit lanes as a float
				+/// array, a b3Float4 vector, or an int array.
				+struct b3Aabb
				+{
				+	union
				+	{
				+		float m_min[4];
				+		b3Float4 m_minVec;
				+		int m_minIndices[4];
				+	};
				+	union
				+	{
				+		float m_max[4];
				+		b3Float4 m_maxVec;
				+		int m_signedMaxIndices[4];
				+	};
				+};
			
 
				+
			
 
				+/// Computes the bounds of a local-space AABB after it is grown by 'margin' on
				+/// x/y/z and transformed by the rigid transform (pos, orn).
				+/// The local centre is transformed as a point; the half extents are projected
				+/// through the element-wise absolute value of the rotation matrix.
				+/// Results are written to *aabbMinOut / *aabbMaxOut.
				+inline void b3TransformAabb2(b3Float4ConstArg localAabbMin, b3Float4ConstArg localAabbMax, float margin,
				+							 b3Float4ConstArg pos,
				+							 b3QuatConstArg orn,
				+							 b3Float4* aabbMinOut, b3Float4* aabbMaxOut)
				+{
				+	// Half extents of the local box, padded by the margin (w lane left unpadded).
				+	b3Float4 halfSize = 0.5f * (localAabbMax - localAabbMin);
				+	halfSize += b3MakeFloat4(margin, margin, margin, 0.f);
				+
				+	// Centre of the local box, moved into the target frame.
				+	b3Float4 midLocal = 0.5f * (localAabbMax + localAabbMin);
				+	b3Float4 midWorld = b3TransformPoint(midLocal, pos, orn);
				+
				+	// |R| applied to the half extents gives the half extents along each output axis.
				+	b3Mat3x3 rotation;
				+	rotation = b3QuatGetRotationMatrix(orn);
				+	b3Mat3x3 absRotation = b3AbsoluteMat3x3(rotation);
				+
				+	float ex = b3Dot3F4(halfSize, b3GetRow(absRotation, 0));
				+	float ey = b3Dot3F4(halfSize, b3GetRow(absRotation, 1));
				+	float ez = b3Dot3F4(halfSize, b3GetRow(absRotation, 2));
				+	b3Float4 reach = b3MakeFloat4(ex, ey, ez, 0.f);
				+
				+	*aabbMinOut = midWorld - reach;
				+	*aabbMaxOut = midWorld + reach;
				+}
			
 
				+
			
 
				+/// conservative test for overlap between two aabbs
			
 
				+inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1, b3Float4ConstArg aabbMax1,
				+								  b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)
				+{
				+	// The boxes are disjoint as soon as they are separated along any one axis.
				+	if (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x)
				+	{
				+		return false;
				+	}
				+	if (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z)
				+	{
				+		return false;
				+	}
				+	if (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y)
				+	{
				+		return false;
				+	}
				+	return true;
				+}
			
 
				+
			
 
				+#endif  //B3_AABB_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/CMakeLists.txt
+++ b/Dependencies/include/bullet3/Bullet3Collision/CMakeLists.txt
@@ -0,0 +1,93 @@
 
				+
			
 
				+INCLUDE_DIRECTORIES(
			
 
				+	${BULLET_PHYSICS_SOURCE_DIR}/src
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Collision_SRCS
			
 
				+	BroadPhaseCollision/b3DynamicBvh.cpp
			
 
				+	BroadPhaseCollision/b3DynamicBvhBroadphase.cpp
			
 
				+	BroadPhaseCollision/b3OverlappingPairCache.cpp
			
 
				+	NarrowPhaseCollision/b3ConvexUtility.cpp
			
 
				+	NarrowPhaseCollision/b3CpuNarrowPhase.cpp
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3CollisionBroadPhase_HDRS
			
 
				+	BroadPhaseCollision/b3BroadphaseCallback.h
			
 
				+	BroadPhaseCollision/b3DynamicBvh.h
			
 
				+	BroadPhaseCollision/b3DynamicBvhBroadphase.h
			
 
				+	BroadPhaseCollision/b3OverlappingPair.h
			
 
				+	BroadPhaseCollision/b3OverlappingPairCache.h
			
 
				+)
			
 
				+SET(Bullet3CollisionBroadPhaseShared_HDRS
			
 
				+	BroadPhaseCollision/shared/b3Aabb.h
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3CollisionNarrowPhase_HDRS
			
 
				+	NarrowPhaseCollision/b3Config.h
			
 
				+	NarrowPhaseCollision/b3Contact4.h
			
 
				+	NarrowPhaseCollision/b3ConvexUtility.h
			
 
				+	NarrowPhaseCollision/b3CpuNarrowPhase.h
			
 
				+	NarrowPhaseCollision/b3RaycastInfo.h
			
 
				+	NarrowPhaseCollision/b3RigidBodyCL.h
			
 
				+)
			
 
				+SET(Bullet3CollisionNarrowPhaseShared_HDRS
			
 
				+
			
 
				+	NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
			
 
				+	NarrowPhaseCollision/shared/b3BvhTraversal.h
			
 
				+	NarrowPhaseCollision/shared/b3ClipFaces.h
			
 
				+	NarrowPhaseCollision/shared/b3Collidable.h
			
 
				+	NarrowPhaseCollision/shared/b3Contact4Data.h
			
 
				+	NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
			
 
				+	NarrowPhaseCollision/shared/b3ContactSphereSphere.h
			
 
				+	NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
			
 
				+	NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
			
 
				+	NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
			
 
				+	NarrowPhaseCollision/shared/b3MprPenetration.h
			
 
				+	NarrowPhaseCollision/shared/b3NewContactReduction.h
			
 
				+	NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
			
 
				+	NarrowPhaseCollision/shared/b3ReduceContacts.h
			
 
				+	NarrowPhaseCollision/shared/b3RigidBodyData.h
			
 
				+	NarrowPhaseCollision/shared/b3UpdateAabbs.h
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Collision_HDRS
			
 
				+	${Bullet3CollisionBroadPhase_HDRS}
			
 
				+	${Bullet3CollisionBroadPhaseShared_HDRS}
			
 
				+	${Bullet3CollisionNarrowPhaseShared_HDRS}
			
 
				+	${Bullet3CollisionNarrowPhase_HDRS}
			
 
				+)
			
 
				+
			
 
				+ADD_LIBRARY(Bullet3Collision ${Bullet3Collision_SRCS} ${Bullet3Collision_HDRS})
			
 
				+if (BUILD_SHARED_LIBS)
			
 
				+  target_link_libraries(Bullet3Collision Bullet3Geometry)
			
 
				+endif ()
			
 
				+SET_TARGET_PROPERTIES(Bullet3Collision PROPERTIES VERSION ${BULLET_VERSION})
			
 
				+SET_TARGET_PROPERTIES(Bullet3Collision PROPERTIES SOVERSION ${BULLET_VERSION})
			
 
				+
			
 
				+IF (INSTALL_LIBS)
			
 
				+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
			
 
				+		#FILES_MATCHING requires CMake 2.6
			
 
				+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			
 
				+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+				INSTALL(TARGETS Bullet3Collision DESTINATION .)
			
 
				+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+				INSTALL(TARGETS Bullet3Collision
			
 
				+					RUNTIME DESTINATION bin
			
 
				+					LIBRARY DESTINATION lib${LIB_SUFFIX}
			
 
				+					ARCHIVE DESTINATION lib${LIB_SUFFIX})
			
 
				+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
			
 
				+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
			
 
				+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
			
 
				+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			
 
				+
			
 
				+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+			SET_TARGET_PROPERTIES(Bullet3Collision PROPERTIES FRAMEWORK true)
			
 
				+			SET_TARGET_PROPERTIES(Bullet3Collision PROPERTIES PUBLIC_HEADER "${Bullet3Collision_HDRS}")
			
 
				+			# Have to list out sub-directories manually:
			
 
				+			#todo
			
 
				+			#SET_PROPERTY(SOURCE ${Bullet3CollisionBroadPhase_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/BroadPhaseCollision)
			
 
				+
			
 
				+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
			
 
				+ENDIF (INSTALL_LIBS)
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3Config.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3Config.h
@@ -0,0 +1,39 @@
 
				+#ifndef B3_CONFIG_H
			
 
				+#define B3_CONFIG_H
			
 
				+
			
 
				+struct b3Config
			
 
				+{  // Plain bundle of integer capacity limits; the constructor below gives every field a default value.
			
 
				+	int m_maxConvexBodies;
			
 
				+	int m_maxConvexShapes;
			
 
				+	int m_maxBroadphasePairs;
			
 
				+	int m_maxContactCapacity;
			
 
				+	int m_compoundPairCapacity;
			
 
				+
			
 
				+	int m_maxVerticesPerFace;
			
 
				+	int m_maxFacesPerShape;
			
 
				+	int m_maxConvexVertices;
			
 
				+	int m_maxConvexIndices;
			
 
				+	int m_maxConvexUniqueEdges;
			
 
				+
			
 
				+	int m_maxCompoundChildShapes;
			
 
				+
			
 
				+	int m_maxTriConvexPairCapacity;
			
 
				+	// Defaults: eight fields are set in the initializer list, the remaining four in the constructor body.
			
 
				+	b3Config()
			
 
				+		: m_maxConvexBodies(128 * 1024),  // 131072
			
 
				+		  m_maxVerticesPerFace(64),
			
 
				+		  m_maxFacesPerShape(12),
			
 
				+		  m_maxConvexVertices(8192),
			
 
				+		  m_maxConvexIndices(81920),
			
 
				+		  m_maxConvexUniqueEdges(8192),
			
 
				+		  m_maxCompoundChildShapes(8192),
			
 
				+		  m_maxTriConvexPairCapacity(256 * 1024)  // 262144
			
 
				+	{
			
 
				+		m_maxConvexShapes = m_maxConvexBodies;  // same default as the body count
			
 
				+		m_maxBroadphasePairs = 16 * m_maxConvexBodies;  // 16 * 131072 = 2097152 with the default above
			
 
				+		m_maxContactCapacity = m_maxBroadphasePairs;  // same default as the pair count
			
 
				+		m_compoundPairCapacity = 1024 * 1024;  // 1048576
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_CONFIG_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h
@@ -0,0 +1,55 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONTACT4_H
			
 
				+#define B3_CONTACT4_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)
			
 
				+b3Contact4 : public b3Contact4Data
			
 
				+{  // Accessor layer over b3Contact4Data; no data members are declared directly in this struct.
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+	// Body ids are stored signed: the magnitude is the id, a negative value marks the body as fixed.
			
 
				+	int getBodyA() const { return abs(m_bodyAPtrAndSignBit); }
			
 
				+	int getBodyB() const { return abs(m_bodyBPtrAndSignBit); }
			
 
				+	bool isBodyAFixed() const { return m_bodyAPtrAndSignBit < 0; }
			
 
				+	bool isBodyBFixed() const { return m_bodyBPtrAndSignBit < 0; }
			
 
				+	//	todo. make it safer
			
 
				+	int& getBatchIdx() { return m_batchIdx; }  // mutable reference: callers can write m_batchIdx through it
			
 
				+	const int& getBatchIdx() const { return m_batchIdx; }
			
 
				+	float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp / (float)0xffff); }  // stored value / 65535
			
 
				+	void setRestituitionCoeff(float c)
			
 
				+	{
			
 
				+		b3Assert(c >= 0.f && c <= 1.f);  // the range is only asserted, not clamped
			
 
				+		m_restituitionCoeffCmp = (unsigned short)(c * 0xffff);  // quantized by a truncating cast of c * 65535
			
 
				+	}
			
 
				+	float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp / (float)0xffff); }  // stored value / 65535
			
 
				+	void setFrictionCoeff(float c)
			
 
				+	{
			
 
				+		b3Assert(c >= 0.f && c <= 1.f);  // the range is only asserted, not clamped
			
 
				+		m_frictionCoeffCmp = (unsigned short)(c * 0xffff);  // quantized by a truncating cast of c * 65535
			
 
				+	}
			
 
				+
			
 
				+	//float& getNPoints() { return m_worldNormal[3]; }
			
 
				+	int getNPoints() const { return (int)m_worldNormalOnB.w; }  // the point count is read from the w component of m_worldNormalOnB
			
 
				+
			
 
				+	float getPenetration(int idx) const { return m_worldPosB[idx].w; }  // w component of the idx-th m_worldPosB entry; idx is not range-checked here
			
 
				+
			
 
				+	bool isInvalid() const { return (getBodyA() == 0 || getBodyB() == 0); }  // a body id of 0 on either side means invalid
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_CONTACT4_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp
@@ -0,0 +1,500 @@
 
				+/*
			
 
				+Copyright (c) 2012 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#include "b3ConvexUtility.h"
			
 
				+#include "Bullet3Geometry/b3ConvexHullComputer.h"
			
 
				+#include "Bullet3Geometry/b3GrahamScan2dConvexHull.h"
			
 
				+#include "Bullet3Common/b3Quaternion.h"
			
 
				+#include "Bullet3Common/b3HashMap.h"
			
 
				+
			
 
				+b3ConvexUtility::~b3ConvexUtility()
			
 
				+{  // empty body: no explicit cleanup code is run here
			
 
				+}
			
 
				+
			
 
				+bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
			
 
				+{  // Computes the convex hull of the numPoints input points, fills m_vertices, appends hull faces to m_faces (merging groups with near-identical normals where possible), then calls initialize(). Always returns true. NOTE(review): mergeCoplanarTriangles is never read in this body.
			
 
				+	b3ConvexHullComputer conv;
			
 
				+	conv.compute(&orgVertices[0].getX(), sizeof(b3Vector3), numPoints, 0.f, 0.f);  // points are read with a stride of sizeof(b3Vector3)
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Vector3> faceNormals;
			
 
				+	int numFaces = conv.faces.size();
			
 
				+	faceNormals.resize(numFaces);
			
 
				+	b3ConvexHullComputer* convexUtil = &conv;
			
 
				+
			
 
				+	b3AlignedObjectArray<b3MyFace> tmpFaces;
			
 
				+	tmpFaces.resize(numFaces);
			
 
				+	// Copy the hull vertices into m_vertices.
			
 
				+	int numVertices = convexUtil->vertices.size();
			
 
				+	m_vertices.resize(numVertices);
			
 
				+	for (int p = 0; p < numVertices; p++)
			
 
				+	{
			
 
				+		m_vertices[p] = convexUtil->vertices[p];
			
 
				+	}
			
 
				+	// Pass 1: for every hull face collect its vertex indices and compute its plane.
			
 
				+	for (int i = 0; i < numFaces; i++)
			
 
				+	{
			
 
				+		int face = convexUtil->faces[i];  // used as an index into convexUtil->edges
			
 
				+		//printf("face=%d\n",face);
			
 
				+		const b3ConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face];
			
 
				+		const b3ConvexHullComputer::Edge* edge = firstEdge;
			
 
				+
			
 
				+		b3Vector3 edges[3];  // only slots 0 and 1 are ever written
			
 
				+		int numEdges = 0;
			
 
				+		//compute face normals
			
 
				+		// Walk the face's edge loop once; every source vertex index is recorded, the first two edge directions are kept.
			
 
				+		do
			
 
				+		{
			
 
				+			int src = edge->getSourceVertex();
			
 
				+			tmpFaces[i].m_indices.push_back(src);
			
 
				+			int targ = edge->getTargetVertex();
			
 
				+			b3Vector3 wa = convexUtil->vertices[src];
			
 
				+
			
 
				+			b3Vector3 wb = convexUtil->vertices[targ];
			
 
				+			b3Vector3 newEdge = wb - wa;
			
 
				+			newEdge.normalize();
			
 
				+			if (numEdges < 2)
			
 
				+				edges[numEdges++] = newEdge;
			
 
				+
			
 
				+			edge = edge->getNextEdgeOfFace();
			
 
				+		} while (edge != firstEdge);
			
 
				+
			
 
				+		b3Scalar planeEq = 1e30f;  // large start value; lowered below to the minimum dot(vertex, normal)
			
 
				+
			
 
				+		if (numEdges == 2)
			
 
				+		{
			
 
				+			faceNormals[i] = edges[0].cross(edges[1]);
			
 
				+			faceNormals[i].normalize();
			
 
				+			tmpFaces[i].m_plane[0] = faceNormals[i].getX();
			
 
				+			tmpFaces[i].m_plane[1] = faceNormals[i].getY();
			
 
				+			tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
			
 
				+			tmpFaces[i].m_plane[3] = planeEq;  // placeholder; overwritten after the vertex loop below
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			b3Assert(0);  //degenerate?
			
 
				+			faceNormals[i].setZero();
			
 
				+		}
			
 
				+		// Smallest dot(vertex, normal) over the face's vertices becomes the plane offset.
			
 
				+		for (int v = 0; v < tmpFaces[i].m_indices.size(); v++)
			
 
				+		{
			
 
				+			b3Scalar eq = m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
			
 
				+			if (planeEq > eq)
			
 
				+			{
			
 
				+				planeEq = eq;
			
 
				+			}
			
 
				+		}
			
 
				+		tmpFaces[i].m_plane[3] = -planeEq;  // fourth plane coefficient is the negated minimum
			
 
				+	}
			
 
				+
			
 
				+	//merge coplanar faces and copy them to m_polyhedron
			
 
				+
			
 
				+	b3Scalar faceWeldThreshold = 0.999f;  // two faces are grouped when the dot product of their normals exceeds this
			
 
				+	b3AlignedObjectArray<int> todoFaces;
			
 
				+	for (int i = 0; i < tmpFaces.size(); i++)
			
 
				+		todoFaces.push_back(i);
			
 
				+	// Pass 2: take the last pending face as reference, group the pending faces with a matching normal, then merge or copy the group.
			
 
				+	while (todoFaces.size())
			
 
				+	{
			
 
				+		b3AlignedObjectArray<int> coplanarFaceGroup;
			
 
				+		int refFace = todoFaces[todoFaces.size() - 1];
			
 
				+
			
 
				+		coplanarFaceGroup.push_back(refFace);
			
 
				+		b3MyFace& faceA = tmpFaces[refFace];
			
 
				+		todoFaces.pop_back();
			
 
				+
			
 
				+		b3Vector3 faceNormalA = b3MakeVector3(faceA.m_plane[0], faceA.m_plane[1], faceA.m_plane[2]);
			
 
				+		for (int j = todoFaces.size() - 1; j >= 0; j--)  // reverse scan; matching entries are removed from todoFaces as they are found
			
 
				+		{
			
 
				+			int i = todoFaces[j];
			
 
				+			b3MyFace& faceB = tmpFaces[i];
			
 
				+			b3Vector3 faceNormalB = b3MakeVector3(faceB.m_plane[0], faceB.m_plane[1], faceB.m_plane[2]);
			
 
				+			if (faceNormalA.dot(faceNormalB) > faceWeldThreshold)
			
 
				+			{
			
 
				+				coplanarFaceGroup.push_back(i);
			
 
				+				todoFaces.remove(i);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		bool did_merge = false;
			
 
				+		if (coplanarFaceGroup.size() > 1)
			
 
				+		{
			
 
				+			//do the merge: use Graham Scan 2d convex hull
			
 
				+
			
 
				+			b3AlignedObjectArray<b3GrahamVector3> orgpoints;
			
 
				+			b3Vector3 averageFaceNormal = b3MakeVector3(0, 0, 0);
			
 
				+			// Sum the group's normals and gather each vertex index used by the group exactly once.
			
 
				+			for (int i = 0; i < coplanarFaceGroup.size(); i++)
			
 
				+			{
			
 
				+				//				m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
			
 
				+
			
 
				+				b3MyFace& face = tmpFaces[coplanarFaceGroup[i]];
			
 
				+				b3Vector3 faceNormal = b3MakeVector3(face.m_plane[0], face.m_plane[1], face.m_plane[2]);
			
 
				+				averageFaceNormal += faceNormal;
			
 
				+				for (int f = 0; f < face.m_indices.size(); f++)
			
 
				+				{
			
 
				+					int orgIndex = face.m_indices[f];
			
 
				+					b3Vector3 pt = m_vertices[orgIndex];
			
 
				+
			
 
				+					bool found = false;
			
 
				+
			
 
				+					for (int i = 0; i < orgpoints.size(); i++)  // note: this 'i' shadows the enclosing loop's 'i'
			
 
				+					{
			
 
				+						//if ((orgpoints[i].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[i]).length2()<0.0001))
			
 
				+						if (orgpoints[i].m_orgIndex == orgIndex)
			
 
				+						{
			
 
				+							found = true;
			
 
				+							break;
			
 
				+						}
			
 
				+					}
			
 
				+					if (!found)
			
 
				+						orgpoints.push_back(b3GrahamVector3(pt, orgIndex));
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			b3MyFace combinedFace;
			
 
				+			for (int i = 0; i < 4; i++)
			
 
				+				combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];  // the merged face reuses the reference face's plane
			
 
				+
			
 
				+			b3AlignedObjectArray<b3GrahamVector3> hull;
			
 
				+
			
 
				+			averageFaceNormal.normalize();
			
 
				+			b3GrahamScanConvexHull2D(orgpoints, hull, averageFaceNormal);
			
 
				+			// Hull points become the merged face's indices; each one is marked as consumed (-1) in orgpoints.
			
 
				+			for (int i = 0; i < hull.size(); i++)
			
 
				+			{
			
 
				+				combinedFace.m_indices.push_back(hull[i].m_orgIndex);
			
 
				+				for (int k = 0; k < orgpoints.size(); k++)
			
 
				+				{
			
 
				+					if (orgpoints[k].m_orgIndex == hull[i].m_orgIndex)
			
 
				+					{
			
 
				+						orgpoints[k].m_orgIndex = -1;  // invalidate...
			
 
				+						break;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			// are there rejected vertices?
			
 
				+			bool reject_merge = false;
			
 
				+			// A point left out of the hull vetoes the merge if any face outside this group still references it.
			
 
				+			for (int i = 0; i < orgpoints.size(); i++)
			
 
				+			{
			
 
				+				if (orgpoints[i].m_orgIndex == -1)
			
 
				+					continue;  // this is in the hull...
			
 
				+				// this vertex is rejected -- is anybody else using this vertex?
			
 
				+				for (int j = 0; j < tmpFaces.size(); j++)
			
 
				+				{
			
 
				+					b3MyFace& face = tmpFaces[j];
			
 
				+					// is this a face of the current coplanar group?
			
 
				+					bool is_in_current_group = false;
			
 
				+					for (int k = 0; k < coplanarFaceGroup.size(); k++)
			
 
				+					{
			
 
				+						if (coplanarFaceGroup[k] == j)
			
 
				+						{
			
 
				+							is_in_current_group = true;
			
 
				+							break;
			
 
				+						}
			
 
				+					}
			
 
				+					if (is_in_current_group)  // ignore this face...
			
 
				+						continue;
			
 
				+					// does this face use this rejected vertex?
			
 
				+					for (int v = 0; v < face.m_indices.size(); v++)
			
 
				+					{
			
 
				+						if (face.m_indices[v] == orgpoints[i].m_orgIndex)
			
 
				+						{
			
 
				+							// this rejected vertex is used in another face -- reject merge
			
 
				+							reject_merge = true;
			
 
				+							break;
			
 
				+						}
			
 
				+					}
			
 
				+					if (reject_merge)
			
 
				+						break;
			
 
				+				}
			
 
				+				if (reject_merge)
			
 
				+					break;
			
 
				+			}
			
 
				+
			
 
				+			if (!reject_merge)
			
 
				+			{
			
 
				+				// do this merge!
			
 
				+				did_merge = true;
			
 
				+				m_faces.push_back(combinedFace);
			
 
				+			}
			
 
				+		}
			
 
				+		if (!did_merge)  // single-face group or vetoed merge: append the group's faces unchanged
			
 
				+		{
			
 
				+			for (int i = 0; i < coplanarFaceGroup.size(); i++)
			
 
				+			{
			
 
				+				b3MyFace face = tmpFaces[coplanarFaceGroup[i]];
			
 
				+				m_faces.push_back(face);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	initialize();
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+inline bool IsAlmostZero(const b3Vector3& v)
			
 
				+{
			
 
				+	// "Almost zero" means no component has a magnitude above 1e-6.
			
 
				+	const bool xTooLarge = fabsf(v.getX()) > 1e-6;
			
 
				+	const bool yTooLarge = fabsf(v.getY()) > 1e-6;
			
 
				+	const bool zTooLarge = fabsf(v.getZ()) > 1e-6;
			
 
				+	return !(xTooLarge || yTooLarge || zTooLarge);
			
 
				+}
			
 
				+
			
 
				+struct b3InternalVertexPair
			
 
				+{  // Pair of vertex indices stored in a fixed order, so (a, b) and (b, a) give the same key; offers getHash()/equals().
			
 
				+	b3InternalVertexPair(short int v0, short int v1)  // NOTE(review): arguments are short int, so wider int indices passed by a caller are narrowed
			
 
				+		: m_v0(v0),
			
 
				+		  m_v1(v1)
			
 
				+	{
			
 
				+		if (m_v1 > m_v0)  // keep the larger index in m_v0
			
 
				+			b3Swap(m_v0, m_v1);
			
 
				+	}
			
 
				+	short int m_v0;
			
 
				+	short int m_v1;
			
 
				+	int getHash() const
			
 
				+	{
			
 
				+		return m_v0 + (m_v1 << 16);  // m_v1 shifted up by 16 bits, plus m_v0
			
 
				+	}
			
 
				+	bool equals(const b3InternalVertexPair& other) const
			
 
				+	{
			
 
				+		return m_v0 == other.m_v0 && m_v1 == other.m_v1;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+struct b3InternalEdge
			
 
				+{  // Two face slots for one edge; both start at -1, meaning "not assigned".
			
 
				+	b3InternalEdge()
			
 
				+		: m_face0(-1),
			
 
				+		  m_face1(-1)
			
 
				+	{
			
 
				+	}
			
 
				+	short int m_face0;
			
 
				+	short int m_face1;
			
 
				+};
			
 
				+
			
 
				+//
			
 
				+
			
 
				+#ifdef TEST_INTERNAL_OBJECTS
			
 
				+bool b3ConvexUtility::testContainment() const
			
 
				+{  // Returns true when all 8 corners of the box m_localCenter +/- m_extents satisfy dot(corner, n) + d <= 0 for every plane in m_faces.
			
 
				+	for (int p = 0; p < 8; p++)
			
 
				+	{
			
 
				+		b3Vector3 LocalPt;  // corner p: the sign pattern of the three extents is selected below
			
 
				+		if (p == 0)
			
 
				+			LocalPt = m_localCenter + b3Vector3(m_extents[0], m_extents[1], m_extents[2]);
			
 
				+		else if (p == 1)
			
 
				+			LocalPt = m_localCenter + b3Vector3(m_extents[0], m_extents[1], -m_extents[2]);
			
 
				+		else if (p == 2)
			
 
				+			LocalPt = m_localCenter + b3Vector3(m_extents[0], -m_extents[1], m_extents[2]);
			
 
				+		else if (p == 3)
			
 
				+			LocalPt = m_localCenter + b3Vector3(m_extents[0], -m_extents[1], -m_extents[2]);
			
 
				+		else if (p == 4)
			
 
				+			LocalPt = m_localCenter + b3Vector3(-m_extents[0], m_extents[1], m_extents[2]);
			
 
				+		else if (p == 5)
			
 
				+			LocalPt = m_localCenter + b3Vector3(-m_extents[0], m_extents[1], -m_extents[2]);
			
 
				+		else if (p == 6)
			
 
				+			LocalPt = m_localCenter + b3Vector3(-m_extents[0], -m_extents[1], m_extents[2]);
			
 
				+		else if (p == 7)
			
 
				+			LocalPt = m_localCenter + b3Vector3(-m_extents[0], -m_extents[1], -m_extents[2]);
			
 
				+		// A corner with a positive plane value for any face fails the whole test.
			
 
				+		for (int i = 0; i < m_faces.size(); i++)
			
 
				+		{
			
 
				+			const b3Vector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
			
 
				+			const b3Scalar d = LocalPt.dot(Normal) + m_faces[i].m_plane[3];
			
 
				+			if (d > 0.0f)
			
 
				+				return false;
			
 
				+		}
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+void b3ConvexUtility::initialize()
			
 
				+{  // Derives data from m_vertices/m_faces: appends distinct edge directions to m_uniqueEdges and computes m_localCenter; with TEST_INTERNAL_OBJECTS also m_radius, mC, mE and m_extents.
			
 
				+	b3HashMap<b3InternalVertexPair, b3InternalEdge> edges;
			
 
				+	// edges: vertex pair -> the faces seen using that pair.
			
 
				+	b3Scalar TotalArea = 0.0f;
			
 
				+
			
 
				+	m_localCenter.setValue(0, 0, 0);
			
 
				+	for (int i = 0; i < m_faces.size(); i++)
			
 
				+	{
			
 
				+		int numVertices = m_faces[i].m_indices.size();
			
 
				+		int NbTris = numVertices;  // in this loop: one iteration per edge of the face
			
 
				+		for (int j = 0; j < NbTris; j++)
			
 
				+		{
			
 
				+			int k = (j + 1) % numVertices;  // next vertex, wrapping back to the first
			
 
				+			b3InternalVertexPair vp(m_faces[i].m_indices[j], m_faces[i].m_indices[k]);
			
 
				+			b3InternalEdge* edptr = edges.find(vp);
			
 
				+			b3Vector3 edge = m_vertices[vp.m_v1] - m_vertices[vp.m_v0];
			
 
				+			edge.normalize();
			
 
				+
			
 
				+			bool found = false;
			
 
				+			b3Vector3 diff, diff2;
			
 
				+			// Linear search: is this direction, or its negation, already in m_uniqueEdges?
			
 
				+			for (int p = 0; p < m_uniqueEdges.size(); p++)
			
 
				+			{
			
 
				+				diff = m_uniqueEdges[p] - edge;
			
 
				+				diff2 = m_uniqueEdges[p] + edge;
			
 
				+
			
 
				+				//	if ((diff.length2()==0.f) ||
			
 
				+				//	(diff2.length2()==0.f))
			
 
				+
			
 
				+				if (IsAlmostZero(diff) ||
			
 
				+					IsAlmostZero(diff2))
			
 
				+				{
			
 
				+					found = true;
			
 
				+					break;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			if (!found)
			
 
				+			{
			
 
				+				m_uniqueEdges.push_back(edge);  // m_uniqueEdges is only appended to in this function, never cleared
			
 
				+			}
			
 
				+			// First face to use the pair is stored as m_face0; a later face using it overwrites m_face1.
			
 
				+			if (edptr)
			
 
				+			{
			
 
				+				//TBD: figure out why I added this assert
			
 
				+				//				b3Assert(edptr->m_face0>=0);
			
 
				+				//			b3Assert(edptr->m_face1<0);
			
 
				+				edptr->m_face1 = i;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				b3InternalEdge ed;
			
 
				+				ed.m_face0 = i;
			
 
				+				edges.insert(vp, ed);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+#ifdef USE_CONNECTED_FACES
			
 
				+	for (int i = 0; i < m_faces.size(); i++)
			
 
				+	{
			
 
				+		int numVertices = m_faces[i].m_indices.size();
			
 
				+		m_faces[i].m_connectedFaces.resize(numVertices);
			
 
				+
			
 
				+		for (int j = 0; j < numVertices; j++)
			
 
				+		{
			
 
				+			int k = (j + 1) % numVertices;
			
 
				+			b3InternalVertexPair vp(m_faces[i].m_indices[j], m_faces[i].m_indices[k]);
			
 
				+			b3InternalEdge* edptr = edges.find(vp);
			
 
				+			b3Assert(edptr);
			
 
				+			b3Assert(edptr->m_face0 >= 0);
			
 
				+			b3Assert(edptr->m_face1 >= 0);
			
 
				+
			
 
				+			int connectedFace = (edptr->m_face0 == i) ? edptr->m_face1 : edptr->m_face0;  // the face on the other side of edge j
			
 
				+			m_faces[i].m_connectedFaces[j] = connectedFace;
			
 
				+		}
			
 
				+	}
			
 
				+#endif  //USE_CONNECTED_FACES
			
 
				+	// m_localCenter: area-weighted average of triangle centroids, each face fanned from its first vertex.
			
 
				+	for (int i = 0; i < m_faces.size(); i++)
			
 
				+	{
			
 
				+		int numVertices = m_faces[i].m_indices.size();
			
 
				+		int NbTris = numVertices - 2;
			
 
				+
			
 
				+		const b3Vector3& p0 = m_vertices[m_faces[i].m_indices[0]];
			
 
				+		for (int j = 1; j <= NbTris; j++)
			
 
				+		{
			
 
				+			int k = (j + 1) % numVertices;
			
 
				+			const b3Vector3& p1 = m_vertices[m_faces[i].m_indices[j]];
			
 
				+			const b3Vector3& p2 = m_vertices[m_faces[i].m_indices[k]];
			
 
				+			b3Scalar Area = ((p0 - p1).cross(p0 - p2)).length() * 0.5f;
			
 
				+			b3Vector3 Center = (p0 + p1 + p2) / 3.0f;
			
 
				+			m_localCenter += Area * Center;
			
 
				+			TotalArea += Area;
			
 
				+		}
			
 
				+	}
			
 
				+	m_localCenter /= TotalArea;  // NOTE(review): TotalArea is still 0 here if no triangle contributed area - confirm callers rule that out
			
 
				+
			
 
				+#ifdef TEST_INTERNAL_OBJECTS
			
 
				+	if (1)
			
 
				+	{
			
 
				+		m_radius = FLT_MAX;  // becomes the smallest |plane value| of m_localCenter over all faces
			
 
				+		for (int i = 0; i < m_faces.size(); i++)
			
 
				+		{
			
 
				+			const b3Vector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
			
 
				+			const b3Scalar dist = b3Fabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
			
 
				+			if (dist < m_radius)
			
 
				+				m_radius = dist;
			
 
				+		}
			
 
				+		// Per-axis minimum and maximum over all vertices.
			
 
				+		b3Scalar MinX = FLT_MAX;
			
 
				+		b3Scalar MinY = FLT_MAX;
			
 
				+		b3Scalar MinZ = FLT_MAX;
			
 
				+		b3Scalar MaxX = -FLT_MAX;
			
 
				+		b3Scalar MaxY = -FLT_MAX;
			
 
				+		b3Scalar MaxZ = -FLT_MAX;
			
 
				+		for (int i = 0; i < m_vertices.size(); i++)
			
 
				+		{
			
 
				+			const b3Vector3& pt = m_vertices[i];
			
 
				+			if (pt.getX() < MinX) MinX = pt.getX();
			
 
				+			if (pt.getX() > MaxX) MaxX = pt.getX();
			
 
				+			if (pt.getY() < MinY) MinY = pt.getY();
			
 
				+			if (pt.getY() > MaxY) MaxY = pt.getY();
			
 
				+			if (pt.getZ() < MinZ) MinZ = pt.getZ();
			
 
				+			if (pt.getZ() > MaxZ) MaxZ = pt.getZ();
			
 
				+		}
			
 
				+		mC.setValue(MaxX + MinX, MaxY + MinY, MaxZ + MinZ);  // sum of max and min per axis (not halved)
			
 
				+		mE.setValue(MaxX - MinX, MaxY - MinY, MaxZ - MinZ);  // full extent per axis
			
 
				+		// Start from a cube of half-size r, stretched to half of mE along the largest axis.
			
 
				+		//		const b3Scalar r = m_radius / sqrtf(2.0f);
			
 
				+		const b3Scalar r = m_radius / sqrtf(3.0f);
			
 
				+		const int LargestExtent = mE.maxAxis();
			
 
				+		const b3Scalar Step = (mE[LargestExtent] * 0.5f - r) / 1024.0f;
			
 
				+		m_extents[0] = m_extents[1] = m_extents[2] = r;
			
 
				+		m_extents[LargestExtent] = mE[LargestExtent] * 0.5f;
			
 
				+		bool FoundBox = false;
			
 
				+		for (int j = 0; j < 1024; j++)  // shrink the largest extent step by step until testContainment() passes
			
 
				+		{
			
 
				+			if (testContainment())
			
 
				+			{
			
 
				+				FoundBox = true;
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			m_extents[LargestExtent] -= Step;
			
 
				+		}
			
 
				+		if (!FoundBox)
			
 
				+		{
			
 
				+			m_extents[0] = m_extents[1] = m_extents[2] = r;  // fall back to the cube of half-size r
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// Refine the box
			
 
				+			const b3Scalar Step = (m_radius - r) / 1024.0f;
			
 
				+			const int e0 = (1 << LargestExtent) & 3;  // maps axis 0->1, 1->2, 2->0
			
 
				+			const int e1 = (1 << e0) & 3;  // same mapping applied again: the remaining axis
			
 
				+
			
 
				+			for (int j = 0; j < 1024; j++)  // grow the two other extents together; undo the last step once containment fails
			
 
				+			{
			
 
				+				const b3Scalar Saved0 = m_extents[e0];
			
 
				+				const b3Scalar Saved1 = m_extents[e1];
			
 
				+				m_extents[e0] += Step;
			
 
				+				m_extents[e1] += Step;
			
 
				+
			
 
				+				if (!testContainment())
			
 
				+				{
			
 
				+					m_extents[e0] = Saved0;
			
 
				+					m_extents[e1] = Saved1;
			
 
				+					break;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+#endif
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h
@@ -0,0 +1,55 @@
 
				+
			
 
				+/*
			
 
				+Copyright (c) 2012 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#ifndef _BT_CONVEX_UTILITY_H
			
 
				+#define _BT_CONVEX_UTILITY_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+
			
 
				+struct b3MyFace
			
 
				+{  // One polygonal face: a list of vertex indices plus four plane coefficients.
			
 
				+	b3AlignedObjectArray<int> m_indices;  // indices into the owning b3ConvexUtility's m_vertices
			
 
				+	b3Scalar m_plane[4];  // [0..2] hold the face normal, [3] the plane offset
			
 
				+};
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3ConvexUtility
			
 
				+{  // Holds the vertices, faces and unique edge directions of a convex shape, plus derived values written by initialize().
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	b3Vector3 m_localCenter;  // computed by initialize()
			
 
				+	b3Vector3 m_extents;      // written by initialize() only when TEST_INTERNAL_OBJECTS is defined
			
 
				+	b3Vector3 mC;             // written by initialize() only when TEST_INTERNAL_OBJECTS is defined
			
 
				+	b3Vector3 mE;             // written by initialize() only when TEST_INTERNAL_OBJECTS is defined
			
 
				+	b3Scalar m_radius;        // written by initialize() only when TEST_INTERNAL_OBJECTS is defined
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Vector3> m_vertices;
			
 
				+	b3AlignedObjectArray<b3MyFace> m_faces;
			
 
				+	b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
			
 
				+
			
 
				+	// Zero the derived values so they are never read while indeterminate,
			
 
				+	// e.g. before initialize() runs or in builds without TEST_INTERNAL_OBJECTS.
			
 
				+	b3ConvexUtility()
			
 
				+		: m_radius(0)
			
 
				+	{
			
 
				+		m_localCenter.setValue(0, 0, 0);
			
 
				+		m_extents.setValue(0, 0, 0);
			
 
				+		mC.setValue(0, 0, 0);
			
 
				+		mE.setValue(0, 0, 0);
			
 
				+	}
			
 
				+	virtual ~b3ConvexUtility();
			
 
				+
			
 
				+	// Builds m_vertices/m_faces from the convex hull of orgVertices and then calls initialize().
			
 
				+	bool initializePolyhedralFeatures(const b3Vector3* orgVertices, int numVertices, bool mergeCoplanarTriangles = true);
			
 
				+
			
 
				+	void initialize();
			
 
				+	bool testContainment() const;  // NOTE(review): the definition in b3ConvexUtility.cpp is guarded by TEST_INTERNAL_OBJECTS
			
 
				+};
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp
@@ -0,0 +1,297 @@
 
				+#include "b3CpuNarrowPhase.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h"
			
 
				+
			
 
				+struct b3CpuNarrowPhaseInternalData  // Private state of b3CpuNarrowPhase, allocated in its constructor and freed in its destructor.
				+{
				+	b3AlignedObjectArray<b3Aabb> m_localShapeAABBCPU;  // Local-space AABBs; one is appended per registered convex hull and read back by getLocalSpaceAabb().
				+	b3AlignedObjectArray<b3Collidable> m_collidablesCPU;  // Collidable records; grown by one entry in allocateCollidable().
				+	b3AlignedObjectArray<b3ConvexUtility*> m_convexData;  // Pointer to the b3ConvexUtility given at registration, indexed by shape index; never deleted through this array.
				+	b3Config m_config;  // Copy of the configuration passed to the constructor (capacities are read from it).
				+
				+	b3AlignedObjectArray<b3ConvexPolyhedronData> m_convexPolyhedra;  // One header per registered hull holding counts and offsets into the pooled arrays below.
				+	b3AlignedObjectArray<b3Vector3> m_uniqueEdges;  // Pooled unique edges of all registered hulls.
				+	b3AlignedObjectArray<b3Vector3> m_convexVertices;  // Pooled vertices of all registered hulls.
				+	b3AlignedObjectArray<int> m_convexIndices;  // Pooled per-face index lists of all registered hulls.
				+	b3AlignedObjectArray<b3GpuFace> m_convexFaces;  // Pooled faces (plane, index offset, index count) of all registered hulls.
				+
				+	b3AlignedObjectArray<b3Contact4Data> m_contacts;  // Contact buffer written by computeContacts() and exposed by getContacts().
				+
				+	int m_numAcceleratedShapes;  // Number of hulls registered so far; also the shape index handed to the next hull.
				+};
			
 
				+
			
 
				+// Read-only view of the contact buffer held by the internal data block.
				+const b3AlignedObjectArray<b3Contact4Data>& b3CpuNarrowPhase::getContacts() const
				+{
				+	const b3AlignedObjectArray<b3Contact4Data>& contactBuffer = m_data->m_contacts;
				+	return contactBuffer;
				+}
			
 
				+
			
 
				+// Mutable access to the collidable stored at `collidableIndex`.
				+b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex)
				+{
				+	b3Collidable& entry = m_data->m_collidablesCPU[collidableIndex];
				+	return entry;
				+}
			
 
				+
			
 
				+// Read-only access to the collidable stored at `collidableIndex`.
				+const b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex) const
				+{
				+	const b3Collidable& entry = m_data->m_collidablesCPU[collidableIndex];
				+	return entry;
				+}
			
 
				+
			
 
				+// Creates the narrow phase with a private copy of `config` and no registered shapes.
				+//
				+// The three index members are set to -1 ("none") so that they never hold an
				+// indeterminate value; getStatic0Index() returns m_static0Index directly, so
				+// leaving it uninitialized made that accessor read garbage.
				+b3CpuNarrowPhase::b3CpuNarrowPhase(const struct b3Config& config)
				+	: m_data(new b3CpuNarrowPhaseInternalData),
				+	  m_acceleratedCompanionShapeIndex(-1),
				+	  m_planeBodyIndex(-1),
				+	  m_static0Index(-1)
				+{
				+	m_data->m_config = config;
				+	m_data->m_numAcceleratedShapes = 0;
				+}
			
 
				+
			
 
				+b3CpuNarrowPhase::~b3CpuNarrowPhase()
				+{
				+	delete m_data;  // Frees the internal data block; the b3ConvexUtility objects pointed to by m_convexData are not deleted here.
				+}
			
 
				+
			
 
				+// Runs the CPU narrow phase over a list of broadphase pairs.
				+//
				+// pairs           - candidate pairs; .x and .y are read as body indices and .z is overwritten
				+//                   with the contact index whenever the convex/convex test returns one (>= 0).
				+// aabbsWorldSpace - not read by the current implementation.
				+// bodies          - rigid bodies; m_collidableIdx selects the collidable of each body.
				+//
				+// The contact buffer is first grown to the configured capacity and finally trimmed to the
				+// number of contacts produced. Only the convex-hull/convex-hull combination does any work;
				+// every other shape combination is an empty placeholder that keeps its disabled call as a
				+// comment.
				+void b3CpuNarrowPhase::computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies)
				+{
				+	b3AlignedObjectArray<b3Collidable>& collidables = m_data->m_collidablesCPU;
				+	b3AlignedObjectArray<b3Contact4Data>& contacts = m_data->m_contacts;
				+
				+	int contactCount = 0;
				+	int contactCapacity = m_data->m_config.m_maxContactCapacity;
				+	int pairCount = pairs.size();
				+	contacts.resize(contactCapacity);
				+
				+	for (int pairIdx = 0; pairIdx < pairCount; ++pairIdx)
				+	{
				+		int bodyA = pairs[pairIdx].x;
				+		int bodyB = pairs[pairIdx].y;
				+		int colA = bodies[bodyA].m_collidableIdx;
				+		int colB = bodies[bodyB].m_collidableIdx;
				+
				+		// sphere vs. convex hull: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_SPHERE &&
				+			collidables[colB].m_shapeType == SHAPE_CONVEX_HULL)
				+		{
				+			//	computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+		}
				+
				+		// convex hull vs. sphere: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_CONVEX_HULL &&
				+			collidables[colB].m_shapeType == SHAPE_SPHERE)
				+		{
				+			//	computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+			//printf("convex-sphere\n");
				+		}
				+
				+		// convex hull vs. plane: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_CONVEX_HULL &&
				+			collidables[colB].m_shapeType == SHAPE_PLANE)
				+		{
				+			//	computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+			//	printf("convex-plane\n");
				+		}
				+
				+		// plane vs. convex hull: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_PLANE &&
				+			collidables[colB].m_shapeType == SHAPE_CONVEX_HULL)
				+		{
				+			//	computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+			//	printf("plane-convex\n");
				+		}
				+
				+		// compound vs. compound: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
				+			collidables[colB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+		{
				+			//	computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],
				+			//		nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU);
				+			//	printf("convex-plane\n");
				+		}
				+
				+		// compound vs. plane: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
				+			collidables[colB].m_shapeType == SHAPE_PLANE)
				+		{
				+			//	computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+			//	printf("convex-plane\n");
				+		}
				+
				+		// plane vs. compound: not implemented on the CPU path
				+		if (collidables[colA].m_shapeType == SHAPE_PLANE &&
				+			collidables[colB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+		{
				+			//	computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
				+			//		&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
				+			//	printf("plane-convex\n");
				+		}
				+
				+		// convex hull vs. convex hull: the only combination that produces contacts
				+		if (collidables[colA].m_shapeType == SHAPE_CONVEX_HULL &&
				+			collidables[colB].m_shapeType == SHAPE_CONVEX_HULL)
				+		{
				+			//printf("pairs[i].z=%d\n",pairs[i].z);
				+			//int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,bodies,
				+			//		m_data->m_collidablesCPU,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
				+			int contactIndex = b3ContactConvexConvexSAT(pairIdx, bodyA, bodyB, colA, colB, bodies,
				+														collidables, m_data->m_convexPolyhedra, m_data->m_convexVertices, m_data->m_uniqueEdges, m_data->m_convexIndices, m_data->m_convexFaces, contacts, contactCount, contactCapacity);
				+
				+			// A negative result means no contact was stored; the pair keeps its previous z.
				+			if (contactIndex >= 0)
				+			{
				+				pairs[pairIdx].z = contactIndex;
				+			}
				+		}
				+	}
				+
				+	contacts.resize(contactCount);
				+}
			
 
				+
			
 
				+// Registers the convex hull described by `utilPtr` and returns the index of its collidable,
				+// or a negative value when no collidable slot could be allocated.
				+//
				+// Side effects: utilPtr->m_localCenter is overwritten with the average of the hull vertices,
				+// the hull data is copied into the pooled arrays by registerConvexHullShapeInternal(), and a
				+// local-space AABB enclosing the vertices is appended to m_localShapeAABBCPU.
				+//
				+// A hull without vertices keeps its centre at the origin. The previous code multiplied by
				+// 1/0 in that case, which turned the centre into NaN. The AABB of an empty hull stays at
				+// its inverted initial value (min = +1e30, max = -1e30).
				+int b3CpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
				+{
				+	int collidableIndex = allocateCollidable();
				+	if (collidableIndex < 0)
				+		return collidableIndex;
				+
				+	b3Collidable& col = m_data->m_collidablesCPU[collidableIndex];
				+	col.m_shapeType = SHAPE_CONVEX_HULL;
				+	col.m_shapeIndex = -1;
				+
				+	const int numVertices = utilPtr->m_vertices.size();
				+
				+	{
				+		b3Vector3 localCenter = b3MakeVector3(0, 0, 0);
				+		for (int i = 0; i < numVertices; i++)
				+			localCenter += utilPtr->m_vertices[i];
				+		// Only average when there is something to average; avoids a division by zero.
				+		if (numVertices > 0)
				+			localCenter *= (1.f / numVertices);
				+		utilPtr->m_localCenter = localCenter;
				+
				+		col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr, col);
				+	}
				+
				+	if (col.m_shapeIndex >= 0)
				+	{
				+		b3Aabb aabb;
				+
				+		// Start from an inverted box so that the first vertex initializes both corners.
				+		b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f);
				+		b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f);
				+
				+		for (int i = 0; i < numVertices; i++)
				+		{
				+			myAabbMin.setMin(utilPtr->m_vertices[i]);
				+			myAabbMax.setMax(utilPtr->m_vertices[i]);
				+		}
				+		aabb.m_min[0] = myAabbMin[0];
				+		aabb.m_min[1] = myAabbMin[1];
				+		aabb.m_min[2] = myAabbMin[2];
				+		aabb.m_minIndices[3] = 0;
				+
				+		aabb.m_max[0] = myAabbMax[0];
				+		aabb.m_max[1] = myAabbMax[1];
				+		aabb.m_max[2] = myAabbMax[2];
				+		aabb.m_signedMaxIndices[3] = 0;
				+
				+		m_data->m_localShapeAABBCPU.push_back(aabb);
				+	}
				+
				+	return collidableIndex;
				+}
			
 
				+
			
 
				+// Appends one collidable slot and returns its index. When the configured shape limit
				+// (m_maxConvexShapes) is already reached, an error is reported and -1 is returned.
				+int b3CpuNarrowPhase::allocateCollidable()
				+{
				+	int nextIndex = m_data->m_collidablesCPU.size();
				+
				+	// Guard clause: refuse to grow past the configured limit.
				+	if (!(nextIndex < m_data->m_config.m_maxConvexShapes))
				+	{
				+		b3Error("allocateCollidable out-of-range %d\n", m_data->m_config.m_maxConvexShapes);
				+		return -1;
				+	}
				+
				+	m_data->m_collidablesCPU.expand();
				+	return nextIndex;
				+}
			
 
				+
			
 
				+// Builds a convex hull from a strided array of float triples and registers it.
				+//
				+// vertices      - first vertex; each vertex is three consecutive floats.
				+// strideInBytes - distance in bytes between the starts of two consecutive vertices.
				+// numVertices   - number of vertices to read; values <= 0 register an empty hull.
				+// scaling       - three per-axis scale factors applied to every vertex; a null pointer is
				+//                 treated as unit scale.
				+//
				+// Returns the collidable index, or a negative value when no collidable could be allocated.
				+//
				+// The temporary b3ConvexUtility is owned by this function. Registration stores its address
				+// in m_convexData, so that entry is cleared before the object is deleted; otherwise the
				+// array would keep a dangling pointer.
				+int b3CpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
				+{
				+	static const float unitScale[3] = {1.f, 1.f, 1.f};
				+	const float* scale = scaling ? scaling : unitScale;
				+
				+	b3AlignedObjectArray<b3Vector3> verts;
				+
				+	// Walk the input bytewise so that arbitrary strides work; const is preserved.
				+	const unsigned char* vts = (const unsigned char*)vertices;
				+	for (int i = 0; i < numVertices; i++)
				+	{
				+		const float* vertex = (const float*)&vts[i * strideInBytes];
				+		verts.push_back(b3MakeVector3(vertex[0] * scale[0], vertex[1] * scale[1], vertex[2] * scale[2]));
				+	}
				+
				+	b3ConvexUtility* utilPtr = new b3ConvexUtility();
				+	bool merge = true;
				+	// Test the array that is actually indexed: a negative numVertices used to pass the
				+	// old `if (numVertices)` check and take &verts[0] of an empty array.
				+	if (verts.size() > 0)
				+	{
				+		utilPtr->initializePolyhedralFeatures(&verts[0], verts.size(), merge);
				+	}
				+
				+	int collidableIndex = registerConvexHullShape(utilPtr);
				+
				+	// Drop the stored address of the temporary before destroying it.
				+	if (collidableIndex >= 0)
				+	{
				+		int shapeIndex = m_data->m_collidablesCPU[collidableIndex].m_shapeIndex;
				+		if (shapeIndex >= 0 && shapeIndex < m_data->m_convexData.size())
				+			m_data->m_convexData[shapeIndex] = 0;
				+	}
				+
				+	delete utilPtr;
				+	return collidableIndex;
				+}
			
 
				+
			
 
				+// Copies one convex hull out of `convexPtr` into the pooled arrays of the internal data
				+// block and returns the index assigned to the new shape.
				+//
				+// A b3ConvexPolyhedronData header is appended that records the summary vectors of the hull
				+// plus, for edges, faces and vertices, the element count and the offset at which the copied
				+// elements start in the corresponding pooled array. `convexPtr` itself is remembered in
				+// m_convexData (no ownership is taken). `col` is not used.
				+int b3CpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr, b3Collidable& col)
				+{
				+	const int shapeSlot = m_data->m_numAcceleratedShapes;
				+	m_data->m_convexData.resize(shapeSlot + 1);
				+	m_data->m_convexPolyhedra.resize(shapeSlot + 1);
				+
				+	// Header of the hull being registered: the last element after the resize above.
				+	b3ConvexPolyhedronData& hull = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1);
				+	hull.m_localCenter = convexPtr->m_localCenter;
				+	hull.m_extents = convexPtr->m_extents;
				+	hull.m_radius = convexPtr->m_radius;
				+	hull.mC = convexPtr->mC;
				+	hull.mE = convexPtr->mE;
				+
				+	// --- unique edges ---
				+	const int edgeBase = m_data->m_uniqueEdges.size();
				+	hull.m_numUniqueEdges = convexPtr->m_uniqueEdges.size();
				+	hull.m_uniqueEdgesOffset = edgeBase;
				+	m_data->m_uniqueEdges.resize(edgeBase + hull.m_numUniqueEdges);
				+	for (int e = 0; e < convexPtr->m_uniqueEdges.size(); ++e)
				+	{
				+		m_data->m_uniqueEdges[edgeBase + e] = convexPtr->m_uniqueEdges[e];
				+	}
				+
				+	// --- faces and their index lists ---
				+	const int faceBase = m_data->m_convexFaces.size();
				+	hull.m_faceOffset = faceBase;
				+	hull.m_numFaces = convexPtr->m_faces.size();
				+	m_data->m_convexFaces.resize(faceBase + hull.m_numFaces);
				+	for (int f = 0; f < convexPtr->m_faces.size(); ++f)
				+	{
				+		// m_convexFaces is not resized inside this loop, so the reference stays valid.
				+		b3GpuFace& dstFace = m_data->m_convexFaces[hull.m_faceOffset + f];
				+		const b3MyFace& srcFace = convexPtr->m_faces[f];
				+
				+		dstFace.m_plane = b3MakeVector3(srcFace.m_plane[0],
				+										srcFace.m_plane[1],
				+										srcFace.m_plane[2],
				+										srcFace.m_plane[3]);
				+
				+		const int indexBase = m_data->m_convexIndices.size();
				+		const int indexCount = srcFace.m_indices.size();
				+		dstFace.m_numIndices = indexCount;
				+		dstFace.m_indexOffset = indexBase;
				+		m_data->m_convexIndices.resize(indexBase + indexCount);
				+		for (int k = 0; k < indexCount; ++k)
				+		{
				+			m_data->m_convexIndices[indexBase + k] = srcFace.m_indices[k];
				+		}
				+	}
				+
				+	// --- vertices ---
				+	const int vertexBase = m_data->m_convexVertices.size();
				+	hull.m_numVertices = convexPtr->m_vertices.size();
				+	hull.m_vertexOffset = vertexBase;
				+	m_data->m_convexVertices.resize(vertexBase + hull.m_numVertices);
				+	for (int v = 0; v < convexPtr->m_vertices.size(); ++v)
				+	{
				+		m_data->m_convexVertices[vertexBase + v] = convexPtr->m_vertices[v];
				+	}
				+
				+	// Remember the source object and hand out the slot that was just filled.
				+	(m_data->m_convexData)[shapeSlot] = convexPtr;
				+	m_data->m_numAcceleratedShapes = shapeSlot + 1;
				+	return shapeSlot;
				+}
			
 
				+
			
 
				+// Returns the local-space AABB stored at position `collidableIndex` of m_localShapeAABBCPU.
				+const b3Aabb& b3CpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
				+{
				+	const b3Aabb& localAabb = m_data->m_localShapeAABBCPU[collidableIndex];
				+	return localAabb;
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h
@@ -0,0 +1,92 @@
 
				+#ifndef B3_CPU_NARROWPHASE_H
			
 
				+#define B3_CPU_NARROWPHASE_H
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+class b3CpuNarrowPhase  // CPU narrow phase: registers collision shapes and computes contacts for broadphase pairs.
				+{
				+protected:
				+	struct b3CpuNarrowPhaseInternalData* m_data;  // Opaque implementation state; the struct is only forward-declared here.
				+	int m_acceleratedCompanionShapeIndex;  // NOTE(review): purpose not visible in this header.
				+	int m_planeBodyIndex;  // NOTE(review): purpose not visible in this header.
				+	int m_static0Index;  // Value returned by getStatic0Index().
				+
				+	int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr, b3Collidable& col);  // Internal helper behind the public registerConvexHullShape() overloads.
				+	int registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling);
				+
				+public:
				+	b3CpuNarrowPhase(const struct b3Config& config);  // `config` supplies the capacities used by the implementation.
				+
				+	virtual ~b3CpuNarrowPhase(void);
				+
				+	int registerSphereShape(float radius);  // The register* functions return an int (presumably the collidable index -- confirm in the .cpp).
				+	int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant);
				+
				+	int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes);
				+	int registerFace(const b3Vector3& faceNormal, float faceConstant);
				+
				+	int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling);
				+
				+	// Open question from the original author: do the two overloads below need to be merged?
				+
				+	int registerConvexHullShape(b3ConvexUtility* utilPtr);  // Registers a hull from an existing b3ConvexUtility.
				+	int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);  // Registers a hull built from strided float triples.
				+
				+	//int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu);
				+	void setObjectTransform(const float* position, const float* orientation, int bodyIndex);
				+
				+	void writeAllBodiesToGpu();
				+	void reset();
				+	void readbackAllBodiesToCpu();
				+	bool getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const;
				+
				+	void setObjectTransformCpu(float* position, float* orientation, int bodyIndex);
				+	void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex);
				+
				+	//virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects);
				+	virtual void computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies);  // Fills the contact buffer returned by getContacts().
				+
				+	const struct b3RigidBodyData* getBodiesCpu() const;
				+	//struct b3RigidBodyData* getBodiesCpu();
				+
				+	int getNumBodiesGpu() const;
				+
				+	int getNumBodyInertiasGpu() const;
				+
				+	const struct b3Collidable* getCollidablesCpu() const;
				+	int getNumCollidablesGpu() const;
				+
				+	/*const struct b3Contact4* getContactsCPU() const;
				+
				+	
				+	int	getNumContactsGpu() const;
				+	*/
				+
				+	const b3AlignedObjectArray<b3Contact4Data>& getContacts() const;  // Contacts produced by the last computeContacts() call.
				+
				+	int getNumRigidBodies() const;
				+
				+	int allocateCollidable();  // Returns the index of a new collidable slot, or -1 when the configured limit is reached.
				+
				+	int getStatic0Index() const
				+	{
				+		return m_static0Index;
				+	}
				+	b3Collidable& getCollidableCpu(int collidableIndex);
				+	const b3Collidable& getCollidableCpu(int collidableIndex) const;
				+
				+	const b3CpuNarrowPhaseInternalData* getInternalData() const  // Read-only access to the implementation state.
				+	{
				+		return m_data;
				+	}
				+
				+	const struct b3Aabb& getLocalSpaceAabb(int collidableIndex) const;
				+};
			
 
				+
			
 
				+#endif  //B3_CPU_NARROWPHASE_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h
@@ -0,0 +1,25 @@
 
				+
			
 
				+#ifndef B3_RAYCAST_INFO_H
			
 
				+#define B3_RAYCAST_INFO_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)  // Description of one ray as a pair of points.
				+b3RayInfo
				+{
				+	b3Vector3 m_from;  // Presumably the start point of the ray -- confirm with the ray-cast code.
				+	b3Vector3 m_to;  // Presumably the end point of the ray -- confirm with the ray-cast code.
				+};
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)  // Result record for one ray; the code that fills it is not part of this header.
				+b3RayHit
				+{
				+	b3Scalar m_hitFraction;  // Presumably the position of the hit along the ray as a fraction -- confirm.
				+	int m_hitBody;  // Presumably the index of the body that was hit -- confirm.
				+	int m_hitResult1;  // NOTE(review): meaning not visible in this header.
				+	int m_hitResult2;  // NOTE(review): meaning not visible in this header.
				+	b3Vector3 m_hitPoint;  // Presumably the hit position -- confirm.
				+	b3Vector3 m_hitNormal;  // Presumably the surface normal at the hit -- confirm.
				+};
			
 
				+
			
 
				+#endif  //B3_RAYCAST_INFO_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h
@@ -0,0 +1,28 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_RIGID_BODY_CL
			
 
				+#define B3_RIGID_BODY_CL
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+// Accessor returning the m_invMass field of a rigid body as a float.
				+inline float b3GetInvMass(const b3RigidBodyData& body)
				+{
				+	const float inverseMass = body.m_invMass;
				+	return inverseMass;
				+}
			
 
				+
			
 
				+#endif  //B3_RIGID_BODY_CL
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
@@ -0,0 +1,19 @@
 
				+
			
 
				+#ifndef B3_BVH_SUBTREE_INFO_DATA_H
			
 
				+#define B3_BVH_SUBTREE_INFO_DATA_H
			
 
				+
			
 
				+typedef struct b3BvhSubtreeInfoData b3BvhSubtreeInfoData_t;
			
 
				+
			
 
				+struct b3BvhSubtreeInfoData  // Header of one BVH subtree: quantized bounds plus the node range it covers.
				+{
				+	//12 bytes: quantized bounds, two arrays of three unsigned shorts
				+	unsigned short int m_quantizedAabbMin[3];
				+	unsigned short int m_quantizedAabbMax[3];
				+	//4 bytes, points to the root of the subtree
				+	int m_rootNodeIndex;
				+	//4 bytes: number of nodes in the subtree (the traversal visits indices in [m_rootNodeIndex, m_rootNodeIndex + m_subtreeSize))
				+	int m_subtreeSize;
				+	int m_padding[3];  // Unused filler (presumably rounds the struct up to 32 bytes -- confirm for the target ABI).
				+};
			
 
				+
			
 
				+#endif  //B3_BVH_SUBTREE_INFO_DATA_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h
@@ -0,0 +1,123 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+
			
 
				+/* work-in-progress
				+ *
				+ * Processes the broadphase pair pairs[id]. When body A is a concave triangle mesh and
				+ * body B is a convex hull, sphere or compound of convex hulls, the quantized BVH of A is
				+ * walked with the quantized world AABB of B and one entry is appended to concavePairsOut
				+ * for every overlapping leaf: (bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB),
				+ * where the last component is 0 for non-compound B. For a compound B one entry per child
				+ * shape is appended.
				+ *
				+ * numConcavePairsOut is advanced atomically before the capacity check, so it can end up
				+ * larger than maxNumConcavePairsCapacity; entries beyond the capacity are not written.
				+ * numPairs is not read here, so id is presumably range-checked by the caller (confirm).
				+ */
				+void b3BvhTraversal(__global const b3Int4* pairs,
				+					__global const b3RigidBodyData* rigidBodies,
				+					__global const b3Collidable* collidables,
				+					__global b3Aabb* aabbs,
				+					__global b3Int4* concavePairsOut,
				+					__global volatile int* numConcavePairsOut,
				+					__global const b3BvhSubtreeInfo* subtreeHeadersRoot,
				+					__global const b3QuantizedBvhNode* quantizedNodesRoot,
				+					__global const b3BvhInfo* bvhInfos,
				+					int numPairs,
				+					int maxNumConcavePairsCapacity,
				+					int id)
				+{
				+	int bodyIndexA = pairs[id].x;
				+	int bodyIndexB = pairs[id].y;
				+	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
				+	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
				+
				+	//once the broadphase avoids static-static pairs, we can remove this test
				+	if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
				+	{
				+		return;
				+	}
				+
				+	if (collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH)  // only a trimesh in slot A is handled
				+		return;
				+
				+	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
				+
				+	if (shapeTypeB != SHAPE_CONVEX_HULL &&
				+		shapeTypeB != SHAPE_SPHERE &&
				+		shapeTypeB != SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+		return;
				+
				+	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];  // m_numChildShapes is used as the BVH index here (presumably shared storage for trimesh collidables -- confirm in b3Collidable.h)
				+
				+	b3Float4 bvhAabbMin = bvhInfo.m_aabbMin;
				+	b3Float4 bvhAabbMax = bvhInfo.m_aabbMax;
				+	b3Float4 bvhQuantization = bvhInfo.m_quantization;
				+	int numSubtreeHeaders = bvhInfo.m_numSubTrees;
				+	__global const b3BvhSubtreeInfoData* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];
				+	__global const b3QuantizedBvhNodeData* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];
				+
				+	unsigned short int quantizedQueryAabbMin[3];
				+	unsigned short int quantizedQueryAabbMax[3];
				+	b3QuantizeWithClamp(quantizedQueryAabbMin, aabbs[bodyIndexB].m_minVec, false, bvhAabbMin, bvhAabbMax, bvhQuantization);  // quantize B's AABB min corner against the BVH bounds
				+	b3QuantizeWithClamp(quantizedQueryAabbMax, aabbs[bodyIndexB].m_maxVec, true, bvhAabbMin, bvhAabbMax, bvhQuantization);  // quantize B's AABB max corner against the BVH bounds
				+
				+	for (int i = 0; i < numSubtreeHeaders; i++)
				+	{
				+		b3BvhSubtreeInfoData subtree = subtreeHeaders[i];
				+
				+		int overlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax);
				+		if (overlap != 0)  // skip subtrees whose bounds miss the query box
				+		{
				+			int startNodeIndex = subtree.m_rootNodeIndex;
				+			int endNodeIndex = subtree.m_rootNodeIndex + subtree.m_subtreeSize;
				+			int curIndex = startNodeIndex;
				+			int escapeIndex;
				+			int isLeafNode;
				+			int aabbOverlap;
				+			while (curIndex < endNodeIndex)  // linear walk over the subtree's node range
				+			{
				+				b3QuantizedBvhNodeData rootNode = quantizedNodes[curIndex];
				+				aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode.m_quantizedAabbMin, rootNode.m_quantizedAabbMax);
				+				isLeafNode = b3IsLeaf(&rootNode);
				+				if (aabbOverlap)
				+				{
				+					if (isLeafNode)
				+					{
				+						int triangleIndex = b3GetTriangleIndex(&rootNode);
				+						if (shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+						{
				+							int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
				+							int pairIdx = b3AtomicAdd(numConcavePairsOut, numChildrenB);  // reserve numChildrenB slots at once (assumes the old counter value is returned -- confirm)
				+							for (int b = 0; b < numChildrenB; b++)
				+							{
				+								if ((pairIdx + b) < maxNumConcavePairsCapacity)  // drop entries that do not fit
				+								{
				+									int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
				+									b3Int4 newPair = b3MakeInt4(bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB);
				+									concavePairsOut[pairIdx + b] = newPair;
				+								}
				+							}
				+						}
				+						else
				+						{
				+							int pairIdx = b3AtomicInc(numConcavePairsOut);  // reserve one slot (assumes the old counter value is returned -- confirm)
				+							if (pairIdx < maxNumConcavePairsCapacity)
				+							{
				+								b3Int4 newPair = b3MakeInt4(bodyIndexA, bodyIndexB, triangleIndex, 0);
				+								concavePairsOut[pairIdx] = newPair;
				+							}
				+						}
				+					}
				+					curIndex++;  // overlapping node: continue with the next node in the array
				+				}
				+				else
				+				{
				+					if (isLeafNode)
				+					{
				+						curIndex++;
				+					}
				+					else
				+					{
				+						escapeIndex = b3GetEscapeIndex(&rootNode);  // non-overlapping internal node: jump ahead by its escape index
				+						curIndex += escapeIndex;
				+					}
				+				}
				+			}
				+		}
				+	}
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h
@@ -0,0 +1,171 @@
 
				+#ifndef B3_CLIP_FACES_H
			
 
				+#define B3_CLIP_FACES_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+
			
 
				+// Linear interpolation of the x, y and z components: t = 0 yields a, t = 1 yields b.
				+// The w component of the result is always 0.
				+inline b3Float4 b3Lerp3(b3Float4ConstArg a, b3Float4ConstArg b, float t)
				+{
				+	const float lerpX = a.x + (b.x - a.x) * t;
				+	const float lerpY = a.y + (b.y - a.y) * t;
				+	const float lerpZ = a.z + (b.z - a.z) * t;
				+	return b3MakeFloat4(lerpX, lerpY, lerpZ, 0.f);
				+}
			
 
				+
			
 
				+// Clips the polygon pVtxIn[0 .. numVertsIn) to the back of a plane, i.e. keeps the part where
				+// dot(planeNormalWS, v) + planeEqWS < 0, and returns the number of vertices written to ppVtxOut.
				+// Edges that cross the plane contribute the interpolated crossing point.
				+// ppVtxOut must be a different buffer than pVtxIn; no capacity check is performed on it.
				+// An empty input polygon yields 0 without touching either buffer.
				+int clipFaceGlobal(__global const b3Float4* pVtxIn, int numVertsIn, b3Float4ConstArg planeNormalWS, float planeEqWS, __global b3Float4* ppVtxOut)
				+{
				+	int ve;
				+	float ds, de;
				+	int numVertsOut = 0;
				+
				+	// Without this guard an empty polygon would read pVtxIn[-1] below. Callers reach that case when
				+	// an earlier plane has already clipped the polygon away completely.
				+	// The stricter (numVertsIn < 2) rejection used by b3ClipFace is intentionally not applied here,
				+	// so single-vertex input behaves exactly as before.
				+	if (numVertsIn < 1)
				+		return 0;
				+
				+	// Start with the closing edge (last vertex -> first vertex).
				+	b3Float4 firstVertex = pVtxIn[numVertsIn - 1];
				+	b3Float4 endVertex = pVtxIn[0];
				+
				+	// ds / de: signed plane distance of the edge's start / end vertex.
				+	ds = b3Dot(planeNormalWS, firstVertex) + planeEqWS;
				+
				+	for (ve = 0; ve < numVertsIn; ve++)
				+	{
				+		endVertex = pVtxIn[ve];
				+		de = b3Dot(planeNormalWS, endVertex) + planeEqWS;
				+		if (ds < 0)
				+		{
				+			if (de < 0)
				+			{
				+				// Start < 0, end < 0, so output endVertex
				+				ppVtxOut[numVertsOut++] = endVertex;
				+			}
				+			else
				+			{
				+				// Start < 0, end >= 0, so output intersection
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
				+			}
				+		}
				+		else
				+		{
				+			if (de < 0)
				+			{
				+				// Start >= 0, end < 0 so output intersection and end
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
				+				ppVtxOut[numVertsOut++] = endVertex;
				+			}
				+			// Start >= 0, end >= 0: the edge lies entirely in front of the plane, output nothing
				+		}
				+		// The end of this edge is the start of the next one.
				+		firstVertex = endVertex;
				+		ds = de;
				+	}
				+	return numVertsOut;
				+}
			
 
				+/*
				+ * Clips one pair's B-face polygon against the side planes of its A-face polygon and collects contact points.
				+ *
				+ * Nothing is done unless hasSeparatingAxis[pairIndex] is non-zero. Each pair owns a slice of
				+ * vertexFaceCapacity entries, starting at pairIndex * vertexFaceCapacity, in worldVertsA1, worldVertsB1
				+ * and worldVertsB2. clippingFacesOut[pairIndex] supplies x = closest face of A (clipping is skipped when
				+ * it is negative), z = vertex count of the A polygon, w = vertex count of the B polygon.
				+ *
				+ * On return clippingFacesOut[pairIndex].w holds the number of contacts, and the first that many entries
				+ * of both B slices hold (x, y, z, depth), where depth is the signed distance to the plane through the
				+ * first A vertex with normal worldNormalsA1[pairIndex].
				+ *
				+ * separatingNormals is not read in this function.
				+ * NOTE(review): nothing here keeps the clipped vertex count within vertexFaceCapacity - confirm that the
				+ * caller sizes the slices generously enough.
				+ */
				+__kernel void clipFacesAndFindContactsKernel(__global const b3Float4* separatingNormals,
				+											 __global const int* hasSeparatingAxis,
				+											 __global b3Int4* clippingFacesOut,
				+											 __global b3Float4* worldVertsA1,
				+											 __global b3Float4* worldNormalsA1,
				+											 __global b3Float4* worldVertsB1,
				+											 __global b3Float4* worldVertsB2,
				+											 int vertexFaceCapacity,
				+											 int pairIndex)
				+{
				+	//    int i = get_global_id(0);
				+	//int pairIndex = i;
				+	int i = pairIndex;  // the pair index is passed in as an argument rather than taken from the work-item id
				+	// depth is clamped from below to minDist; points with depth above maxDist are discarded
				+	float minDist = -1e30f;
				+	float maxDist = 0.02f;
				+
				+	//	if (i<numPairs)
				+	{
				+		if (hasSeparatingAxis[i])
				+		{
				+			//			int bodyIndexA = pairs[i].x;
				+			//		int bodyIndexB = pairs[i].y;
				+
				+			int numLocalContactsOut = 0;
				+
				+			int capacityWorldVertsB2 = vertexFaceCapacity;  // per-pair stride, used for the A slice as well
				+			// ping-pong buffers: each clipping pass reads pVtxIn and writes pVtxOut, then the two are swapped
				+			__global b3Float4* pVtxIn = &worldVertsB1[pairIndex * capacityWorldVertsB2];
				+			__global b3Float4* pVtxOut = &worldVertsB2[pairIndex * capacityWorldVertsB2];
				+
				+			{
				+				__global b3Int4* clippingFaces = clippingFacesOut;
				+
				+				int closestFaceA = clippingFaces[pairIndex].x;
				+				// int closestFaceB = clippingFaces[pairIndex].y;
				+				int numVertsInA = clippingFaces[pairIndex].z;
				+				int numVertsInB = clippingFaces[pairIndex].w;
				+
				+				int numVertsOut = 0;
				+
				+				if (closestFaceA >= 0)
				+				{
				+					// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
				+					// (each side plane is built from one edge of the A polygon and the A face normal)
				+					for (int e0 = 0; e0 < numVertsInA; e0++)
				+					{
				+						const b3Float4 aw = worldVertsA1[pairIndex * capacityWorldVertsB2 + e0];
				+						const b3Float4 bw = worldVertsA1[pairIndex * capacityWorldVertsB2 + ((e0 + 1) % numVertsInA)];  // next vertex, wrapping around
				+						const b3Float4 WorldEdge0 = aw - bw;
				+						b3Float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
				+						b3Float4 planeNormalWS1 = -b3Cross(WorldEdge0, worldPlaneAnormal1);
				+						b3Float4 worldA1 = aw;
				+						float planeEqWS1 = -b3Dot(worldA1, planeNormalWS1);  // makes the side plane pass through aw
				+						b3Float4 planeNormalWS = planeNormalWS1;
				+						float planeEqWS = planeEqWS1;
				+						numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS, planeEqWS, pVtxOut);
				+						__global b3Float4* tmp = pVtxOut;  // swap: this pass's output is the next pass's input
				+						pVtxOut = pVtxIn;
				+						pVtxIn = tmp;
				+						numVertsInB = numVertsOut;
				+						numVertsOut = 0;
				+					}
				+					// plane of the A face itself: its normal, passing through the first A vertex
				+					b3Float4 planeNormalWS = worldNormalsA1[pairIndex];
				+					float planeEqWS = -b3Dot(planeNormalWS, worldVertsA1[pairIndex * capacityWorldVertsB2]);
				+
				+					for (int i = 0; i < numVertsInB; i++)  // NOTE(review): this i shadows the outer i
				+					{
				+						float depth = b3Dot(planeNormalWS, pVtxIn[i]) + planeEqWS;
				+						if (depth <= minDist)
				+						{
				+							depth = minDist;
				+						}
				+						/*
				+						static float maxDepth = 0.f;
				+						if (depth < maxDepth)
				+						{
				+							maxDepth = depth;
				+							if (maxDepth < -10)
				+							{
				+								printf("error at framecount %d?\n",myframecount);
				+							}
				+							printf("maxDepth = %f\n", maxDepth);
				+
				+						}
				+*/
				+						if (depth <= maxDist)
				+						{
				+							b3Float4 pointInWorld = pVtxIn[i];
				+							pVtxOut[numLocalContactsOut++] = b3MakeFloat4(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);  // w carries the depth
				+						}
				+					}
				+				}
				+				clippingFaces[pairIndex].w = numLocalContactsOut;  // w now holds the contact count instead of the B vertex count
				+			}
				+			// mirror the contacts into the other buffer so both B slices hold the result
				+			for (int i = 0; i < numLocalContactsOut; i++)
				+				pVtxIn[i] = pVtxOut[i];
				+
				+		}  //		if (hasSeparatingAxis[i])
				+	}      //	if (i<numPairs)
				+}
			
 
				+
			
 
				+#endif  //B3_CLIP_FACES_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h
@@ -0,0 +1,69 @@
 
				+
			
 
				+#ifndef B3_COLLIDABLE_H
			
 
				+#define B3_COLLIDABLE_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+/*
				+ * Shape type identifiers; presumably the values stored in the m_shapeType fields declared in this
				+ * header - confirm against the code that fills those fields in.
				+ * The numbering is explicit and has a gap: no enumerator is given the value 2.
				+ */
				+enum b3ShapeTypes
				+{
				+	SHAPE_HEIGHT_FIELD = 1,
				+
				+	SHAPE_CONVEX_HULL = 3,
				+	SHAPE_PLANE = 4,
				+	SHAPE_CONCAVE_TRIMESH = 5,
				+	SHAPE_COMPOUND_OF_CONVEX_HULLS = 6,
				+	SHAPE_SPHERE = 7,
				+	MAX_NUM_SHAPE_TYPES,  // implicitly 8, one more than SHAPE_SPHERE
				+};
			
 
				+
			
 
				+typedef struct b3Collidable b3Collidable_t;
			
 
				+/*
				+ * Per-shape record. Three of its four slots are anonymous unions, so those slots have two names for
				+ * the same storage.
				+ * NOTE(review): which name is meaningful presumably depends on m_shapeType - confirm against the code
				+ * that creates these records.
				+ */
				+struct b3Collidable
				+{
				+	union {
				+		int m_numChildShapes;  // presumably the child count of a SHAPE_COMPOUND_OF_CONVEX_HULLS shape - confirm
				+		int m_bvhIndex;
				+	};
				+	union {
				+		float m_radius;  // presumably only meaningful for SHAPE_SPHERE - confirm
				+		int m_compoundBvhIndex;
				+	};
				+
				+	int m_shapeType;  // presumably one of enum b3ShapeTypes - confirm
				+	union {
				+		int m_shapeIndex;
				+		float m_height;
				+	};
				+};
			
 
				+/*
				+ * One child of a compound shape: a position and orientation plus three union slots and a shape type.
				+ * Each union gives two names to the same storage.
				+ * NOTE(review): which name is valid presumably depends on m_shapeType - confirm against the code that
				+ * fills these records in.
				+ */
				+typedef struct b3GpuChildShape b3GpuChildShape_t;
				+struct b3GpuChildShape
				+{
				+	b3Float4 m_childPosition;
				+	b3Quat m_childOrientation;
				+	union {
				+		int m_shapeIndex;  //used for SHAPE_COMPOUND_OF_CONVEX_HULLS
				+		int m_capsuleAxis;
				+	};
				+	union {
				+		float m_radius;        //used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES
				+		int m_numChildShapes;  //used for compound shape
				+	};
				+	union {
				+		float m_height;  //used for childshape of SHAPE_COMPOUND_OF_CAPSULES
				+		int m_collidableShapeIndex;
				+	};
				+	int m_shapeType;  // presumably one of enum b3ShapeTypes - confirm
				+};
			
 
				+/*
				+ * Four ints naming a pair: one body index and one child shape index for each of the sides A and B.
				+ * NOTE(review): the meaning of a child index for a body that is not a compound shape is not visible
				+ * here - confirm against the code that produces these pairs.
				+ */
				+struct b3CompoundOverlappingPair
				+{
				+	int m_bodyIndexA;
				+	int m_bodyIndexB;
				+	//	int	m_pairType;
				+	int m_childShapeIndexA;
				+	int m_childShapeIndexB;
				+};
			
 
				+
			
 
				+#endif  //B3_COLLIDABLE_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h
@@ -0,0 +1,36 @@
 
				+#ifndef B3_CONTACT4DATA_H
			
 
				+#define B3_CONTACT4DATA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+typedef struct b3Contact4Data b3Contact4Data_t;
			
 
				+/*
				+ * A contact record with room for up to four contact points that share one normal.
				+ * The point count is not a separate field: it is stored, as a float, in m_worldNormalOnB.w
				+ * (see b3Contact4Data_getNumPoints / b3Contact4Data_setNumPoints).
				+ */
				+struct b3Contact4Data
				+{
				+	b3Float4 m_worldPosB[4];  // contact points; b3ClipHullHullSingle stores the depth in w
				+	//	b3Float4	m_localPosA[4];
				+	//	b3Float4	m_localPosB[4];
				+	b3Float4 m_worldNormalOnB;  //	w: m_nPoints
				+	unsigned short m_restituitionCoeffCmp;
				+	unsigned short m_frictionCoeffCmp;
				+	int m_batchIdx;
				+	int m_bodyAPtrAndSignBit;  //x:m_bodyAPtr, y:m_bodyBPtr
				+	int m_bodyBPtrAndSignBit;  // b3ClipHullHullSingle stores the body index in both fields, negated when the body's m_invMass is 0
				+
				+	int m_childIndexA;
				+	int m_childIndexB;
				+	int m_unused1;
				+	int m_unused2;
				+};
			
 
				+
			
 
				+// Returns the number of contact points, which is stored as a float in m_worldNormalOnB.w
				+// and truncated to int here. contact must not be null.
				+inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)
				+{
				+	return (int)contact->m_worldNormalOnB.w;
				+}
			
 
				+
			
 
				+// Stores the number of contact points as a float in m_worldNormalOnB.w.
				+// The x, y and z components of the normal are left untouched. contact must not be null.
				+inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)
				+{
				+	contact->m_worldNormalOnB.w = (float)numPoints;
				+}
			
 
				+
			
 
				+#endif  //B3_CONTACT4DATA_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
@@ -0,0 +1,486 @@
 
				+
			
 
				+#ifndef B3_CONTACT_CONVEX_CONVEX_SAT_H
			
 
				+#define B3_CONTACT_CONVEX_CONVEX_SAT_H
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h"
			
 
				+
			
 
				+#define B3_MAX_VERTS 1024
			
 
				+
			
 
				+// Linear interpolation of the x, y and z components: t = 0 yields a, t = 1 yields b.
				+// The w component of the result is always 0.
				+inline b3Float4 b3Lerp3(const b3Float4& a, const b3Float4& b, float t)
				+{
				+	const float lerpX = a.x + (b.x - a.x) * t;
				+	const float lerpY = a.y + (b.y - a.y) * t;
				+	const float lerpZ = a.z + (b.z - a.z) * t;
				+	return b3MakeVector3(lerpX, lerpY, lerpZ, 0.f);
				+}
			
 
				+
			
 
				+// Clips the polygon pVtxIn[0 .. numVertsIn) to the back of the plane (planeNormalWS, planeEqWS):
				+// vertices with a negative signed distance are kept, and every edge that crosses the plane
				+// contributes its crossing point. The result is stored in ppVtxOut and its vertex count is returned.
				+// Polygons with fewer than two vertices produce no output.
				+inline int b3ClipFace(const b3Float4* pVtxIn, int numVertsIn, b3Float4& planeNormalWS, float planeEqWS, b3Float4* ppVtxOut)
				+{
				+	if (numVertsIn < 2)
				+	{
				+		return 0;
				+	}
				+
				+	int outCount = 0;
				+
				+	// Walk the edges (previous -> current), beginning with the closing edge last -> first.
				+	b3Float4 prevVertex = pVtxIn[numVertsIn - 1];
				+	float prevDist = b3Dot3F4(planeNormalWS, prevVertex) + planeEqWS;
				+
				+	for (int idx = 0; idx < numVertsIn; ++idx)
				+	{
				+		const b3Float4 curVertex = pVtxIn[idx];
				+		const float curDist = b3Dot3F4(planeNormalWS, curVertex) + planeEqWS;
				+
				+		const bool prevBehind = prevDist < 0;
				+		const bool curBehind = curDist < 0;
				+
				+		// The edge crosses the plane: emit the crossing point first.
				+		if (prevBehind != curBehind)
				+		{
				+			ppVtxOut[outCount++] = b3Lerp3(prevVertex, curVertex, (prevDist * 1.f / (prevDist - curDist)));
				+		}
				+		// The edge ends behind the plane: keep its end vertex.
				+		if (curBehind)
				+		{
				+			ppVtxOut[outCount++] = curVertex;
				+		}
				+
				+		prevVertex = curVertex;
				+		prevDist = curDist;
				+	}
				+	return outCount;
				+}
			
 
				+
			
 
				+// Clips the world-space polygon worldVertsB1[0 .. numWorldVertsB1) against hull A and collects contacts.
				+//
				+// The witness face of A is the face whose world-space normal has the smallest dot product with
				+// separatingNormal. The polygon is clipped to the back of one side plane per edge of that face,
				+// using worldVertsB1 and worldVertsB2 as ping-pong buffers (both are overwritten). Every surviving
				+// vertex whose signed distance to the witness plane is <= maxDist is appended to contactsOut as
				+// (x, y, z, depth), with depth clamped from below to minDist.
				+//
				+// Returns the number of contacts written, at most contactCapacity; vertices beyond the capacity are
				+// reported through b3Error and dropped. Returns 0 when hull A has no usable face.
				+// capacityWorldVertsB2 is not read in this function.
				+inline int b3ClipFaceAgainstHull(const b3Float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
				+								 const b3Float4& posA, const b3Quaternion& ornA, b3Float4* worldVertsB1, int numWorldVertsB1,
				+								 b3Float4* worldVertsB2, int capacityWorldVertsB2,
				+								 const float minDist, float maxDist,
				+								 const b3AlignedObjectArray<b3Float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
				+								 //const b3Float4* verticesB,	const b3GpuFace* facesB,	const int* indicesB,
				+								 b3Float4* contactsOut,
				+								 int contactCapacity)
				+{
				+	int numContactsOut = 0;
				+
				+	b3Float4* pVtxIn = worldVertsB1;
				+	b3Float4* pVtxOut = worldVertsB2;
				+
				+	int numVertsIn = numWorldVertsB1;
				+	int numVertsOut = 0;
				+
				+	// Pick the witness face: the face of A whose normal is most opposed to the separating normal.
				+	int closestFaceA = -1;
				+	{
				+		float dmin = FLT_MAX;
				+		for (int face = 0; face < hullA->m_numFaces; face++)
				+		{
				+			const b3Float4 Normal = b3MakeVector3(
				+				facesA[hullA->m_faceOffset + face].m_plane.x,
				+				facesA[hullA->m_faceOffset + face].m_plane.y,
				+				facesA[hullA->m_faceOffset + face].m_plane.z, 0.f);
				+			const b3Float4 faceANormalWS = b3QuatRotate(ornA, Normal);
				+
				+			float d = b3Dot3F4(faceANormalWS, separatingNormal);
				+			if (d < dmin)
				+			{
				+				dmin = d;
				+				closestFaceA = face;
				+			}
				+		}
				+	}
				+	if (closestFaceA < 0)
				+		return numContactsOut;
				+
				+	b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];
				+
				+	// World-space normal of the witness face. It does not depend on the edge, so it is computed once
				+	// and shared by the side-plane construction and the final depth test.
				+	b3Float4 planeNormalA = b3MakeFloat4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
				+	b3Float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA);
				+
				+	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
				+	int numVerticesA = polyA.m_numIndices;
				+	for (int e0 = 0; e0 < numVerticesA; e0++)
				+	{
				+		const b3Float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]];
				+		const b3Float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];
				+		const b3Float4 edge0 = a - b;
				+		const b3Float4 WorldEdge0 = b3QuatRotate(ornA, edge0);
				+
				+		// Side plane: contains the edge and is perpendicular to the witness face.
				+		b3Float4 planeNormalWS = -b3Cross3(WorldEdge0, worldPlaneAnormal1);
				+		b3Float4 worldA1 = b3TransformPoint(a, posA, ornA);
				+		float planeEqWS = -b3Dot3F4(worldA1, planeNormalWS);
				+
				+		numVertsOut = b3ClipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);
				+
				+		// Swap the buffers: this pass's output is the next pass's input.
				+		b3Float4* tmp = pVtxOut;
				+		pVtxOut = pVtxIn;
				+		pVtxIn = tmp;
				+		numVertsIn = numVertsOut;
				+		numVertsOut = 0;
				+	}
				+
				+	// only keep points that are behind the witness face
				+	{
				+		float localPlaneEq = polyA.m_plane.w;
				+		b3Float4 planeNormalWS = worldPlaneAnormal1;
				+		float planeEqWS = localPlaneEq - b3Dot3F4(planeNormalWS, posA);
				+		for (int i = 0; i < numVertsIn; i++)
				+		{
				+			float depth = b3Dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS;
				+			if (depth <= minDist)
				+			{
				+				depth = minDist;
				+			}
				+			if (numContactsOut < contactCapacity)
				+			{
				+				if (depth <= maxDist)
				+				{
				+					b3Float4 pointInWorld = pVtxIn[i];
				+					// w carries the penetration depth
				+					contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);
				+				}
				+			}
				+			else
				+			{
				+				// The original format string ended in "%df", which printed a stray 'f' after the capacity.
				+				b3Error("exceeding contact capacity (%d,%d)\n", numContactsOut, contactCapacity);
				+			}
				+		}
				+	}
				+
				+	return numContactsOut;
				+}
			
 
				+
			
 
				+// Finds the face of hull B that faces most along separatingNormal, transforms its vertices to world
				+// space into worldVertsB1, and clips that polygon against hull A via b3ClipFaceAgainstHull.
				+//
				+// worldVertsB1 and worldVertsB2 are scratch buffers with room for capacityWorldVerts entries each;
				+// a face of B with more vertices than that is truncated (and asserts in debug builds).
				+// Contacts are written to contactsOut as (x, y, z, depth), at most contactCapacity of them.
				+// Returns the number of contacts written, or 0 when hull B offers no usable face.
				+inline int b3ClipHullAgainstHull(const b3Float4& separatingNormal,
				+								 const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
				+								 const b3Float4& posA, const b3Quaternion& ornA, const b3Float4& posB, const b3Quaternion& ornB,
				+								 b3Float4* worldVertsB1, b3Float4* worldVertsB2, int capacityWorldVerts,
				+								 const float minDist, float maxDist,
				+								 const b3AlignedObjectArray<b3Float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
				+								 const b3AlignedObjectArray<b3Float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,
				+								 b3Float4* contactsOut,
				+								 int contactCapacity)
				+{
				+	int numContactsOut = 0;
				+	int numWorldVertsB1 = 0;
				+
				+	B3_PROFILE("clipHullAgainstHull");
				+
				+	int closestFaceB = -1;
				+	float dmax = -FLT_MAX;
				+
				+	{
				+#ifdef BT_DEBUG_SAT_FACE
				+		// Limits the debug dump to the first call. Only declared in debug-dump builds so that regular
				+		// builds do not carry shared mutable state that every call writes to.
				+		static bool once = true;
				+#endif  //BT_DEBUG_SAT_FACE
				+
				+		for (int face = 0; face < hullB.m_numFaces; face++)
				+		{
				+#ifdef BT_DEBUG_SAT_FACE
				+			if (once)
				+				printf("face %d\n", face);
				+			const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];
				+			if (once)
				+			{
				+				for (int i = 0; i < faceB->m_numIndices; i++)
				+				{
				+					b3Float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]];
				+					printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);
				+				}
				+			}
				+#endif  //BT_DEBUG_SAT_FACE
				+			{
				+				const b3Float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x,
				+													  facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);
				+				const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
				+#ifdef BT_DEBUG_SAT_FACE
				+				if (once)
				+					printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);
				+#endif
				+				// Keep the face whose world-space normal is most aligned with the separating normal.
				+				float d = b3Dot3F4(WorldNormal, separatingNormal);
				+				if (d > dmax)
				+				{
				+					dmax = d;
				+					closestFaceB = face;
				+				}
				+			}
				+		}
				+#ifdef BT_DEBUG_SAT_FACE
				+		once = false;
				+#endif
				+	}
				+
				+	b3Assert(closestFaceB >= 0);
				+	// With asserts compiled out, a hull without a selectable face must not index facesB below its
				+	// face offset; there is nothing to clip in that case.
				+	if (closestFaceB < 0)
				+		return numContactsOut;
				+
				+	{
				+		const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];
				+		// Never write past the end of the caller's scratch buffer.
				+		b3Assert(polyB.m_numIndices <= capacityWorldVerts);
				+		const int numVertices = (polyB.m_numIndices < capacityWorldVerts) ? polyB.m_numIndices : capacityWorldVerts;
				+		for (int e0 = 0; e0 < numVertices; e0++)
				+		{
				+			const b3Float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
				+			worldVertsB1[numWorldVertsB1++] = b3TransformPoint(b, posB, ornB);
				+		}
				+	}
				+
				+	numContactsOut = b3ClipFaceAgainstHull(separatingNormal, &hullA,
				+										   posA, ornA,
				+										   worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist,
				+										   verticesA, facesA, indicesA,
				+										   contactsOut, contactCapacity);
				+
				+	return numContactsOut;
				+}
			
 
				+/*
				+ * Generates the contacts for one convex-convex pair along an already known separating normal and
				+ * appends them, as a single b3Contact4Data, to *globalContactOut.
				+ *
				+ * Hull B is clipped against hull A with b3ClipHullAgainstHull, the resulting points are reduced with
				+ * b3ReduceContacts and, if nContacts < maxContactCapacity, *globalContactOut is expanded by one
				+ * record that is then filled in; nContacts is incremented.
				+ *
				+ * Returns the index of the new record, or -1 when no contact point was produced or the capacity was
				+ * reached (the latter is also reported through b3Error).
				+ *
				+ * uniqueEdgesA and uniqueEdgesB are not read in this function.
				+ */
				+inline int b3ClipHullHullSingle(
				+	int bodyIndexA, int bodyIndexB,
				+	const b3Float4& posA,
				+	const b3Quaternion& ornA,
				+	const b3Float4& posB,
				+	const b3Quaternion& ornB,
				+
				+	int collidableIndexA, int collidableIndexB,
				+
				+	const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
				+	b3AlignedObjectArray<b3Contact4Data>* globalContactOut,
				+	int& nContacts,
				+
				+	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
				+	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
				+
				+	const b3AlignedObjectArray<b3Vector3>& verticesA,
				+	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
				+	const b3AlignedObjectArray<b3GpuFace>& facesA,
				+	const b3AlignedObjectArray<int>& indicesA,
				+
				+	const b3AlignedObjectArray<b3Vector3>& verticesB,
				+	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
				+	const b3AlignedObjectArray<b3GpuFace>& facesB,
				+	const b3AlignedObjectArray<int>& indicesB,
				+
				+	const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
				+	const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
				+	const b3Vector3& sepNormalWorldSpace,
				+	int maxContactCapacity)
				+{
				+	int contactIndex = -1;  // stays -1 unless a contact record is appended
				+	b3ConvexPolyhedronData hullA, hullB;  // NOTE(review): assigned below but not read again in this function
				+
				+	b3Collidable colA = hostCollidablesA[collidableIndexA];
				+	hullA = hostConvexDataA[colA.m_shapeIndex];
				+	//printf("numvertsA = %d\n",hullA.m_numVertices);
				+
				+	b3Collidable colB = hostCollidablesB[collidableIndexB];
				+	hullB = hostConvexDataB[colB.m_shapeIndex];
				+	//printf("numvertsB = %d\n",hullB.m_numVertices);
				+	// local buffer for the unreduced contact points produced by the clipping step
				+	b3Float4 contactsOut[B3_MAX_VERTS];
				+	int localContactCapacity = B3_MAX_VERTS;
				+
				+#ifdef _WIN32
				+	b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
				+	b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
				+#endif
				+
				+	{
				+		b3Float4 worldVertsB1[B3_MAX_VERTS];  // scratch buffers handed to b3ClipHullAgainstHull
				+		b3Float4 worldVertsB2[B3_MAX_VERTS];
				+		int capacityWorldVerts = B3_MAX_VERTS;
				+
				+		b3Float4 hostNormal = b3MakeFloat4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);
				+		int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
				+		int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
				+		// depth limits passed to the clipping step
				+		b3Scalar minDist = -1;
				+		b3Scalar maxDist = 0.;
				+
				+		b3Transform trA, trB;
				+		{
				+			//B3_PROFILE("b3TransformPoint computation");
				+			//trA.setIdentity();
				+			trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z));
				+			trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w));
				+
				+			//trB.setIdentity();
				+			trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z));
				+			trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));
				+		}
				+		// NOTE(review): the orientations are read back from the transforms rather than used directly - confirm whether this round trip is intended
				+		b3Quaternion trAorn = trA.getRotation();
				+		b3Quaternion trBorn = trB.getRotation();
				+
				+		int numContactsOut = b3ClipHullAgainstHull(hostNormal,
				+												   hostConvexDataA.at(shapeA),
				+												   hostConvexDataB.at(shapeB),
				+												   (b3Float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
				+												   (b3Float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
				+												   worldVertsB1, worldVertsB2, capacityWorldVerts,
				+												   minDist, maxDist,
				+												   verticesA, facesA, indicesA,
				+												   verticesB, facesB, indicesB,
				+
				+												   contactsOut, localContactCapacity);
				+
				+		if (numContactsOut > 0)
				+		{
				+			B3_PROFILE("overlap");
				+
				+			b3Float4 normalOnSurfaceB = (b3Float4&)hostNormal;
				+			//			b3Float4 centerOut;
				+			// indices into contactsOut of the points to keep; b3ReduceContacts receives a pointer to this
				+			b3Int4 contactIdx;
				+			contactIdx.x = 0;
				+			contactIdx.y = 1;
				+			contactIdx.z = 2;
				+			contactIdx.w = 3;
				+
				+			int numPoints = 0;
				+
				+			{
				+				B3_PROFILE("extractManifold");
				+				numPoints = b3ReduceContacts(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
				+			}
				+
				+			b3Assert(numPoints);
				+
				+			if (nContacts < maxContactCapacity)
				+			{
				+				contactIndex = nContacts;
				+				globalContactOut->expand();
				+				b3Contact4Data& contact = globalContactOut->at(nContacts);  // NOTE(review): assumes the array held exactly nContacts entries before expand() - confirm
				+				contact.m_batchIdx = 0;  //i;
				+				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;  // negated index marks a body with zero inverse mass
				+				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;  // NOTE(review): body index 0 cannot be marked this way, since -0 == 0
				+
				+				contact.m_frictionCoeffCmp = 45874;
				+				contact.m_restituitionCoeffCmp = 0;
				+
				+				//	float distance = 0.f;
				+				for (int p = 0; p < numPoints; p++)
				+				{
				+					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];  //check if it is actually on B
				+					contact.m_worldNormalOnB = normalOnSurfaceB;  // same value on every iteration
				+				}
				+				//printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
				+				contact.m_worldNormalOnB.w = (b3Scalar)numPoints;  // the point count lives in the normal's w component
				+				nContacts++;
				+			}
				+			else
				+			{
				+				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
				+			}
				+		}
				+	}
				+	return contactIndex;
				+}
			
 
				+
			
 
				+// Convex-convex contact generation for one pair of bodies.
				+//
				+// Runs b3FindSeparatingAxis on the two hulls; if it reports an axis, b3ClipHullHullSingle generates
				+// the contacts along that axis and appends them to globalContactsOut (advancing nGlobalContactsOut).
				+// Both bodies use the same shape, vertex, edge, face and index arrays.
				+// Returns the value of b3ClipHullHullSingle, or -1 when b3FindSeparatingAxis reports no axis.
				+// pairIndex is not read in this function.
				+inline int b3ContactConvexConvexSAT(
				+	int pairIndex,
				+	int bodyIndexA, int bodyIndexB,
				+	int collidableIndexA, int collidableIndexB,
				+	const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
				+	const b3AlignedObjectArray<b3Collidable>& collidables,
				+	const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
				+	const b3AlignedObjectArray<b3Float4>& convexVertices,
				+	const b3AlignedObjectArray<b3Float4>& uniqueEdges,
				+	const b3AlignedObjectArray<int>& convexIndices,
				+	const b3AlignedObjectArray<b3GpuFace>& faces,
				+	b3AlignedObjectArray<b3Contact4Data>& globalContactsOut,
				+	int& nGlobalContactsOut,
				+	int maxContactCapacity)
				+{
				+	// World transforms of both bodies.
				+	b3Float4 worldPosA = rigidBodies[bodyIndexA].m_pos;
				+	b3Quaternion worldOrnA = rigidBodies[bodyIndexA].m_quat;
				+	b3Float4 worldPosB = rigidBodies[bodyIndexB].m_pos;
				+	b3Quaternion worldOrnB = rigidBodies[bodyIndexB].m_quat;
				+
				+	// Convex hull of each body, looked up through its collidable.
				+	const b3Collidable collidableA = collidables[collidableIndexA];
				+	const b3Collidable collidableB = collidables[collidableIndexB];
				+	b3ConvexPolyhedronData polyhedronA = convexShapes[collidableA.m_shapeIndex];
				+	b3ConvexPolyhedronData polyhedronB = convexShapes[collidableB.m_shapeIndex];
				+
				+#ifdef _WIN32
				+	b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
				+	b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
				+#endif
				+
				+	// Filled in by b3FindSeparatingAxis.
				+	b3Float4 sepAxisWS;
				+	const bool axisFound = b3FindSeparatingAxis(polyhedronA, polyhedronB,
				+												worldPosA, worldOrnA,
				+												worldPosB, worldOrnB,
				+												convexVertices, uniqueEdges, faces, convexIndices,
				+												convexVertices, uniqueEdges, faces, convexIndices,
				+												sepAxisWS);
				+	if (!axisFound)
				+	{
				+		return -1;
				+	}
				+
				+	return b3ClipHullHullSingle(
				+		bodyIndexA, bodyIndexB,
				+		worldPosA, worldOrnA,
				+		worldPosB, worldOrnB,
				+		collidableIndexA, collidableIndexB,
				+		&rigidBodies,
				+		&globalContactsOut,
				+		nGlobalContactsOut,
				+		convexShapes, convexShapes,
				+		convexVertices, uniqueEdges, faces, convexIndices,
				+		convexVertices, uniqueEdges, faces, convexIndices,
				+		collidables, collidables,
				+		sepAxisWS,
				+		maxContactCapacity);
				+}
			
 
				+
			
 
				+#endif  //B3_CONTACT_CONVEX_CONVEX_SAT_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h
@@ -0,0 +1,153 @@
 
				+
			
 
				+#ifndef B3_CONTACT_SPHERE_SPHERE_H
			
 
				+#define B3_CONTACT_SPHERE_SPHERE_H
			
 
				+
			
 
				+void computeContactSphereConvex(int pairIndex,
			
 
				+								int bodyIndexA, int bodyIndexB,
			
 
				+								int collidableIndexA, int collidableIndexB,
			
 
				+								const b3RigidBodyData* rigidBodies,
			
 
				+								const b3Collidable* collidables,
			
 
				+								const b3ConvexPolyhedronData* convexShapes,
			
 
				+								const b3Vector3* convexVertices,
			
 
				+								const int* convexIndices,
			
 
				+								const b3GpuFace* faces,
			
 
				+								b3Contact4* globalContactsOut,
			
 
				+								int& nGlobalContactsOut,
			
 
				+								int maxContactCapacity)
			
 
				+{
			
 
				+	float radius = collidables[collidableIndexA].m_radius;
			
 
				+	float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
			
 
				+	b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;
			
 
				+
			
 
				+	float4 pos = rigidBodies[bodyIndexB].m_pos;
			
 
				+
			
 
				+	b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
			
 
				+
			
 
				+	b3Transform tr;
			
 
				+	tr.setIdentity();
			
 
				+	tr.setOrigin(pos);
			
 
				+	tr.setRotation(quat);
			
 
				+	b3Transform trInv = tr.inverse();
			
 
				+
			
 
				+	float4 spherePos = trInv(spherePos1);
			
 
				+
			
 
				+	int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
			
 
				+	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
			
 
				+	int numFaces = convexShapes[shapeIndex].m_numFaces;
			
 
				+	float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
			
 
				+	float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
			
 
				+	float minDist = -1000000.f;  // TODO: What is the largest/smallest float?
			
 
				+	bool bCollide = true;
			
 
				+	int region = -1;
			
 
				+	float4 localHitNormal;
			
 
				+	for (int f = 0; f < numFaces; f++)
			
 
				+	{
			
 
				+		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];
			
 
				+		float4 planeEqn;
			
 
				+		float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
			
 
				+		float4 n1 = localPlaneNormal;  //quatRotate(quat,localPlaneNormal);
			
 
				+		planeEqn = n1;
			
 
				+		planeEqn[3] = face.m_plane.w;
			
 
				+
			
 
				+		float4 pntReturn;
			
 
				+		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
			
 
				+
			
 
				+		if (dist > radius)
			
 
				+		{
			
 
				+			bCollide = false;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		if (dist > 0)
			
 
				+		{
			
 
				+			//might hit an edge or vertex
			
 
				+			b3Vector3 out;
			
 
				+
			
 
				+			bool isInPoly = IsPointInPolygon(spherePos,
			
 
				+											 &face,
			
 
				+											 &convexVertices[convexShapes[shapeIndex].m_vertexOffset],
			
 
				+											 convexIndices,
			
 
				+											 &out);
			
 
				+			if (isInPoly)
			
 
				+			{
			
 
				+				if (dist > minDist)
			
 
				+				{
			
 
				+					minDist = dist;
			
 
				+					closestPnt = pntReturn;
			
 
				+					localHitNormal = planeEqn;
			
 
				+					region = 1;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				b3Vector3 tmp = spherePos - out;
			
 
				+				b3Scalar l2 = tmp.length2();
			
 
				+				if (l2 < radius * radius)
			
 
				+				{
			
 
				+					dist = b3Sqrt(l2);
			
 
				+					if (dist > minDist)
			
 
				+					{
			
 
				+						minDist = dist;
			
 
				+						closestPnt = out;
			
 
				+						localHitNormal = tmp / dist;
			
 
				+						region = 2;
			
 
				+					}
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					bCollide = false;
			
 
				+					break;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (dist > minDist)
			
 
				+			{
			
 
				+				minDist = dist;
			
 
				+				closestPnt = pntReturn;
			
 
				+				localHitNormal = planeEqn;
			
 
				+				region = 3;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	static int numChecks = 0;
			
 
				+	numChecks++;
			
 
				+
			
 
				+	if (bCollide && minDist > -10000)
			
 
				+	{
			
 
				+		float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal;  //-hitNormalWorld;
			
 
				+		float4 pOnB1 = tr(closestPnt);
			
 
				+		//printf("dist ,%f,",minDist);
			
 
				+		float actualDepth = minDist - radius;
			
 
				+		if (actualDepth < 0)
			
 
				+		{
			
 
				+			//printf("actualDepth = ,%f,", actualDepth);
			
 
				+			//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
			
 
				+			//printf("region=,%d,\n", region);
			
 
				+			pOnB1[3] = actualDepth;
			
 
				+
			
 
				+			int dstIdx;
			
 
				+			//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
			
 
				+
			
 
				+			if (nGlobalContactsOut < maxContactCapacity)
			
 
				+			{
			
 
				+				dstIdx = nGlobalContactsOut;
			
 
				+				nGlobalContactsOut++;
			
 
				+
			
 
				+				b3Contact4* c = &globalContactsOut[dstIdx];
			
 
				+				c->m_worldNormalOnB = normalOnSurfaceB1;
			
 
				+				c->setFrictionCoeff(0.7);
			
 
				+				c->setRestituitionCoeff(0.f);
			
 
				+
			
 
				+				c->m_batchIdx = pairIndex;
			
 
				+				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
			
 
				+				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
			
 
				+				c->m_worldPosB[0] = pOnB1;
			
 
				+				int numPoints = 1;
			
 
				+				c->m_worldNormalOnB.w = (b3Scalar)numPoints;
			
 
				+			}  //if (dstIdx < numPairs)
			
 
				+		}
			
 
				+	}  //if (hasCollision)
			
 
				+}
			
 
				+#endif  //B3_CONTACT_SPHERE_SPHERE_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
@@ -0,0 +1,38 @@
 
				+
			
 
				+#ifndef B3_CONVEX_POLYHEDRON_DATA_H
			
 
				+#define B3_CONVEX_POLYHEDRON_DATA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+
			
 
				+typedef struct b3GpuFace b3GpuFace_t;
			
 
				+struct b3GpuFace
			
 
				+{
			
 
				+	b3Float4 m_plane;
			
 
				+	int m_indexOffset;
			
 
				+	int m_numIndices;
			
 
				+	int m_unusedPadding1;
			
 
				+	int m_unusedPadding2;
			
 
				+};
			
 
				+
			
 
				+typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;
			
 
				+
			
 
				+struct b3ConvexPolyhedronData
			
 
				+{
			
 
				+	b3Float4 m_localCenter;
			
 
				+	b3Float4 m_extents;
			
 
				+	b3Float4 mC;
			
 
				+	b3Float4 mE;
			
 
				+
			
 
				+	float m_radius;
			
 
				+	int m_faceOffset;
			
 
				+	int m_numFaces;
			
 
				+	int m_numVertices;
			
 
				+
			
 
				+	int m_vertexOffset;
			
 
				+	int m_uniqueEdgesOffset;
			
 
				+	int m_numUniqueEdges;
			
 
				+	int m_unused;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_CONVEX_POLYHEDRON_DATA_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
@@ -0,0 +1,797 @@
 
				+#ifndef B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
 
				+#define B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
 
				+
			
 
				+#define B3_TRIANGLE_NUM_CONVEX_FACES 5
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+
			
 
				+inline void b3Project(__global const b3ConvexPolyhedronData* hull, b3Float4ConstArg pos, b3QuatConstArg orn,
			
 
				+					  const b3Float4* dir, __global const b3Float4* vertices, float* min, float* max)
			
 
				+{
			
 
				+	min[0] = FLT_MAX;
			
 
				+	max[0] = -FLT_MAX;
			
 
				+	int numVerts = hull->m_numVertices;
			
 
				+
			
 
				+	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn), *dir);
			
 
				+	float offset = b3Dot(pos, *dir);
			
 
				+	for (int i = 0; i < numVerts; i++)
			
 
				+	{
			
 
				+		float dp = b3Dot(vertices[hull->m_vertexOffset + i], localDir);
			
 
				+		if (dp < min[0])
			
 
				+			min[0] = dp;
			
 
				+		if (dp > max[0])
			
 
				+			max[0] = dp;
			
 
				+	}
			
 
				+	if (min[0] > max[0])
			
 
				+	{
			
 
				+		float tmp = min[0];
			
 
				+		min[0] = max[0];
			
 
				+		max[0] = tmp;
			
 
				+	}
			
 
				+	min[0] += offset;
			
 
				+	max[0] += offset;
			
 
				+}
			
 
				+
			
 
				+inline bool b3TestSepAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
			
 
				+						  b3Float4ConstArg posA, b3QuatConstArg ornA,
			
 
				+						  b3Float4ConstArg posB, b3QuatConstArg ornB,
			
 
				+						  b3Float4* sep_axis, const b3Float4* verticesA, __global const b3Float4* verticesB, float* depth)
			
 
				+{
			
 
				+	float Min0, Max0;
			
 
				+	float Min1, Max1;
			
 
				+	b3Project(hullA, posA, ornA, sep_axis, verticesA, &Min0, &Max0);
			
 
				+	b3Project(hullB, posB, ornB, sep_axis, verticesB, &Min1, &Max1);
			
 
				+
			
 
				+	if (Max0 < Min1 || Max1 < Min0)
			
 
				+		return false;
			
 
				+
			
 
				+	float d0 = Max0 - Min1;
			
 
				+	float d1 = Max1 - Min0;
			
 
				+	*depth = d0 < d1 ? d0 : d1;
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+bool b3FindSeparatingAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
			
 
				+						  b3Float4ConstArg posA1,
			
 
				+						  b3QuatConstArg ornA,
			
 
				+						  b3Float4ConstArg posB1,
			
 
				+						  b3QuatConstArg ornB,
			
 
				+						  b3Float4ConstArg DeltaC2,
			
 
				+
			
 
				+						  const b3Float4* verticesA,
			
 
				+						  const b3Float4* uniqueEdgesA,
			
 
				+						  const b3GpuFace* facesA,
			
 
				+						  const int* indicesA,
			
 
				+
			
 
				+						  __global const b3Float4* verticesB,
			
 
				+						  __global const b3Float4* uniqueEdgesB,
			
 
				+						  __global const b3GpuFace* facesB,
			
 
				+						  __global const int* indicesB,
			
 
				+						  b3Float4* sep,
			
 
				+						  float* dmin)
			
 
				+{
			
 
				+	b3Float4 posA = posA1;
			
 
				+	posA.w = 0.f;
			
 
				+	b3Float4 posB = posB1;
			
 
				+	posB.w = 0.f;
			
 
				+	/*
			
 
				+	static int maxFaceVertex = 0;
			
 
				+
			
 
				+	int curFaceVertexAB = hullA->m_numFaces*hullB->m_numVertices;
			
 
				+	curFaceVertexAB+= hullB->m_numFaces*hullA->m_numVertices;
			
 
				+
			
 
				+	if (curFaceVertexAB>maxFaceVertex)
			
 
				+	{
			
 
				+		maxFaceVertex = curFaceVertexAB;
			
 
				+		printf("curFaceVertexAB = %d\n",curFaceVertexAB);
			
 
				+		printf("hullA->m_numFaces = %d\n",hullA->m_numFaces);
			
 
				+		printf("hullA->m_numVertices = %d\n",hullA->m_numVertices);
			
 
				+		printf("hullB->m_numVertices = %d\n",hullB->m_numVertices);
			
 
				+	}
			
 
				+*/
			
 
				+
			
 
				+	int curPlaneTests = 0;
			
 
				+	{
			
 
				+		int numFacesA = hullA->m_numFaces;
			
 
				+		// Test normals from hullA
			
 
				+		for (int i = 0; i < numFacesA; i++)
			
 
				+		{
			
 
				+			const b3Float4 normal = facesA[hullA->m_faceOffset + i].m_plane;
			
 
				+			b3Float4 faceANormalWS = b3QuatRotate(ornA, normal);
			
 
				+			if (b3Dot(DeltaC2, faceANormalWS) < 0)
			
 
				+				faceANormalWS *= -1.f;
			
 
				+			curPlaneTests++;
			
 
				+			float d;
			
 
				+			if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, &faceANormalWS, verticesA, verticesB, &d))
			
 
				+				return false;
			
 
				+			if (d < *dmin)
			
 
				+			{
			
 
				+				*dmin = d;
			
 
				+				*sep = faceANormalWS;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	if ((b3Dot(-DeltaC2, *sep)) > 0.0f)
			
 
				+	{
			
 
				+		*sep = -(*sep);
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+b3Vector3 unitSphere162[] =
			
 
				+	{
			
 
				+		b3MakeVector3(0.000000, -1.000000, 0.000000),
			
 
				+		b3MakeVector3(0.203181, -0.967950, 0.147618),
			
 
				+		b3MakeVector3(-0.077607, -0.967950, 0.238853),
			
 
				+		b3MakeVector3(0.723607, -0.447220, 0.525725),
			
 
				+		b3MakeVector3(0.609547, -0.657519, 0.442856),
			
 
				+		b3MakeVector3(0.812729, -0.502301, 0.295238),
			
 
				+		b3MakeVector3(-0.251147, -0.967949, 0.000000),
			
 
				+		b3MakeVector3(-0.077607, -0.967950, -0.238853),
			
 
				+		b3MakeVector3(0.203181, -0.967950, -0.147618),
			
 
				+		b3MakeVector3(0.860698, -0.251151, 0.442858),
			
 
				+		b3MakeVector3(-0.276388, -0.447220, 0.850649),
			
 
				+		b3MakeVector3(-0.029639, -0.502302, 0.864184),
			
 
				+		b3MakeVector3(-0.155215, -0.251152, 0.955422),
			
 
				+		b3MakeVector3(-0.894426, -0.447216, 0.000000),
			
 
				+		b3MakeVector3(-0.831051, -0.502299, 0.238853),
			
 
				+		b3MakeVector3(-0.956626, -0.251149, 0.147618),
			
 
				+		b3MakeVector3(-0.276388, -0.447220, -0.850649),
			
 
				+		b3MakeVector3(-0.483971, -0.502302, -0.716565),
			
 
				+		b3MakeVector3(-0.436007, -0.251152, -0.864188),
			
 
				+		b3MakeVector3(0.723607, -0.447220, -0.525725),
			
 
				+		b3MakeVector3(0.531941, -0.502302, -0.681712),
			
 
				+		b3MakeVector3(0.687159, -0.251152, -0.681715),
			
 
				+		b3MakeVector3(0.687159, -0.251152, 0.681715),
			
 
				+		b3MakeVector3(-0.436007, -0.251152, 0.864188),
			
 
				+		b3MakeVector3(-0.956626, -0.251149, -0.147618),
			
 
				+		b3MakeVector3(-0.155215, -0.251152, -0.955422),
			
 
				+		b3MakeVector3(0.860698, -0.251151, -0.442858),
			
 
				+		b3MakeVector3(0.276388, 0.447220, 0.850649),
			
 
				+		b3MakeVector3(0.483971, 0.502302, 0.716565),
			
 
				+		b3MakeVector3(0.232822, 0.657519, 0.716563),
			
 
				+		b3MakeVector3(-0.723607, 0.447220, 0.525725),
			
 
				+		b3MakeVector3(-0.531941, 0.502302, 0.681712),
			
 
				+		b3MakeVector3(-0.609547, 0.657519, 0.442856),
			
 
				+		b3MakeVector3(-0.723607, 0.447220, -0.525725),
			
 
				+		b3MakeVector3(-0.812729, 0.502301, -0.295238),
			
 
				+		b3MakeVector3(-0.609547, 0.657519, -0.442856),
			
 
				+		b3MakeVector3(0.276388, 0.447220, -0.850649),
			
 
				+		b3MakeVector3(0.029639, 0.502302, -0.864184),
			
 
				+		b3MakeVector3(0.232822, 0.657519, -0.716563),
			
 
				+		b3MakeVector3(0.894426, 0.447216, 0.000000),
			
 
				+		b3MakeVector3(0.831051, 0.502299, -0.238853),
			
 
				+		b3MakeVector3(0.753442, 0.657515, 0.000000),
			
 
				+		b3MakeVector3(-0.232822, -0.657519, 0.716563),
			
 
				+		b3MakeVector3(-0.162456, -0.850654, 0.499995),
			
 
				+		b3MakeVector3(0.052790, -0.723612, 0.688185),
			
 
				+		b3MakeVector3(0.138199, -0.894429, 0.425321),
			
 
				+		b3MakeVector3(0.262869, -0.525738, 0.809012),
			
 
				+		b3MakeVector3(0.361805, -0.723611, 0.587779),
			
 
				+		b3MakeVector3(0.531941, -0.502302, 0.681712),
			
 
				+		b3MakeVector3(0.425323, -0.850654, 0.309011),
			
 
				+		b3MakeVector3(0.812729, -0.502301, -0.295238),
			
 
				+		b3MakeVector3(0.609547, -0.657519, -0.442856),
			
 
				+		b3MakeVector3(0.850648, -0.525736, 0.000000),
			
 
				+		b3MakeVector3(0.670817, -0.723611, -0.162457),
			
 
				+		b3MakeVector3(0.670817, -0.723610, 0.162458),
			
 
				+		b3MakeVector3(0.425323, -0.850654, -0.309011),
			
 
				+		b3MakeVector3(0.447211, -0.894428, 0.000001),
			
 
				+		b3MakeVector3(-0.753442, -0.657515, 0.000000),
			
 
				+		b3MakeVector3(-0.525730, -0.850652, 0.000000),
			
 
				+		b3MakeVector3(-0.638195, -0.723609, 0.262864),
			
 
				+		b3MakeVector3(-0.361801, -0.894428, 0.262864),
			
 
				+		b3MakeVector3(-0.688189, -0.525736, 0.499997),
			
 
				+		b3MakeVector3(-0.447211, -0.723610, 0.525729),
			
 
				+		b3MakeVector3(-0.483971, -0.502302, 0.716565),
			
 
				+		b3MakeVector3(-0.232822, -0.657519, -0.716563),
			
 
				+		b3MakeVector3(-0.162456, -0.850654, -0.499995),
			
 
				+		b3MakeVector3(-0.447211, -0.723611, -0.525727),
			
 
				+		b3MakeVector3(-0.361801, -0.894429, -0.262863),
			
 
				+		b3MakeVector3(-0.688189, -0.525736, -0.499997),
			
 
				+		b3MakeVector3(-0.638195, -0.723609, -0.262863),
			
 
				+		b3MakeVector3(-0.831051, -0.502299, -0.238853),
			
 
				+		b3MakeVector3(0.361804, -0.723612, -0.587779),
			
 
				+		b3MakeVector3(0.138197, -0.894429, -0.425321),
			
 
				+		b3MakeVector3(0.262869, -0.525738, -0.809012),
			
 
				+		b3MakeVector3(0.052789, -0.723611, -0.688186),
			
 
				+		b3MakeVector3(-0.029639, -0.502302, -0.864184),
			
 
				+		b3MakeVector3(0.956626, 0.251149, 0.147618),
			
 
				+		b3MakeVector3(0.956626, 0.251149, -0.147618),
			
 
				+		b3MakeVector3(0.951058, -0.000000, 0.309013),
			
 
				+		b3MakeVector3(1.000000, 0.000000, 0.000000),
			
 
				+		b3MakeVector3(0.947213, -0.276396, 0.162458),
			
 
				+		b3MakeVector3(0.951058, 0.000000, -0.309013),
			
 
				+		b3MakeVector3(0.947213, -0.276396, -0.162458),
			
 
				+		b3MakeVector3(0.155215, 0.251152, 0.955422),
			
 
				+		b3MakeVector3(0.436007, 0.251152, 0.864188),
			
 
				+		b3MakeVector3(-0.000000, -0.000000, 1.000000),
			
 
				+		b3MakeVector3(0.309017, 0.000000, 0.951056),
			
 
				+		b3MakeVector3(0.138199, -0.276398, 0.951055),
			
 
				+		b3MakeVector3(0.587786, 0.000000, 0.809017),
			
 
				+		b3MakeVector3(0.447216, -0.276398, 0.850648),
			
 
				+		b3MakeVector3(-0.860698, 0.251151, 0.442858),
			
 
				+		b3MakeVector3(-0.687159, 0.251152, 0.681715),
			
 
				+		b3MakeVector3(-0.951058, -0.000000, 0.309013),
			
 
				+		b3MakeVector3(-0.809018, 0.000000, 0.587783),
			
 
				+		b3MakeVector3(-0.861803, -0.276396, 0.425324),
			
 
				+		b3MakeVector3(-0.587786, 0.000000, 0.809017),
			
 
				+		b3MakeVector3(-0.670819, -0.276397, 0.688191),
			
 
				+		b3MakeVector3(-0.687159, 0.251152, -0.681715),
			
 
				+		b3MakeVector3(-0.860698, 0.251151, -0.442858),
			
 
				+		b3MakeVector3(-0.587786, -0.000000, -0.809017),
			
 
				+		b3MakeVector3(-0.809018, -0.000000, -0.587783),
			
 
				+		b3MakeVector3(-0.670819, -0.276397, -0.688191),
			
 
				+		b3MakeVector3(-0.951058, 0.000000, -0.309013),
			
 
				+		b3MakeVector3(-0.861803, -0.276396, -0.425324),
			
 
				+		b3MakeVector3(0.436007, 0.251152, -0.864188),
			
 
				+		b3MakeVector3(0.155215, 0.251152, -0.955422),
			
 
				+		b3MakeVector3(0.587786, -0.000000, -0.809017),
			
 
				+		b3MakeVector3(0.309017, -0.000000, -0.951056),
			
 
				+		b3MakeVector3(0.447216, -0.276398, -0.850648),
			
 
				+		b3MakeVector3(0.000000, 0.000000, -1.000000),
			
 
				+		b3MakeVector3(0.138199, -0.276398, -0.951055),
			
 
				+		b3MakeVector3(0.670820, 0.276396, 0.688190),
			
 
				+		b3MakeVector3(0.809019, -0.000002, 0.587783),
			
 
				+		b3MakeVector3(0.688189, 0.525736, 0.499997),
			
 
				+		b3MakeVector3(0.861804, 0.276394, 0.425323),
			
 
				+		b3MakeVector3(0.831051, 0.502299, 0.238853),
			
 
				+		b3MakeVector3(-0.447216, 0.276397, 0.850649),
			
 
				+		b3MakeVector3(-0.309017, -0.000001, 0.951056),
			
 
				+		b3MakeVector3(-0.262869, 0.525738, 0.809012),
			
 
				+		b3MakeVector3(-0.138199, 0.276397, 0.951055),
			
 
				+		b3MakeVector3(0.029639, 0.502302, 0.864184),
			
 
				+		b3MakeVector3(-0.947213, 0.276396, -0.162458),
			
 
				+		b3MakeVector3(-1.000000, 0.000001, 0.000000),
			
 
				+		b3MakeVector3(-0.850648, 0.525736, -0.000000),
			
 
				+		b3MakeVector3(-0.947213, 0.276397, 0.162458),
			
 
				+		b3MakeVector3(-0.812729, 0.502301, 0.295238),
			
 
				+		b3MakeVector3(-0.138199, 0.276397, -0.951055),
			
 
				+		b3MakeVector3(-0.309016, -0.000000, -0.951057),
			
 
				+		b3MakeVector3(-0.262869, 0.525738, -0.809012),
			
 
				+		b3MakeVector3(-0.447215, 0.276397, -0.850649),
			
 
				+		b3MakeVector3(-0.531941, 0.502302, -0.681712),
			
 
				+		b3MakeVector3(0.861804, 0.276396, -0.425322),
			
 
				+		b3MakeVector3(0.809019, 0.000000, -0.587782),
			
 
				+		b3MakeVector3(0.688189, 0.525736, -0.499997),
			
 
				+		b3MakeVector3(0.670821, 0.276397, -0.688189),
			
 
				+		b3MakeVector3(0.483971, 0.502302, -0.716565),
			
 
				+		b3MakeVector3(0.077607, 0.967950, 0.238853),
			
 
				+		b3MakeVector3(0.251147, 0.967949, 0.000000),
			
 
				+		b3MakeVector3(0.000000, 1.000000, 0.000000),
			
 
				+		b3MakeVector3(0.162456, 0.850654, 0.499995),
			
 
				+		b3MakeVector3(0.361800, 0.894429, 0.262863),
			
 
				+		b3MakeVector3(0.447209, 0.723612, 0.525728),
			
 
				+		b3MakeVector3(0.525730, 0.850652, 0.000000),
			
 
				+		b3MakeVector3(0.638194, 0.723610, 0.262864),
			
 
				+		b3MakeVector3(-0.203181, 0.967950, 0.147618),
			
 
				+		b3MakeVector3(-0.425323, 0.850654, 0.309011),
			
 
				+		b3MakeVector3(-0.138197, 0.894430, 0.425320),
			
 
				+		b3MakeVector3(-0.361804, 0.723612, 0.587778),
			
 
				+		b3MakeVector3(-0.052790, 0.723612, 0.688185),
			
 
				+		b3MakeVector3(-0.203181, 0.967950, -0.147618),
			
 
				+		b3MakeVector3(-0.425323, 0.850654, -0.309011),
			
 
				+		b3MakeVector3(-0.447210, 0.894429, 0.000000),
			
 
				+		b3MakeVector3(-0.670817, 0.723611, -0.162457),
			
 
				+		b3MakeVector3(-0.670817, 0.723611, 0.162457),
			
 
				+		b3MakeVector3(0.077607, 0.967950, -0.238853),
			
 
				+		b3MakeVector3(0.162456, 0.850654, -0.499995),
			
 
				+		b3MakeVector3(-0.138197, 0.894430, -0.425320),
			
 
				+		b3MakeVector3(-0.052790, 0.723612, -0.688185),
			
 
				+		b3MakeVector3(-0.361804, 0.723612, -0.587778),
			
 
				+		b3MakeVector3(0.361800, 0.894429, -0.262863),
			
 
				+		b3MakeVector3(0.638194, 0.723610, -0.262864),
			
 
				+		b3MakeVector3(0.447209, 0.723612, -0.525728)};
			
 
				+
			
 
				+bool b3FindSeparatingAxisEdgeEdge(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
			
 
				+								  b3Float4ConstArg posA1,
			
 
				+								  b3QuatConstArg ornA,
			
 
				+								  b3Float4ConstArg posB1,
			
 
				+								  b3QuatConstArg ornB,
			
 
				+								  b3Float4ConstArg DeltaC2,
			
 
				+								  const b3Float4* verticesA,
			
 
				+								  const b3Float4* uniqueEdgesA,
			
 
				+								  const b3GpuFace* facesA,
			
 
				+								  const int* indicesA,
			
 
				+								  __global const b3Float4* verticesB,
			
 
				+								  __global const b3Float4* uniqueEdgesB,
			
 
				+								  __global const b3GpuFace* facesB,
			
 
				+								  __global const int* indicesB,
			
 
				+								  b3Float4* sep,
			
 
				+								  float* dmin,
			
 
				+								  bool searchAllEdgeEdge)
			
 
				+{
			
 
				+	b3Float4 posA = posA1;
			
 
				+	posA.w = 0.f;
			
 
				+	b3Float4 posB = posB1;
			
 
				+	posB.w = 0.f;
			
 
				+
			
 
				+	//	int curPlaneTests=0;
			
 
				+
			
 
				+	int curEdgeEdge = 0;
			
 
				+	// Test edges
			
 
				+	static int maxEdgeTests = 0;
			
 
				+	int curEdgeTests = hullA->m_numUniqueEdges * hullB->m_numUniqueEdges;
			
 
				+	if (curEdgeTests > maxEdgeTests)
			
 
				+	{
			
 
				+		maxEdgeTests = curEdgeTests;
			
 
				+		printf("maxEdgeTests = %d\n", maxEdgeTests);
			
 
				+		printf("hullA->m_numUniqueEdges = %d\n", hullA->m_numUniqueEdges);
			
 
				+		printf("hullB->m_numUniqueEdges = %d\n", hullB->m_numUniqueEdges);
			
 
				+	}
			
 
				+
			
 
				+	if (searchAllEdgeEdge)
			
 
				+	{
			
 
				+		for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
			
 
				+		{
			
 
				+			const b3Float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset + e0];
			
 
				+			b3Float4 edge0World = b3QuatRotate(ornA, edge0);
			
 
				+
			
 
				+			for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
			
 
				+			{
			
 
				+				const b3Float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset + e1];
			
 
				+				b3Float4 edge1World = b3QuatRotate(ornB, edge1);
			
 
				+
			
 
				+				b3Float4 crossje = b3Cross(edge0World, edge1World);
			
 
				+
			
 
				+				curEdgeEdge++;
			
 
				+				if (!b3IsAlmostZero(crossje))
			
 
				+				{
			
 
				+					crossje = b3Normalized(crossje);
			
 
				+					if (b3Dot(DeltaC2, crossje) < 0)
			
 
				+						crossje *= -1.f;
			
 
				+
			
 
				+					float dist;
			
 
				+					bool result = true;
			
 
				+					{
			
 
				+						float Min0, Max0;
			
 
				+						float Min1, Max1;
			
 
				+						b3Project(hullA, posA, ornA, &crossje, verticesA, &Min0, &Max0);
			
 
				+						b3Project(hullB, posB, ornB, &crossje, verticesB, &Min1, &Max1);
			
 
				+
			
 
				+						if (Max0 < Min1 || Max1 < Min0)
			
 
				+							return false;
			
 
				+
			
 
				+						float d0 = Max0 - Min1;
			
 
				+						float d1 = Max1 - Min0;
			
 
				+						dist = d0 < d1 ? d0 : d1;
			
 
				+						result = true;
			
 
				+					}
			
 
				+
			
 
				+					if (dist < *dmin)
			
 
				+					{
			
 
				+						*dmin = dist;
			
 
				+						*sep = crossje;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
			
 
				+		//printf("numDirections =%d\n",numDirections );
			
 
				+
			
 
				+		for (int i = 0; i < numDirections; i++)
			
 
				+		{
			
 
				+			b3Float4 crossje = unitSphere162[i];
			
 
				+			{
			
 
				+				//if (b3Dot(DeltaC2,crossje)>0)
			
 
				+				{
			
 
				+					float dist;
			
 
				+					bool result = true;
			
 
				+					{
			
 
				+						float Min0, Max0;
			
 
				+						float Min1, Max1;
			
 
				+						b3Project(hullA, posA, ornA, &crossje, verticesA, &Min0, &Max0);
			
 
				+						b3Project(hullB, posB, ornB, &crossje, verticesB, &Min1, &Max1);
			
 
				+
			
 
				+						if (Max0 < Min1 || Max1 < Min0)
			
 
				+							return false;
			
 
				+
			
 
				+						float d0 = Max0 - Min1;
			
 
				+						float d1 = Max1 - Min0;
			
 
				+						dist = d0 < d1 ? d0 : d1;
			
 
				+						result = true;
			
 
				+					}
			
 
				+
			
 
				+					if (dist < *dmin)
			
 
				+					{
			
 
				+						*dmin = dist;
			
 
				+						*sep = crossje;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if ((b3Dot(-DeltaC2, *sep)) > 0.0f)
			
 
				+	{
			
 
				+		*sep = -(*sep);
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal,
			
 
				+							   __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
			
 
				+							   b3Float4ConstArg posA, b3QuatConstArg ornA, b3Float4ConstArg posB, b3QuatConstArg ornB,
			
 
				+							   __global b3Float4* worldVertsA1,
			
 
				+							   __global b3Float4* worldNormalsA1,
			
 
				+							   __global b3Float4* worldVertsB1,
			
 
				+							   int capacityWorldVerts,
			
 
				+							   const float minDist, float maxDist,
			
 
				+							   __global const b3Float4* verticesA,
			
 
				+							   __global const b3GpuFace_t* facesA,
			
 
				+							   __global const int* indicesA,
			
 
				+							   __global const b3Float4* verticesB,
			
 
				+							   __global const b3GpuFace_t* facesB,
			
 
				+							   __global const int* indicesB,
			
 
				+
			
 
				+							   __global b3Int4* clippingFaces, int pairIndex)
			
 
				+{
			
 
				+	int numContactsOut = 0;
			
 
				+	int numWorldVertsB1 = 0;
			
 
				+
			
 
				+	int closestFaceB = -1;
			
 
				+	float dmax = -FLT_MAX;
			
 
				+
			
 
				+	{
			
 
				+		for (int face = 0; face < hullB->m_numFaces; face++)
			
 
				+		{
			
 
				+			const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset + face].m_plane.x,
			
 
				+												 facesB[hullB->m_faceOffset + face].m_plane.y, facesB[hullB->m_faceOffset + face].m_plane.z, 0.f);
			
 
				+			const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
			
 
				+			float d = b3Dot(WorldNormal, separatingNormal);
			
 
				+			if (d > dmax)
			
 
				+			{
			
 
				+				dmax = d;
			
 
				+				closestFaceB = face;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset + closestFaceB];
			
 
				+		const int numVertices = polyB.m_numIndices;
			
 
				+		for (int e0 = 0; e0 < numVertices; e0++)
			
 
				+		{
			
 
				+			const b3Float4 b = verticesB[hullB->m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
			
 
				+			worldVertsB1[pairIndex * capacityWorldVerts + numWorldVertsB1++] = b3TransformPoint(b, posB, ornB);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	int closestFaceA = -1;
			
 
				+	{
			
 
				+		float dmin = FLT_MAX;
			
 
				+		for (int face = 0; face < hullA->m_numFaces; face++)
			
 
				+		{
			
 
				+			const b3Float4 Normal = b3MakeFloat4(
			
 
				+				facesA[hullA->m_faceOffset + face].m_plane.x,
			
 
				+				facesA[hullA->m_faceOffset + face].m_plane.y,
			
 
				+				facesA[hullA->m_faceOffset + face].m_plane.z,
			
 
				+				0.f);
			
 
				+			const b3Float4 faceANormalWS = b3QuatRotate(ornA, Normal);
			
 
				+
			
 
				+			float d = b3Dot(faceANormalWS, separatingNormal);
			
 
				+			if (d < dmin)
			
 
				+			{
			
 
				+				dmin = d;
			
 
				+				closestFaceA = face;
			
 
				+				worldNormalsA1[pairIndex] = faceANormalWS;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	int numVerticesA = facesA[hullA->m_faceOffset + closestFaceA].m_numIndices;
			
 
				+	for (int e0 = 0; e0 < numVerticesA; e0++)
			
 
				+	{
			
 
				+		const b3Float4 a = verticesA[hullA->m_vertexOffset + indicesA[facesA[hullA->m_faceOffset + closestFaceA].m_indexOffset + e0]];
			
 
				+		worldVertsA1[pairIndex * capacityWorldVerts + e0] = b3TransformPoint(a, posA, ornA);
			
 
				+	}
			
 
				+
			
 
				+	clippingFaces[pairIndex].x = closestFaceA;
			
 
				+	clippingFaces[pairIndex].y = closestFaceB;
			
 
				+	clippingFaces[pairIndex].z = numVerticesA;
			
 
				+	clippingFaces[pairIndex].w = numWorldVertsB1;
			
 
				+
			
 
				+	return numContactsOut;
			
 
				+}
			
 
				+/*
				+ * Separating-axis test for one concave pair: a single face of body A's shape,
				+ * rebuilt here as a small triangle polyhedron, against body B (a convex hull or
				+ * one child of a compound of convex hulls).
				+ *
				+ * The pair is read from concavePairs[pairIdx]:
				+ *   .x / .y  body indices of A and B
				+ *   .z       face index inside A's shape
				+ *   .w       read as the gpuChildShapes index when B is a compound
				+ *
				+ * Results, all indexed by pairIdx:
				+ *   - hasSeparatingNormals is cleared, then set to 1 only when all three axis
				+ *     searches (A vs B, B vs A, edge-edge) return true.
				+ *   - concaveSeparatingNormalsOut receives the axis, with dmin stored in .w.
				+ *   - clippingFacesOut and the world vertex/normal buffers are handed to
				+ *     b3FindClippingFaces.
				+ *   - concavePairs[pairIdx].w is set to -1 when the pair is rejected (B has an
				+ *     unsupported shape type, the AABBs do not overlap, or an axis search
				+ *     returned false).
				+ *
				+ * The pair is selected by the pairIdx argument; the get_global_id() path is
				+ * commented out, so numConcavePairs is not used by the live code.
				+ * NOTE(review): the early return for an unsupported shape type happens before
				+ * hasSeparatingNormals[i] is cleared - confirm callers pre-clear that buffer.
				+ */
				+__kernel void b3FindConcaveSeparatingAxisKernel(__global b3Int4* concavePairs,
				+												__global const b3RigidBodyData* rigidBodies,
				+												__global const b3Collidable* collidables,
				+												__global const b3ConvexPolyhedronData* convexShapes,
				+												__global const b3Float4* vertices,
				+												__global const b3Float4* uniqueEdges,
				+												__global const b3GpuFace* faces,
				+												__global const int* indices,
				+												__global const b3GpuChildShape* gpuChildShapes,
				+												__global b3Aabb* aabbs,
				+												__global b3Float4* concaveSeparatingNormalsOut,
				+												__global b3Int4* clippingFacesOut,
				+												__global b3Vector3* worldVertsA1Out,
				+												__global b3Vector3* worldNormalsA1Out,
				+												__global b3Vector3* worldVertsB1Out,
				+												__global int* hasSeparatingNormals,
				+												int vertexFaceCapacity,
				+												int numConcavePairs,
				+												int pairIdx)
				+{
				+	int i = pairIdx;  // i and pairIdx are the same index throughout this function
				+	/*	int i = get_global_id(0);
				+	if (i>=numConcavePairs)
				+		return;
				+	int pairIdx = i;
				+	*/
				+
				+	int bodyIndexA = concavePairs[i].x;
				+	int bodyIndexB = concavePairs[i].y;
				+
				+	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
				+	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
				+
				+	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
				+	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
				+
				+	if (collidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL &&
				+		collidables[collidableIndexB].m_shapeType != SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+	{
				+		concavePairs[pairIdx].w = -1;  // reject: B is neither a convex hull nor a compound of convex hulls
				+		return;
				+	}
				+
				+	hasSeparatingNormals[i] = 0;
				+
				+	//	int numFacesA = convexShapes[shapeIndexA].m_numFaces;
				+	int numActualConcaveConvexTests = 0;  // incremented below but never read
				+
				+	int f = concavePairs[i].z;  // face index relative to shape A's m_faceOffset
				+
				+	bool overlap = false;
				+
				+	b3ConvexPolyhedronData convexPolyhedronA;
				+
				+	//add 3 vertices of the triangle
				+	convexPolyhedronA.m_numVertices = 3;
				+	convexPolyhedronA.m_vertexOffset = 0;
				+	b3Float4 localCenter = b3MakeFloat4(0.f, 0.f, 0.f, 0.f);
				+
				+	b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset + f];
				+	b3Aabb triAabb;
				+	triAabb.m_minVec = b3MakeFloat4(1e30f, 1e30f, 1e30f, 0.f);
				+	triAabb.m_maxVec = b3MakeFloat4(-1e30f, -1e30f, -1e30f, 0.f);
				+
				+	b3Float4 verticesA[3];
				+	for (int i = 0; i < 3; i++)  // this i shadows the pair index declared above
				+	{
				+		int index = indices[face.m_indexOffset + i];
				+		b3Float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset + index];
				+		verticesA[i] = vert;
				+		localCenter += vert;  // summed here, divided by 3 further down
				+
				+		triAabb.m_minVec = b3MinFloat4(triAabb.m_minVec, vert);
				+		triAabb.m_maxVec = b3MaxFloat4(triAabb.m_maxVec, vert);
				+	}
				+	// triAabb is built from the vertices exactly as stored (posA/ornA are not applied) and compared per axis with aabbs[bodyIndexB]
				+	overlap = true;
				+	overlap = (triAabb.m_minVec.x > aabbs[bodyIndexB].m_maxVec.x || triAabb.m_maxVec.x < aabbs[bodyIndexB].m_minVec.x) ? false : overlap;
				+	overlap = (triAabb.m_minVec.z > aabbs[bodyIndexB].m_maxVec.z || triAabb.m_maxVec.z < aabbs[bodyIndexB].m_minVec.z) ? false : overlap;
				+	overlap = (triAabb.m_minVec.y > aabbs[bodyIndexB].m_maxVec.y || triAabb.m_maxVec.y < aabbs[bodyIndexB].m_minVec.y) ? false : overlap;
				+
				+	if (overlap)
				+	{
				+		float dmin = FLT_MAX;
				+		int hasSeparatingAxis = 5;  // overwritten before it is read; only zero / non-zero matters below
				+		b3Float4 sepAxis = b3MakeFloat4(1, 2, 3, 4);
				+
				+		//	int localCC=0;
				+		numActualConcaveConvexTests++;
				+
				+		//a triangle has 3 unique edges
				+		convexPolyhedronA.m_numUniqueEdges = 3;
				+		convexPolyhedronA.m_uniqueEdgesOffset = 0;
				+		b3Float4 uniqueEdgesA[3];
				+
				+		uniqueEdgesA[0] = (verticesA[1] - verticesA[0]);
				+		uniqueEdgesA[1] = (verticesA[2] - verticesA[1]);
				+		uniqueEdgesA[2] = (verticesA[0] - verticesA[2]);
				+
				+		convexPolyhedronA.m_faceOffset = 0;
				+
				+		b3Float4 normal = b3MakeFloat4(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
				+
				+		b3GpuFace facesA[B3_TRIANGLE_NUM_CONVEX_FACES];
				+		int indicesA[3 + 3 + 2 + 2 + 2];  // front face (3) + back face (3) + three edge faces (2 each)
				+		int curUsedIndices = 0;
				+		int fidx = 0;
				+
				+		//front side of triangle
				+		{
				+			facesA[fidx].m_indexOffset = curUsedIndices;
				+			indicesA[0] = 0;
				+			indicesA[1] = 1;
				+			indicesA[2] = 2;
				+			curUsedIndices += 3;
				+			float c = face.m_plane.w;
				+			facesA[fidx].m_plane.x = normal.x;
				+			facesA[fidx].m_plane.y = normal.y;
				+			facesA[fidx].m_plane.z = normal.z;
				+			facesA[fidx].m_plane.w = c;
				+			facesA[fidx].m_numIndices = 3;
				+		}
				+		fidx++;
				+		//back side of triangle: reversed winding and negated normal
				+		{
				+			facesA[fidx].m_indexOffset = curUsedIndices;
				+			indicesA[3] = 2;
				+			indicesA[4] = 1;
				+			indicesA[5] = 0;
				+			curUsedIndices += 3;
				+			float c = b3Dot(normal, verticesA[0]);
				+			//	float c1 = -face.m_plane.w;
				+			facesA[fidx].m_plane.x = -normal.x;
				+			facesA[fidx].m_plane.y = -normal.y;
				+			facesA[fidx].m_plane.z = -normal.z;
				+			facesA[fidx].m_plane.w = c;
				+			facesA[fidx].m_numIndices = 3;
				+		}
				+		fidx++;
				+
				+		bool addEdgePlanes = true;
				+		if (addEdgePlanes)
				+		{
				+			int numVertices = 3;
				+			int prevVertex = numVertices - 1;
				+			for (int i = 0; i < numVertices; i++)
				+			{
				+				b3Float4 v0 = verticesA[i];
				+				b3Float4 v1 = verticesA[prevVertex];
				+
				+				b3Float4 edgeNormal = b3Normalized(b3Cross(normal, v1 - v0));
				+				float c = -b3Dot(edgeNormal, v0);
				+				// one two-index face per edge (prevVertex, i), with a plane through v0
				+				facesA[fidx].m_numIndices = 2;
				+				facesA[fidx].m_indexOffset = curUsedIndices;
				+				indicesA[curUsedIndices++] = i;
				+				indicesA[curUsedIndices++] = prevVertex;
				+
				+				facesA[fidx].m_plane.x = edgeNormal.x;
				+				facesA[fidx].m_plane.y = edgeNormal.y;
				+				facesA[fidx].m_plane.z = edgeNormal.z;
				+				facesA[fidx].m_plane.w = c;
				+				fidx++;
				+				prevVertex = i;
				+			}
				+		}
				+		convexPolyhedronA.m_numFaces = B3_TRIANGLE_NUM_CONVEX_FACES;
				+		convexPolyhedronA.m_localCenter = localCenter * (1.f / 3.f);
				+
				+		b3Float4 posA = rigidBodies[bodyIndexA].m_pos;
				+		posA.w = 0.f;
				+		b3Float4 posB = rigidBodies[bodyIndexB].m_pos;
				+		posB.w = 0.f;
				+
				+		b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
				+		b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
				+
				+		///////////////////
				+		///compound shape support
				+		// For a compound B, replace posB/ornB with the child's world transform and shapeIndexB with the child's shape
				+		if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+		{
				+			int compoundChild = concavePairs[pairIdx].w;
				+			int childShapeIndexB = compoundChild;  //collidables[collidableIndexB].m_shapeIndex+compoundChild;
				+			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
				+			b3Float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
				+			b3Quaternion childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
				+			b3Float4 newPosB = b3TransformPoint(childPosB, posB, ornB);
				+			b3Quaternion newOrnB = b3QuatMul(ornB, childOrnB);
				+			posB = newPosB;
				+			ornB = newOrnB;
				+			shapeIndexB = collidables[childColIndexB].m_shapeIndex;
				+		}
				+		//////////////////
				+
				+		b3Float4 c0local = convexPolyhedronA.m_localCenter;
				+		b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
				+		b3Float4 c1local = convexShapes[shapeIndexB].m_localCenter;
				+		b3Float4 c1 = b3TransformPoint(c1local, posB, ornB);
				+		const b3Float4 DeltaC2 = c0 - c1;
				+
				+		bool sepA = b3FindSeparatingAxis(&convexPolyhedronA, &convexShapes[shapeIndexB],
				+										 posA, ornA,
				+										 posB, ornB,
				+										 DeltaC2,
				+										 verticesA, uniqueEdgesA, facesA, indicesA,
				+										 vertices, uniqueEdges, faces, indices,
				+										 &sepAxis, &dmin);
				+		hasSeparatingAxis = 4;
				+		if (!sepA)
				+		{
				+			hasSeparatingAxis = 0;
				+		}
				+		else
				+		{
				+			bool sepB = b3FindSeparatingAxis(&convexShapes[shapeIndexB], &convexPolyhedronA,
				+											 posB, ornB,
				+											 posA, ornA,
				+											 DeltaC2,
				+											 vertices, uniqueEdges, faces, indices,
				+											 verticesA, uniqueEdgesA, facesA, indicesA,
				+											 &sepAxis, &dmin);
				+
				+			if (!sepB)
				+			{
				+				hasSeparatingAxis = 0;
				+			}
				+			else
				+			{
				+				bool sepEE = b3FindSeparatingAxisEdgeEdge(&convexPolyhedronA, &convexShapes[shapeIndexB],
				+														  posA, ornA,
				+														  posB, ornB,
				+														  DeltaC2,
				+														  verticesA, uniqueEdgesA, facesA, indicesA,
				+														  vertices, uniqueEdges, faces, indices,
				+														  &sepAxis, &dmin, true);
				+
				+				if (!sepEE)
				+				{
				+					hasSeparatingAxis = 0;
				+				}
				+				else
				+				{
				+					hasSeparatingAxis = 1;
				+				}
				+			}
				+		}
				+
				+		if (hasSeparatingAxis)  // non-zero only when sepA, sepB and sepEE were all true
				+		{
				+			hasSeparatingNormals[i] = 1;
				+			sepAxis.w = dmin;
				+			concaveSeparatingNormalsOut[pairIdx] = sepAxis;
				+
				+			//now compute clipping faces A and B, and world-space clipping vertices A and B...
				+
				+			float minDist = -1e30f;
				+			float maxDist = 0.02f;
				+
				+			b3FindClippingFaces(sepAxis,
				+								&convexPolyhedronA,
				+								&convexShapes[shapeIndexB],
				+								posA, ornA,
				+								posB, ornB,
				+								worldVertsA1Out,
				+								worldNormalsA1Out,
				+								worldVertsB1Out,
				+								vertexFaceCapacity,
				+								minDist, maxDist,
				+								verticesA,
				+								facesA,
				+								indicesA,
				+
				+								vertices,
				+								faces,
				+								indices,
				+								clippingFacesOut, pairIdx);
				+		}
				+		else
				+		{
				+			//mark this pair as inactive
				+			concavePairs[pairIdx].w = -1;
				+		}
				+	}
				+	else
				+	{
				+		//mark this pair as inactive
				+		concavePairs[pairIdx].w = -1;
				+	}
				+}
			
 
				+
			
 
				+#endif  //B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
@@ -0,0 +1,197 @@
 
				+#ifndef B3_FIND_SEPARATING_AXIS_H
			
 
				+#define B3_FIND_SEPARATING_AXIS_H
			
 
				+
			
 
				+// Projects the hull's vertices onto the world-space axis `dir` and reports
				+// the covered interval through `min` / `max`.
				+// The axis is rotated into the hull's local frame once, so the stored
				+// vertices are used untransformed; the projection of `pos` onto `dir` is
				+// added afterwards to shift the interval into world space.
				+inline void b3ProjectAxis(const b3ConvexPolyhedronData& hull, const b3Float4& pos, const b3Quaternion& orn, const b3Float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
				+{
				+	const b3Float4 axisLocal = b3QuatRotate(orn.inverse(), dir);
				+	const b3Scalar originProjection = b3Dot3F4(pos, dir);
				+
				+	b3Scalar lowest = FLT_MAX;
				+	b3Scalar highest = -FLT_MAX;
				+
				+	const int vertexCount = hull.m_numVertices;
				+	for (int v = 0; v < vertexCount; ++v)
				+	{
				+		const b3Scalar projection = b3Dot3F4((b3Float4&)vertices[hull.m_vertexOffset + v], axisLocal);
				+		if (projection < lowest) lowest = projection;
				+		if (projection > highest) highest = projection;
				+	}
				+
				+	// Only reachable when no vertex updated the bounds (they still hold their
				+	// inverted sentinels); swapping yields the widest possible interval.
				+	if (lowest > highest)
				+	{
				+		const b3Scalar swapped = lowest;
				+		lowest = highest;
				+		highest = swapped;
				+	}
				+
				+	min = lowest + originProjection;
				+	max = highest + originProjection;
				+}
			
 
				+
			
 
				+// Projects both hulls onto `sep_axis` and compares the two intervals.
				+// Returns false when the intervals are disjoint (`depth` is left untouched);
				+// otherwise returns true and stores the smaller of the two overlap amounts
				+// in `depth`.
				+inline bool b3TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
				+						  const b3Float4& posA, const b3Quaternion& ornA,
				+						  const b3Float4& posB, const b3Quaternion& ornB,
				+						  const b3Float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)
				+{
				+	b3Scalar lowA, highA;
				+	b3Scalar lowB, highB;
				+	b3ProjectAxis(hullA, posA, ornA, sep_axis, verticesA, lowA, highA);
				+	b3ProjectAxis(hullB, posB, ornB, sep_axis, verticesB, lowB, highB);
				+
				+	// A entirely below B on this axis
				+	if (highA < lowB)
				+	{
				+		return false;
				+	}
				+	// B entirely below A on this axis
				+	if (highB < lowA)
				+	{
				+		return false;
				+	}
				+
				+	const b3Scalar overlapAB = highA - lowB;
				+	b3Assert(overlapAB >= 0.0f);
				+	const b3Scalar overlapBA = highB - lowA;
				+	b3Assert(overlapBA >= 0.0f);
				+
				+	if (overlapAB < overlapBA)
				+	{
				+		depth = overlapAB;
				+	}
				+	else
				+	{
				+		depth = overlapBA;
				+	}
				+	return true;
				+}
			
 
				+/*
				+ * Separating-axis search between two convex hulls.
				+ *
				+ * Candidate axes are tested in three groups: the world-space face normals of
				+ * hullA, the world-space face normals of hullB, and the normalized cross
				+ * products of every unique-edge pair (near-zero cross products are skipped).
				+ * Each candidate is flipped so that its dot product with deltaC2 (world
				+ * center of A minus world center of B) is not negative.
				+ *
				+ * Returns false as soon as b3TestSepAxis reports disjoint projections on a
				+ * candidate. Otherwise returns true, with `sep` holding the candidate that
				+ * produced the smallest depth, negated at the end if it points along -deltaC2.
				+ * `sep` is only written when at least one candidate is tested.
				+ *
				+ * indicesA and indicesB are accepted but not read here. curPlaneTests and
				+ * curEdgeEdge are counted but never read. The TEST_INTERNAL_OBJECTS sections
				+ * use names (transA, transB, DeltaC2, Cross, gUseInternalObject, ...) that
				+ * are not declared in this function.
				+ */
				+inline bool b3FindSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
				+								 const b3Float4& posA1,
				+								 const b3Quaternion& ornA,
				+								 const b3Float4& posB1,
				+								 const b3Quaternion& ornB,
				+								 const b3AlignedObjectArray<b3Vector3>& verticesA,
				+								 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
				+								 const b3AlignedObjectArray<b3GpuFace>& facesA,
				+								 const b3AlignedObjectArray<int>& indicesA,
				+								 const b3AlignedObjectArray<b3Vector3>& verticesB,
				+								 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
				+								 const b3AlignedObjectArray<b3GpuFace>& facesB,
				+								 const b3AlignedObjectArray<int>& indicesB,
				+
				+								 b3Vector3& sep)
				+{
				+	B3_PROFILE("findSeparatingAxis");
				+
				+	b3Float4 posA = posA1;
				+	posA.w = 0.f;  // work on copies with w cleared
				+	b3Float4 posB = posB1;
				+	posB.w = 0.f;
				+	//#ifdef TEST_INTERNAL_OBJECTS
				+	b3Float4 c0local = (b3Float4&)hullA.m_localCenter;
				+
				+	b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
				+	b3Float4 c1local = (b3Float4&)hullB.m_localCenter;
				+	b3Float4 c1 = b3TransformPoint(c1local, posB, ornB);
				+	const b3Float4 deltaC2 = c0 - c1;
				+	//#endif
				+
				+	b3Scalar dmin = FLT_MAX;  // smallest depth seen so far
				+	int curPlaneTests = 0;
				+
				+	int numFacesA = hullA.m_numFaces;
				+	// Test normals from hullA
				+	for (int i = 0; i < numFacesA; i++)
				+	{
				+		const b3Float4& normal = (b3Float4&)facesA[hullA.m_faceOffset + i].m_plane;
				+		b3Float4 faceANormalWS = b3QuatRotate(ornA, normal);
				+
				+		if (b3Dot3F4(deltaC2, faceANormalWS) < 0)
				+			faceANormalWS *= -1.f;
				+
				+		curPlaneTests++;
				+#ifdef TEST_INTERNAL_OBJECTS
				+		gExpectedNbTests++;
				+		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
				+			continue;
				+		gActualNbTests++;
				+#endif
				+
				+		b3Scalar d;
				+		if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))
				+			return false;
				+
				+		if (d < dmin)
				+		{
				+			dmin = d;
				+			sep = (b3Vector3&)faceANormalWS;
				+		}
				+	}
				+
				+	int numFacesB = hullB.m_numFaces;
				+	// Test normals from hullB
				+	for (int i = 0; i < numFacesB; i++)
				+	{
				+		b3Float4 normal = (b3Float4&)facesB[hullB.m_faceOffset + i].m_plane;
				+		b3Float4 WorldNormal = b3QuatRotate(ornB, normal);
				+
				+		if (b3Dot3F4(deltaC2, WorldNormal) < 0)
				+		{
				+			WorldNormal *= -1.f;
				+		}
				+		curPlaneTests++;
				+#ifdef TEST_INTERNAL_OBJECTS
				+		gExpectedNbTests++;
				+		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))
				+			continue;
				+		gActualNbTests++;
				+#endif
				+
				+		b3Scalar d;
				+		if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))
				+			return false;
				+
				+		if (d < dmin)
				+		{
				+			dmin = d;
				+			sep = (b3Vector3&)WorldNormal;
				+		}
				+	}
				+
				+	//	b3Vector3 edgeAstart,edgeAend,edgeBstart,edgeBend;
				+
				+	int curEdgeEdge = 0;
				+	// Test edges
				+	for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
				+	{
				+		const b3Float4& edge0 = (b3Float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0];
				+		b3Float4 edge0World = b3QuatRotate(ornA, (b3Float4&)edge0);
				+
				+		for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
				+		{
				+			const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1];
				+			b3Float4 edge1World = b3QuatRotate(ornB, (b3Float4&)edge1);
				+
				+			b3Float4 crossje = b3Cross3(edge0World, edge1World);
				+
				+			curEdgeEdge++;
				+			if (!b3IsAlmostZero((b3Vector3&)crossje))  // skip pairs whose cross product is almost zero
				+			{
				+				crossje = b3FastNormalized3(crossje);
				+				if (b3Dot3F4(deltaC2, crossje) < 0)
				+					crossje *= -1.f;
				+
				+#ifdef TEST_INTERNAL_OBJECTS
				+				gExpectedNbTests++;
				+				if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))
				+					continue;
				+				gActualNbTests++;
				+#endif
				+
				+				b3Scalar dist;
				+				if (!b3TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))
				+					return false;
				+
				+				if (dist < dmin)
				+				{
				+					dmin = dist;
				+					sep = (b3Vector3&)crossje;
				+				}
				+			}
				+		}
				+	}
				+
				+	if ((b3Dot3F4(-deltaC2, (b3Float4&)sep)) > 0.0f)  // final orientation: negate sep when it points along -deltaC2
				+		sep = -sep;
				+
				+	return true;
				+}
			
 
				+
			
 
				+#endif  //B3_FIND_SEPARATING_AXIS_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h
@@ -0,0 +1,888 @@
 
				+
			
 
				+/***
			
 
				+ * ---------------------------------
			
 
				+ * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>
			
 
				+ *
			
 
				+ *  This file was ported from mpr.c file, part of libccd.
			
 
				+ *  The Minkowski Portal Refinement implementation was ported
			
 
				+ *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.
			
 
				+ *  at http://github.com/erwincoumans/bullet3
			
 
				+ *
			
 
				+ *  Distributed under the OSI-approved BSD License (the "License");
			
 
				+ *  see <http://www.opensource.org/licenses/bsd-license.php>.
			
 
				+ *  This software is distributed WITHOUT ANY WARRANTY; without even the
			
 
				+ *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *  See the License for more information.
			
 
				+ */
			
 
				+
			
 
				+#ifndef B3_MPR_PENETRATION_H
			
 
				+#define B3_MPR_PENETRATION_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3PlatformDefinitions.h"
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+
			
 
				+#ifdef __cplusplus  // C++ build uses sqrtf; any other build (presumably OpenCL C) uses sqrt
				+#define B3_MPR_SQRT sqrtf
				+#else
				+#define B3_MPR_SQRT sqrt
				+#endif
				+#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))  // plain macro: an argument may be evaluated twice
				+#define B3_MPR_FABS fabs
				+// B3_MPR_TOLERANCE is the threshold used by portalReachTolerance; B3_MPR_MAX_ITERATIONS is presumably the refinement loop cap (its use is outside this view)
				+#define B3_MPR_TOLERANCE 1E-6f
				+#define B3_MPR_MAX_ITERATIONS 1000
			
 
				+
			
 
				+struct _b3MprSupport_t
			
 
				+{
			
 
				+	b3Float4 v;   //!< Support point in minkowski sum
			
 
				+	b3Float4 v1;  //!< Support point in obj1
			
 
				+	b3Float4 v2;  //!< Support point in obj2
			
 
				+};
			
 
				+typedef struct _b3MprSupport_t b3MprSupport_t;
			
 
				+
			
 
				+struct _b3MprSimplex_t
			
 
				+{
			
 
				+	b3MprSupport_t ps[4];
			
 
				+	int last;  //!< index of last added point
			
 
				+};
			
 
				+typedef struct _b3MprSimplex_t b3MprSimplex_t;
			
 
				+
			
 
				+// Writable access to simplex point `idx`; the index is not range-checked.
				+inline b3MprSupport_t *b3MprSimplexPointW(b3MprSimplex_t *s, int idx)
				+{
				+	return s->ps + idx;
				+}
			
 
				+
			
 
				+// Sets the number of points in the simplex; the struct stores the index of
				+// the last point rather than the count.
				+inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)
				+{
				+	const int lastIndex = size - 1;
				+	s->last = lastIndex;
				+}
			
 
				+
			
 
				+// Number of points in the simplex, derived from the stored last index.
				+inline int b3MprSimplexSize(const b3MprSimplex_t *s)
				+{
				+	const int count = s->last + 1;
				+	return count;
				+}
			
 
				+
			
 
				+// Read-only access to simplex point `idx`.
				+// Note: `idx` is not checked against the array bounds.
				+inline const b3MprSupport_t *b3MprSimplexPoint(const b3MprSimplex_t *s, int idx)
				+{
				+	return s->ps + idx;
				+}
			
 
				+
			
 
				+// Copies the whole support record (v, v1 and v2) from `s` into `d`.
				+inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)
				+{
				+	d[0] = s[0];
				+}
			
 
				+
			
 
				+// Overwrites simplex slot `pos` with a copy of support point `a`.
				+inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)
				+{
				+	s->ps[pos] = *a;
				+}
			
 
				+
			
 
				+// Exchanges the support points stored in slots `pos1` and `pos2`.
				+inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)
				+{
				+	b3MprSupport_t held = s->ps[pos1];
				+	s->ps[pos1] = s->ps[pos2];
				+	s->ps[pos2] = held;
				+}
			
 
				+
			
 
				+// Non-zero when |val| is below FLT_EPSILON.
				+inline int b3MprIsZero(float val)
				+{
				+	const float magnitude = B3_MPR_FABS(val);
				+	return magnitude < FLT_EPSILON;
				+}
			
 
				+
			
 
				+// Approximate float equality: first an absolute test against FLT_EPSILON,
				+// then a relative test scaled by the larger of the two magnitudes.
				+inline int b3MprEq(float _a, float _b)
				+{
				+	const float ab = B3_MPR_FABS(_a - _b);
				+	if (B3_MPR_FABS(ab) < FLT_EPSILON)
				+		return 1;
				+
				+	const float a = B3_MPR_FABS(_a);
				+	const float b = B3_MPR_FABS(_b);
				+
				+	// scale by whichever magnitude is larger (a when they tie)
				+	const float scale = (b > a) ? b : a;
				+	return ab < FLT_EPSILON * scale;
				+}
			
 
				+
			
 
				+// Component-wise approximate equality of the x, y and z parts (w is ignored).
				+inline int b3MprVec3Eq(const b3Float4 *a, const b3Float4 *b)
				+{
				+	if (!b3MprEq((*a).x, (*b).x))
				+		return 0;
				+	if (!b3MprEq((*a).y, (*b).y))
				+		return 0;
				+	return b3MprEq((*a).z, (*b).z) ? 1 : 0;
				+}
			
 
				+
			
 
				+// Returns the hull vertex selected by b3MaxDot for direction `supportVec`
				+// (both in the hull's local frame). A hull without vertices yields the zero
				+// vector.
				+inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec, __global const b3ConvexPolyhedronData_t *hull, b3ConstArray(b3Float4) verticesA)
				+{
				+	if (hull->m_numVertices <= 0)
				+	{
				+		return b3MakeFloat4(0, 0, 0, 0);
				+	}
				+
				+	float bestDot = -B3_LARGE_FLOAT;
				+	const b3Float4 direction = supportVec;
				+	const int best = b3MaxDot(direction, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &bestDot);
				+	return verticesA[hull->m_vertexOffset + best];
				+}
			
 
				+
			
 
				+// Computes the world-space support point of body `bodyIndex` in direction
				+// `_dir` and stores it in `*outp`.
				+//
				+// The world-space direction is rotated into the body's local frame, the
				+// support vertex of the body's convex hull is looked up there, and the
				+// vertex is transformed back with the body's position and orientation.
				+//
				+// pairIndex and sepAxis are accepted but not used here. `logme` only
				+// selects between the inlined lookup and b3LocalGetSupportVertex.
				+//
				+// Fix: the local support point now starts at zero. Previously the `logme`
				+// path left it uninitialized for a hull without vertices and then passed
				+// it to b3TransformPoint, while the other path returned the zero vector.
				+B3_STATIC void b3MprConvexSupport(int pairIndex, int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+								  b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+								  b3ConstArray(b3Collidable_t) cpuCollidables,
				+								  b3ConstArray(b3Float4) cpuVertices,
				+								  __global b3Float4 *sepAxis,
				+								  const b3Float4 *_dir, b3Float4 *outp, int logme)
				+{
				+	//dir is in worldspace, move to local space
				+
				+	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;
				+	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;
				+
				+	b3Float4 dir = b3MakeFloat4((*_dir).x, (*_dir).y, (*_dir).z, 0.f);
				+
				+	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn), dir);
				+
				+	//find local support vertex
				+	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;
				+
				+	b3Assert(cpuCollidables[colIndex].m_shapeType == SHAPE_CONVEX_HULL);
				+	__global const b3ConvexPolyhedronData_t *hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];
				+
				+	// Same fallback as b3LocalGetSupportVertex for a hull without vertices
				+	b3Float4 pInA = b3MakeFloat4(0, 0, 0, 0);
				+	if (logme)
				+	{
				+		float maxDot = -B3_LARGE_FLOAT;
				+
				+		if (0 < hull->m_numVertices)
				+		{
				+			const b3Float4 scaled = localDir;
				+			int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);
				+			pInA = cpuVertices[hull->m_vertexOffset + index];
				+		}
				+	}
				+	else
				+	{
				+		pInA = b3LocalGetSupportVertex(localDir, hull, cpuVertices);
				+	}
				+
				+	//move vertex to world space
				+	*outp = b3TransformPoint(pInA, pos, orn);
				+}
			
 
				+
			
 
				+// Fills `supp` for direction `_dir`: v1 is body A's support point along
				+// `_dir`, v2 is body B's support point along the opposite direction, and
				+// v is v1 - v2.
				+inline void b3MprSupport(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+						 b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+						 b3ConstArray(b3Collidable_t) cpuCollidables,
				+						 b3ConstArray(b3Float4) cpuVertices,
				+						 __global b3Float4 *sepAxis,
				+						 const b3Float4 *_dir, b3MprSupport_t *supp)
				+{
				+	b3Float4 forward;
				+	forward = *_dir;
				+	b3MprConvexSupport(pairIndex, bodyIndexA, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &forward, &supp->v1, 0);
				+
				+	b3Float4 backward;
				+	backward = *_dir * -1.f;
				+	b3MprConvexSupport(pairIndex, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &backward, &supp->v2, 0);
				+
				+	supp->v = supp->v1 - supp->v2;
				+}
			
 
				+
			
 
				+// Seeds `center` from the two body positions: v1 = position of A,
				+// v2 = position of B, v = v1 - v2.
				+inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)
				+{
				+	center->v1 = cpuBodyBuf[bodyIndexA].m_pos;
				+	center->v2 = cpuBodyBuf[bodyIndexB].m_pos;
				+
				+	// difference of the two positions just stored
				+	center->v = center->v1 - center->v2;
				+}
			
 
				+
			
 
				+// Stores (x, y, z) into `*v` and clears its w component.
				+inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)
				+{
				+	(*v).w = 0.f;
				+	(*v).z = z;
				+	(*v).y = y;
				+	(*v).x = x;
				+}
			
 
				+
			
 
				+// Adds the x, y and z components of `*w` to `*v`; w is left unchanged.
				+inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)
				+{
				+	(*v).x = (*v).x + (*w).x;
				+	(*v).y = (*v).y + (*w).y;
				+	(*v).z = (*v).z + (*w).z;
				+}
			
 
				+
			
 
				+// Copies the whole vector (all four components) from `w` into `v`.
				+inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)
				+{
				+	v[0] = w[0];
				+}
			
 
				+
			
 
				+// Scales `*d` in place by `k`, using the vector type's own *= operator.
				+inline void b3MprVec3Scale(b3Float4 *d, float k)
				+{
				+	(*d) *= k;
				+}
			
 
				+
			
 
				+// Dot product of `*a` and `*b`, as computed by b3Dot3F4.
				+inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)
				+{
				+	const float result = b3Dot3F4(*a, *b);
				+	return result;
				+}
			
 
				+
			
 
				+// Squared length of `*v`: the dot product of the vector with itself.
				+inline float b3MprVec3Len2(const b3Float4 *v)
				+{
				+	const float lengthSquared = b3MprVec3Dot(v, v);
				+	return lengthSquared;
				+}
			
 
				+
			
 
				+// Scales `*d` by the reciprocal of its length. There is no guard for a
				+// zero-length input.
				+inline void b3MprVec3Normalize(b3Float4 *d)
				+{
				+	const float lengthSquared = b3MprVec3Len2(d);
				+	const float inverseLength = 1.f / B3_MPR_SQRT(lengthSquared);
				+	b3MprVec3Scale(d, inverseLength);
				+}
			
 
				+
			
 
				+// Stores the cross product a x b (via b3Cross3) in `*d`.
				+inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)
				+{
				+	const b3Float4 product = b3Cross3(*a, *b);
				+	*d = product;
				+}
			
 
				+
			
 
				+// Stores the difference v - w in `*d`.
				+inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)
				+{
				+	const b3Float4 difference = *v - *w;
				+	*d = difference;
				+}
			
 
				+
			
 
				+// Writes the normalized normal of the portal triangle (simplex points 1, 2
				+// and 3) into `*dir`, computed as (p2 - p1) x (p3 - p1).
				+inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)
				+{
				+	const b3Float4 *anchor = &b3MprSimplexPoint(portal, 1)->v;
				+
				+	b3Float4 edgeTo2;
				+	b3MprVec3Sub2(&edgeTo2, &b3MprSimplexPoint(portal, 2)->v, anchor);
				+
				+	b3Float4 edgeTo3;
				+	b3MprVec3Sub2(&edgeTo3, &b3MprSimplexPoint(portal, 3)->v, anchor);
				+
				+	b3MprVec3Cross(dir, &edgeTo2, &edgeTo3);
				+	b3MprVec3Normalize(dir);
				+}
			
 
				+
			
 
				+// Non-zero when the projection of portal point 1 onto `dir` is zero (within
				+// b3MprIsZero) or positive.
				+inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal, const b3Float4 *dir)
				+{
				+	const float projection = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);
				+	if (b3MprIsZero(projection))
				+	{
				+		return 1;
				+	}
				+	return projection > 0.f;
				+}
			
 
				+
			
 
				+// Non-zero when the new support point `v4` lies no further than
				+// B3_MPR_TOLERANCE beyond the closest of portal points 1..3, measured along
				+// `dir`.
				+inline int portalReachTolerance(const b3MprSimplex_t *portal, const b3MprSupport_t *v4, const b3Float4 *dir)
				+{
				+	// projections of the three portal points and of v4 onto dir
				+	const float proj1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);
				+	const float proj2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);
				+	const float proj3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);
				+	const float proj4 = b3MprVec3Dot(&v4->v, dir);
				+
				+	const float gap1 = proj4 - proj1;
				+	const float gap2 = proj4 - proj2;
				+	const float gap3 = proj4 - proj3;
				+
				+	// smallest of the three gaps
				+	float smallestGap = B3_MPR_FMIN(gap1, gap2);
				+	smallestGap = B3_MPR_FMIN(smallestGap, gap3);
				+
				+	if (b3MprEq(smallestGap, B3_MPR_TOLERANCE))
				+	{
				+		return 1;
				+	}
				+	return smallestGap < B3_MPR_TOLERANCE;
				+}
			
 
				+
			
 
				+// Non-zero when the projection of `v4` onto `dir` is zero (within
				+// b3MprIsZero) or positive. `portal` is not read.
				+inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, const b3MprSupport_t *v4, const b3Float4 *dir)
				+{
				+	const float projection = b3MprVec3Dot(&v4->v, dir);
				+	if (b3MprIsZero(projection))
				+	{
				+		return 1;
				+	}
				+	return projection > 0.f;
				+}
			
 
				+
			
 
				+// Replaces one of portal points 1..3 with the new support point `v4`.
				+// The slot is chosen from the signs of the dot products between the portal
				+// points and v4 x p0.
				+inline void b3ExpandPortal(b3MprSimplex_t *portal, const b3MprSupport_t *v4)
				+{
				+	b3Float4 v4v0;
				+	b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);
				+
				+	int slot;
				+	if (b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0) > 0.f)
				+	{
				+		// point 1 on the positive side: point 2 decides between slots 1 and 3
				+		slot = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0) > 0.f) ? 1 : 3;
				+	}
				+	else
				+	{
				+		// otherwise point 3 decides between slots 2 and 1
				+		slot = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0) > 0.f) ? 2 : 1;
				+	}
				+
				+	b3MprSimplexSet(portal, slot, v4);
				+}
			
 
				+
			
 
				+/// Phase 1 (portal discovery): builds the initial portal simplex for a body pair.
				+///
				+/// Vertex 0 is filled in by b3FindOrigin(); vertices 1..3 are support points
				+/// obtained from b3MprSupport() in successively derived directions.
				+///
				+/// @param portal  simplex that receives the portal vertices
				+/// @return  0 when a 4-vertex portal was built,
				+///          1 when the origin lies on vertex 1,
				+///          2 when the origin lies on the v0-v1 segment,
				+///         -1 when a support point's projection onto the search direction
				+///            is zero or negative.
				+/// @note hasSepAxis is accepted but not used by this function.
				+B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+							   b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+							   b3ConstArray(b3Collidable_t) cpuCollidables,
				+							   b3ConstArray(b3Float4) cpuVertices,
				+							   __global b3Float4 *sepAxis,
				+							   __global int *hasSepAxis,
				+							   b3MprSimplex_t *portal)
				+{
				+	b3Float4 dir, scratchA, scratchB;
				+	float dot;
				+
				+	// vertex 0 is center of portal
				+	b3FindOrigin(bodyIndexA, bodyIndexB, cpuBodyBuf, b3MprSimplexPointW(portal, 0));
				+	b3MprSimplexSetSize(portal, 1);
				+
				+	b3Float4 origin = b3MakeFloat4(0, 0, 0, 0);
				+
				+	if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, &origin))
				+	{
				+		// The portal's center lies on the origin, so the objects intersect,
				+		// but penetration info is still needed: move the center a little
				+		// along +x.
				+		b3MprVec3Set(&scratchA, FLT_EPSILON * 10.f, 0.f, 0.f);
				+		b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &scratchA);
				+	}
				+
				+	// vertex 1 = support in direction of origin (normalized -v0)
				+	b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);
				+	b3MprVec3Scale(&dir, -1.f);
				+	b3MprVec3Normalize(&dir);
				+	b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 1));
				+	b3MprSimplexSetSize(portal, 2);
				+
				+	// test if origin isn't outside of v1
				+	dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);
				+	if (b3MprIsZero(dot) || dot < 0.f)
				+	{
				+		return -1;
				+	}
				+
				+	// vertex 2 = support along v0 x v1
				+	b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,
				+				   &b3MprSimplexPoint(portal, 1)->v);
				+	if (b3MprIsZero(b3MprVec3Len2(&dir)))
				+	{
				+		// v0 x v1 vanished: 1 = origin lies on v1, 2 = origin lies on the
				+		// v0-v1 segment
				+		return b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, &origin) ? 1 : 2;
				+	}
				+
				+	b3MprVec3Normalize(&dir);
				+	b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 2));
				+
				+	dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);
				+	if (b3MprIsZero(dot) || dot < 0.f)
				+	{
				+		return -1;
				+	}
				+
				+	b3MprSimplexSetSize(portal, 3);
				+
				+	// vertex 3 direction = normalized (v1 - v0) x (v2 - v0)
				+	b3MprVec3Sub2(&scratchA, &b3MprSimplexPoint(portal, 1)->v,
				+				  &b3MprSimplexPoint(portal, 0)->v);
				+	b3MprVec3Sub2(&scratchB, &b3MprSimplexPoint(portal, 2)->v,
				+				  &b3MprSimplexPoint(portal, 0)->v);
				+	b3MprVec3Cross(&dir, &scratchA, &scratchB);
				+	b3MprVec3Normalize(&dir);
				+
				+	// it is better to form portal faces to be oriented "outside" origin
				+	if (b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v) > 0.f)
				+	{
				+		b3MprSimplexSwap(portal, 1, 2);
				+		b3MprVec3Scale(&dir, -1.f);
				+	}
				+
				+	while (b3MprSimplexSize(portal) < 4)
				+	{
				+		b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, b3MprSimplexPointW(portal, 3));
				+
				+		dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);
				+		if (b3MprIsZero(dot) || dot < 0.f)
				+		{
				+			return -1;
				+		}
				+
				+		// test if origin is outside (v1, v0, v3) - set v2 as v3 and continue
				+		b3MprVec3Cross(&scratchA, &b3MprSimplexPoint(portal, 1)->v,
				+					   &b3MprSimplexPoint(portal, 3)->v);
				+		dot = b3MprVec3Dot(&scratchA, &b3MprSimplexPoint(portal, 0)->v);
				+		if (dot < 0.f && !b3MprIsZero(dot))
				+		{
				+			b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));
				+		}
				+		else
				+		{
				+			// test if origin is outside (v3, v0, v2) - set v1 as v3 and
				+			// continue
				+			b3MprVec3Cross(&scratchA, &b3MprSimplexPoint(portal, 3)->v,
				+						   &b3MprSimplexPoint(portal, 2)->v);
				+			dot = b3MprVec3Dot(&scratchA, &b3MprSimplexPoint(portal, 0)->v);
				+			if (dot < 0.f && !b3MprIsZero(dot))
				+			{
				+				b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));
				+			}
				+			else
				+			{
				+				// neither test fired: keep v3; the size is now 4, so the
				+				// loop condition ends the search
				+				b3MprSimplexSetSize(portal, 4);
				+				continue;
				+			}
				+		}
				+
				+		// a face vertex was replaced: recompute the search direction from
				+		// the updated (v0, v1, v2) triangle
				+		b3MprVec3Sub2(&scratchA, &b3MprSimplexPoint(portal, 1)->v,
				+					  &b3MprSimplexPoint(portal, 0)->v);
				+		b3MprVec3Sub2(&scratchB, &b3MprSimplexPoint(portal, 2)->v,
				+					  &b3MprSimplexPoint(portal, 0)->v);
				+		b3MprVec3Cross(&dir, &scratchA, &scratchB);
				+		b3MprVec3Normalize(&dir);
				+	}
				+
				+	return 0;
				+}
			
 
				+
			
 
				+/// Phase 2 (portal refinement): repeatedly expands the portal with new support
				+/// points, for at most B3_MPR_MAX_ITERATIONS passes.
				+///
				+/// @param portal  portal produced by the discovery phase; updated in place
				+/// @return  0 as soon as portalEncapsulesOrigin() accepts the portal;
				+///         -1 when the new support point cannot encapsulate the origin, when
				+///            portalReachTolerance() fires, or when the pass limit is reached.
				+B3_STATIC int b3RefinePortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+							 b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+							 b3ConstArray(b3Collidable_t) cpuCollidables,
				+							 b3ConstArray(b3Float4) cpuVertices,
				+							 __global b3Float4 *sepAxis,
				+							 b3MprSimplex_t *portal)
				+{
				+	b3Float4 dir;
				+	b3MprSupport_t v4;
				+	int pass = 0;
				+
				+	while (pass < B3_MPR_MAX_ITERATIONS)
				+	{
				+		// compute direction outside the portal (from v0 through the
				+		// v1,v2,v3 face)
				+		b3PortalDir(portal, &dir);
				+
				+		// test if origin is inside the portal
				+		if (portalEncapsulesOrigin(portal, &dir))
				+		{
				+			return 0;
				+		}
				+
				+		// get next support point
				+		b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &v4);
				+
				+		// give up if v4 cannot expand the portal to contain the origin ...
				+		if (!portalCanEncapsuleOrigin(portal, &v4, &dir))
				+		{
				+			return -1;
				+		}
				+		// ... or if expanding the portal has reached the tolerance
				+		if (portalReachTolerance(portal, &v4, &dir))
				+		{
				+			return -1;
				+		}
				+
				+		// v1-v2-v3 triangle must be rearranged to face outside Minkowski
				+		// difference (direction from v0).
				+		b3ExpandPortal(portal, &v4);
				+
				+		++pass;
				+	}
				+
				+	return -1;
				+}
			
 
				+
			
 
				+/// Computes a position estimate from the portal's four simplex points.
				+///
				+/// Four weights are derived from scalar triple products of the portal vertices
				+/// (barycentric coordinates of the origin in the tetrahedron). If their sum is
				+/// zero or negative, the weight of vertex 0 is dropped and the other three are
				+/// recomputed against the portal direction. The weights are applied to the v1
				+/// and v2 members of each simplex point; the result is the midpoint of the two
				+/// normalized weighted sums.
				+///
				+/// @param portal  portal whose points are read
				+/// @param pos     receives the resulting position
				+B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)
				+{
				+	b3Float4 origin = b3MakeFloat4(0, 0, 0, 0);
				+
				+	b3Float4 dir;
				+	b3Float4 vec, sum1, sum2;
				+	float weight[4], total, invTotal;
				+	size_t k;
				+
				+	b3PortalDir(portal, &dir);
				+
				+	// use barycentric coordinates of tetrahedron to find origin
				+	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,
				+				   &b3MprSimplexPoint(portal, 2)->v);
				+	weight[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);
				+
				+	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,
				+				   &b3MprSimplexPoint(portal, 2)->v);
				+	weight[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);
				+
				+	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,
				+				   &b3MprSimplexPoint(portal, 1)->v);
				+	weight[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);
				+
				+	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,
				+				   &b3MprSimplexPoint(portal, 1)->v);
				+	weight[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);
				+
				+	total = weight[0] + weight[1] + weight[2] + weight[3];
				+
				+	if (b3MprIsZero(total) || total < 0.f)
				+	{
				+		// fallback: ignore vertex 0 and weight the face vertices by the
				+		// projection of the pairwise cross products onto the portal direction
				+		weight[0] = 0.f;
				+
				+		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,
				+					   &b3MprSimplexPoint(portal, 3)->v);
				+		weight[1] = b3MprVec3Dot(&vec, &dir);
				+		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,
				+					   &b3MprSimplexPoint(portal, 1)->v);
				+		weight[2] = b3MprVec3Dot(&vec, &dir);
				+		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,
				+					   &b3MprSimplexPoint(portal, 2)->v);
				+		weight[3] = b3MprVec3Dot(&vec, &dir);
				+
				+		total = weight[1] + weight[2] + weight[3];
				+	}
				+
				+	invTotal = 1.f / total;
				+
				+	// accumulate the weighted v1 and v2 members separately
				+	b3MprVec3Copy(&sum1, &origin);
				+	b3MprVec3Copy(&sum2, &origin);
				+	for (k = 0; k < 4; ++k)
				+	{
				+		b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, k)->v1);
				+		b3MprVec3Scale(&vec, weight[k]);
				+		b3MprVec3Add(&sum1, &vec);
				+
				+		b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, k)->v2);
				+		b3MprVec3Scale(&vec, weight[k]);
				+		b3MprVec3Add(&sum2, &vec);
				+	}
				+	b3MprVec3Scale(&sum1, invTotal);
				+	b3MprVec3Scale(&sum2, invTotal);
				+
				+	// pos = midpoint of the two normalized sums
				+	b3MprVec3Copy(pos, &sum1);
				+	b3MprVec3Add(pos, &sum2);
				+	b3MprVec3Scale(pos, 0.5);
				+}
			
 
				+
			
 
				+/// Returns b3MprVec3Len2() of the difference a - b, i.e. the squared distance
				+/// between the two points.
				+inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)
				+{
				+	b3Float4 delta;
				+
				+	b3MprVec3Sub2(&delta, a, b);
				+
				+	return b3MprVec3Len2(&delta);
				+}
			
 
				+
			
 
				+/// Squared distance from point P to the segment x0-b.
				+///
				+/// The segment is parametrized as S(t) = x0 + t.d with d = b - x0 (|d| > 0 is
				+/// expected) and t in <0, 1>. Minimizing D(t) = |x0 + t.d - P|^2 gives
				+/// t = -((x0 - P) . d) / |d|^2. When t falls at or outside an end of the
				+/// interval, the corresponding endpoint is used instead.
				+///
				+/// @param P        query point
				+/// @param x0       first endpoint of the segment
				+/// @param b        second endpoint of the segment
				+/// @param witness  if non-NULL, receives the closest point on the segment
				+/// @return squared distance between P and the closest point
				+inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,
				+										 const b3Float4 *x0,
				+										 const b3Float4 *b,
				+										 b3Float4 *witness)
				+{
				+	float t;
				+	b3Float4 segDir, startToP;
				+
				+	// direction of segment
				+	b3MprVec3Sub2(&segDir, b, x0);
				+
				+	// precompute vector from P to x0
				+	b3MprVec3Sub2(&startToP, x0, P);
				+
				+	t = -1.f * b3MprVec3Dot(&startToP, &segDir);
				+	t /= b3MprVec3Len2(&segDir);
				+
				+	if (t < 0.f || b3MprIsZero(t))
				+	{
				+		// closest point is the x0 endpoint
				+		const float distToStart = b3MprVec3Dist2(x0, P);
				+		if (witness)
				+			b3MprVec3Copy(witness, x0);
				+		return distToStart;
				+	}
				+
				+	if (t > 1.f || b3MprEq(t, 1.f))
				+	{
				+		// closest point is the b endpoint
				+		const float distToEnd = b3MprVec3Dist2(b, P);
				+		if (witness)
				+			b3MprVec3Copy(witness, b);
				+		return distToEnd;
				+	}
				+
				+	if (witness)
				+	{
				+		// interior point: witness = x0 + t.d
				+		b3MprVec3Copy(witness, &segDir);
				+		b3MprVec3Scale(witness, t);
				+		b3MprVec3Add(witness, x0);
				+		return b3MprVec3Dist2(witness, P);
				+	}
				+
				+	// no witness requested: reuse segDir to hold t.d + (x0 - P)
				+	b3MprVec3Scale(&segDir, t);
				+	b3MprVec3Add(&segDir, &startToP);
				+	return b3MprVec3Len2(&segDir);
				+}
			
 
				+
			
 
				+/// Squared distance from point P to the triangle (x0, B, C).
				+///
				+/// The triangle is written as T(s, t) = x0 + s.d1 + t.d2 with d1 = B - x0 and
				+/// d2 = C - x0, and D(s, t) = |T(s, t) - P|^2 is minimized. The analytic
				+/// minimum is used only when s and t are both within <0, 1> and s + t <= 1
				+/// (compared with the b3MprIsZero / b3MprEq tolerances); otherwise the result
				+/// is the smallest of the distances to the three edges.
				+///
				+/// @param P        query point
				+/// @param x0,B,C   triangle vertices
				+/// @param witness  if non-NULL, receives the closest point on the triangle
				+/// @return squared distance between P and the closest point
				+inline float b3MprVec3PointTriDist2(const b3Float4 *P,
				+									const b3Float4 *x0, const b3Float4 *B,
				+									const b3Float4 *C,
				+									b3Float4 *witness)
				+{
				+	b3Float4 d1, d2, a;
				+	float aa, d1d1, d2d2, ad1, ad2, d1d2;
				+	float s, t, best, candidate;
				+	b3Float4 candidateWitness;
				+
				+	b3MprVec3Sub2(&d1, B, x0);
				+	b3MprVec3Sub2(&d2, C, x0);
				+	b3MprVec3Sub2(&a, x0, P);
				+
				+	// pairwise dot products of a, d1 and d2
				+	aa = b3MprVec3Dot(&a, &a);
				+	d1d1 = b3MprVec3Dot(&d1, &d1);
				+	d2d2 = b3MprVec3Dot(&d2, &d2);
				+	ad1 = b3MprVec3Dot(&a, &d1);
				+	ad2 = b3MprVec3Dot(&a, &d2);
				+	d1d2 = b3MprVec3Dot(&d1, &d2);
				+
				+	// stationary point of D(s, t)
				+	s = (ad2 * d1d2 - d2d2 * ad1) / (d2d2 * d1d1 - d1d2 * d1d2);
				+	t = (-s * d1d2 - ad2) / d2d2;
				+
				+	if ((b3MprIsZero(s) || s > 0.f) &&
				+		(b3MprEq(s, 1.f) || s < 1.f) &&
				+		(b3MprIsZero(t) || t > 0.f) &&
				+		(b3MprEq(t, 1.f) || t < 1.f) &&
				+		(b3MprEq(t + s, 1.f) || t + s < 1.f))
				+	{
				+		if (witness)
				+		{
				+			// witness = x0 + s.d1 + t.d2
				+			b3MprVec3Scale(&d1, s);
				+			b3MprVec3Scale(&d2, t);
				+			b3MprVec3Copy(witness, x0);
				+			b3MprVec3Add(witness, &d1);
				+			b3MprVec3Add(witness, &d2);
				+
				+			return b3MprVec3Dist2(witness, P);
				+		}
				+
				+		// no witness requested: evaluate D(s, t) from the dot products
				+		best = s * s * d1d1;
				+		best += t * t * d2d2;
				+		best += 2.f * s * t * d1d2;
				+		best += 2.f * s * ad1;
				+		best += 2.f * t * ad2;
				+		best += aa;
				+		return best;
				+	}
				+
				+	// minimum lies outside the triangle: take the closest of the three edges
				+	best = _b3MprVec3PointSegmentDist2(P, x0, B, witness);
				+
				+	candidate = _b3MprVec3PointSegmentDist2(P, x0, C, &candidateWitness);
				+	if (candidate < best)
				+	{
				+		best = candidate;
				+		if (witness)
				+			b3MprVec3Copy(witness, &candidateWitness);
				+	}
				+
				+	candidate = _b3MprVec3PointSegmentDist2(P, B, C, &candidateWitness);
				+	if (candidate < best)
				+	{
				+		best = candidate;
				+		if (witness)
				+			b3MprVec3Copy(witness, &candidateWitness);
				+	}
				+
				+	return best;
				+}
			
 
				+
			
 
				+/// Phase 3 (penetration info): keeps expanding the portal until
				+/// portalReachTolerance() fires or the pass counter reaches
				+/// B3_MPR_MAX_ITERATIONS, then writes the penetration data.
				+///
				+/// @param portal  refined portal; expanded in place
				+/// @param depth   receives the distance from the origin to the (v1, v2, v3) face
				+/// @param pdir    receives the normalized closest point of that face to the
				+///                origin; the portal direction is used instead when that
				+///                point is (near) zero in x, y and z
				+/// @param pos     receives the position computed by b3FindPos()
				+B3_STATIC void b3FindPenetr(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+							b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+							b3ConstArray(b3Collidable_t) cpuCollidables,
				+							b3ConstArray(b3Float4) cpuVertices,
				+							__global b3Float4 *sepAxis,
				+							b3MprSimplex_t *portal,
				+							float *depth, b3Float4 *pdir, b3Float4 *pos)
				+{
				+	b3Float4 dir;
				+	b3MprSupport_t v4;
				+	unsigned long pass;
				+
				+	b3Float4 origin = b3MakeFloat4(0, 0, 0, 0);
				+
				+	pass = 1UL;
				+	for (int i = 0; i < B3_MPR_MAX_ITERATIONS; i++)
				+	{
				+		// compute portal direction and obtain next support point
				+		b3PortalDir(portal, &dir);
				+
				+		b3MprSupport(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &dir, &v4);
				+
				+		if (!(portalReachTolerance(portal, &v4, &dir) || pass == B3_MPR_MAX_ITERATIONS))
				+		{
				+			// not done yet: grow the portal and try again
				+			b3ExpandPortal(portal, &v4);
				+			pass++;
				+			continue;
				+		}
				+
				+		// reached tolerance (or the last pass) -> find penetration info
				+		*depth = b3MprVec3PointTriDist2(&origin, &b3MprSimplexPoint(portal, 1)->v, &b3MprSimplexPoint(portal, 2)->v, &b3MprSimplexPoint(portal, 3)->v, pdir);
				+		*depth = B3_MPR_SQRT(*depth);
				+
				+		if (b3MprIsZero(pdir->x) && b3MprIsZero(pdir->y) && b3MprIsZero(pdir->z))
				+		{
				+			// closest point coincides with the origin: fall back to the
				+			// portal direction
				+			*pdir = dir;
				+		}
				+		b3MprVec3Normalize(pdir);
				+
				+		// barycentric coordinates:
				+		b3FindPos(portal, pos);
				+
				+		return;
				+	}
				+}
			
 
				+
			
 
				+/// Penetration info for a touching contact on the portal's v1.
				+///
				+/// @param portal  portal whose point 1 is read
				+/// @param depth   set to 0
				+/// @param dir     set to the zero vector (direction is unimportant here)
				+/// @param pos     set to the midpoint of point 1's v1 and v2 members
				+B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal, float *depth, b3Float4 *dir, b3Float4 *pos)
				+{
				+	b3Float4 origin = b3MakeFloat4(0, 0, 0, 0);
				+
				+	// touching only: no depth and no meaningful direction
				+	*depth = 0.f;
				+	b3MprVec3Copy(dir, &origin);
				+
				+	// pos = (v1 + v2) / 2 of simplex point 1
				+	b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);
				+	b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);
				+	b3MprVec3Scale(pos, 0.5);
				+}
			
 
				+
			
 
				+/// Penetration info for the case where the origin lies on the v0-v1 segment.
				+///
				+/// @param portal  portal whose point 1 is read
				+/// @param depth   receives the length of point 1's v vector
				+/// @param dir     receives point 1's v vector, normalized
				+/// @param pos     receives the midpoint of point 1's v1 and v2 members
				+B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,
				+								   float *depth, b3Float4 *dir, b3Float4 *pos)
				+{
				+	// pos = (v1 + v2) / 2 of simplex point 1
				+	b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);
				+	b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);
				+	b3MprVec3Scale(pos, 0.5f);
				+
				+	// the depth is measured before dir is normalized
				+	b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);
				+	*depth = B3_MPR_SQRT(b3MprVec3Len2(dir));
				+	b3MprVec3Normalize(dir);
				+}
			
 
				+
			
 
				+/// Runs the three MPR phases (portal discovery, portal refinement, penetration
				+/// info) for one body pair.
				+///
				+/// hasSepAxis[pairIndex] is cleared on entry. Only when all three phases ran is
				+/// it set to 1 and sepAxis[pairIndex] set to the negated penetration direction;
				+/// the touching and segment cases leave it at 0.
				+///
				+/// @param depthOut  receives the penetration depth
				+/// @param dirOut    receives the penetration direction
				+/// @param posOut    receives the contact position
				+/// @return 0 when depthOut/dirOut/posOut were written, -1 when no collision
				+///         was found (outputs untouched)
				+inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,
				+							b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+							b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
				+							b3ConstArray(b3Collidable_t) cpuCollidables,
				+							b3ConstArray(b3Float4) cpuVertices,
				+							__global b3Float4 *sepAxis,
				+							__global int *hasSepAxis,
				+							float *depthOut, b3Float4 *dirOut, b3Float4 *posOut)
				+{
				+	b3MprSimplex_t portal;
				+
				+	hasSepAxis[pairIndex] = 0;
				+
				+	// Phase 1: Portal discovery
				+	const int discovery = b3DiscoverPortal(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, hasSepAxis, &portal);
				+
				+	if (discovery == 1)
				+	{
				+		// Touching contact on portal's v1.
				+		b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);
				+		return 0;
				+	}
				+
				+	if (discovery == 2)
				+	{
				+		// Origin lies on the v0-v1 segment.
				+		b3FindPenetrSegment(&portal, depthOut, dirOut, posOut);
				+		return 0;
				+	}
				+
				+	if (discovery != 0)
				+	{
				+		// Origin isn't inside portal - no collision.
				+		hasSepAxis[pairIndex] = 0;
				+		return -1;
				+	}
				+
				+	// Phase 2: Portal refinement
				+	if (b3RefinePortal(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &portal) < 0)
				+	{
				+		return -1;
				+	}
				+
				+	// Phase 3. Penetration info
				+	b3FindPenetr(pairIndex, bodyIndexA, bodyIndexB, cpuBodyBuf, cpuConvexData, cpuCollidables, cpuVertices, sepAxis, &portal, depthOut, dirOut, posOut);
				+	hasSepAxis[pairIndex] = 1;
				+	sepAxis[pairIndex] = -*dirOut;
				+
				+	return 0;
				+}
			
 
				+
			
 
				+#endif  //B3_MPR_PENETRATION_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h
@@ -0,0 +1,175 @@
 
				+
			
 
				+#ifndef B3_NEW_CONTACT_REDUCTION_H
			
 
				+#define B3_NEW_CONTACT_REDUCTION_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+#define GET_NPOINTS(x) (x).m_worldNormalOnB.w
			
 
				+
			
 
				+/// Selects up to four representative contact points out of p[0 .. nPoints).
				+///
				+/// With four or fewer points nothing is selected: nPoints is returned and
				+/// contactIdx is left untouched. Otherwise only the first 64 points are
				+/// considered. With c the average of those points,
				+/// u = normalize(nearNormal x (p[0] - c)) and v = normalize(nearNormal x u),
				+/// contactIdx[0].x / .y / .z / .w receive the index of the point whose offset
				+/// from c has the smallest projection onto u / -u / v / -v respectively. If the
				+/// point with the smallest w component is not among those four, it replaces
				+/// contactIdx[0].x.
				+///
				+/// @param p           candidate points; the w component is compared to find
				+///                    the deepest point
				+/// @param nPoints     number of entries in p
				+/// @param nearNormal  normal used to build the two sampling directions
				+/// @param contactIdx  receives the selected indices in contactIdx[0]
				+/// @return 0 for no points, nPoints when nPoints <= 4, otherwise 4
				+int b3ExtractManifoldSequentialGlobal(__global const b3Float4* p, int nPoints, b3Float4ConstArg nearNormal, b3Int4* contactIdx)
				+{
				+	if (nPoints == 0)
				+		return 0;
				+
				+	if (nPoints <= 4)
				+		return nPoints;
				+
				+	if (nPoints > 64)
				+		nPoints = 64;
				+
				+	b3Float4 center = b3MakeFloat4(0, 0, 0, 0);
				+	{
				+		for (int i = 0; i < nPoints; i++)
				+			center += p[i];
				+		center /= (float)nPoints;
				+	}
				+
				+	//	sample 4 directions
				+
				+	b3Float4 aVector = p[0] - center;
				+	b3Float4 u = b3Cross(nearNormal, aVector);
				+	b3Float4 v = b3Cross(nearNormal, u);
				+	u = b3Normalized(u);
				+	v = b3Normalized(v);
				+
				+	//keep point with deepest penetration
				+	float minW = FLT_MAX;
				+
				+	int minIndex = -1;
				+
				+	// Running minima of the four projections. They must start at FLT_MAX:
				+	// FLT_MIN is the smallest *positive* float, so seeding a minimum search
				+	// with it leaves a slot unassigned whenever every projection in that
				+	// direction is >= FLT_MIN (e.g. rounding noise on near-collinear points).
				+	b3Float4 minDots;
				+	minDots.x = FLT_MAX;
				+	minDots.y = FLT_MAX;
				+	minDots.z = FLT_MAX;
				+	minDots.w = FLT_MAX;
				+
				+	//	idx, distance
				+	for (int ie = 0; ie < nPoints; ie++)
				+	{
				+		if (p[ie].w < minW)
				+		{
				+			minW = p[ie].w;
				+			minIndex = ie;
				+		}
				+		float f;
				+		b3Float4 r = p[ie] - center;
				+		f = b3Dot(u, r);
				+		if (f < minDots.x)
				+		{
				+			minDots.x = f;
				+			contactIdx[0].x = ie;
				+		}
				+
				+		f = b3Dot(-u, r);
				+		if (f < minDots.y)
				+		{
				+			minDots.y = f;
				+			contactIdx[0].y = ie;
				+		}
				+
				+		f = b3Dot(v, r);
				+		if (f < minDots.z)
				+		{
				+			minDots.z = f;
				+			contactIdx[0].z = ie;
				+		}
				+
				+		f = b3Dot(-v, r);
				+		if (f < minDots.w)
				+		{
				+			minDots.w = f;
				+			contactIdx[0].w = ie;
				+		}
				+	}
				+
				+	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
				+	{
				+		//replace the first contact with minimum (todo: replace contact with least penetration)
				+		contactIdx[0].x = minIndex;
				+	}
				+
				+	return 4;
				+}
			
 
				+
			
 
				+/// Reduces the clipped contact points of one pair to at most four and appends
				+/// them as a b3Contact4Data entry to globalContactsOut.
				+///
				+/// Nothing happens unless pairIndex < numPairs, hasSeparatingAxis[pairIndex] is
				+/// non-zero and clippingFaces[pairIndex].w (the point count) is positive. The
				+/// output slot is reserved with b3AtomicInc(nGlobalContactsOut); the contact is
				+/// only written when that slot is below contactCapacity, in which case the slot
				+/// index is also stored in pairs[pairIndex].w.
				+__kernel void b3NewContactReductionKernel(__global b3Int4* pairs,
				+										  __global const b3RigidBodyData_t* rigidBodies,
				+										  __global const b3Float4* separatingNormals,
				+										  __global const int* hasSeparatingAxis,
				+										  __global struct b3Contact4Data* globalContactsOut,
				+										  __global b3Int4* clippingFaces,
				+										  __global b3Float4* worldVertsB2,
				+										  volatile __global int* nGlobalContactsOut,
				+										  int vertexFaceCapacity,
				+										  int contactCapacity,
				+										  int numPairs,
				+										  int pairIndex)
				+{
				+	// default selection, used as-is when there are four or fewer points
				+	b3Int4 contactIdx;
				+	contactIdx = b3MakeInt4(0, 1, 2, 3);
				+
				+	if (pairIndex >= numPairs)
				+		return;
				+
				+	if (!hasSeparatingAxis[pairIndex])
				+		return;
				+
				+	int nPoints = clippingFaces[pairIndex].w;
				+	if (nPoints <= 0)
				+		return;
				+
				+	__global b3Float4* pointsIn = &worldVertsB2[pairIndex * vertexFaceCapacity];
				+	b3Float4 normal = -separatingNormals[pairIndex];
				+
				+	int nReducedContacts = b3ExtractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
				+
				+	// reserve an output slot; the counter is bumped even if the slot turns
				+	// out to be beyond the capacity
				+	int dstIdx;
				+	dstIdx = b3AtomicInc(nGlobalContactsOut);
				+
				+	b3Assert(dstIdx < contactCapacity);
				+	if (dstIdx >= contactCapacity)
				+		return;
				+
				+	__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
				+	c->m_worldNormalOnB = -normal;
				+	c->m_restituitionCoeffCmp = (0.f * 0xffff);
				+	c->m_frictionCoeffCmp = (0.7f * 0xffff);
				+	c->m_batchIdx = pairIndex;
				+	int bodyA = pairs[pairIndex].x;
				+	int bodyB = pairs[pairIndex].y;
				+
				+	pairs[pairIndex].w = dstIdx;
				+
				+	// a body with zero inverse mass is stored with a negated index
				+	c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA;
				+	c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;
				+	c->m_childIndexA = -1;
				+	c->m_childIndexB = -1;
				+
				+	// copy the selected points, highest slot first; counts outside 1..4
				+	// copy nothing
				+	if (nReducedContacts >= 1 && nReducedContacts <= 4)
				+	{
				+		if (nReducedContacts == 4)
				+			c->m_worldPosB[3] = pointsIn[contactIdx.w];
				+		if (nReducedContacts >= 3)
				+			c->m_worldPosB[2] = pointsIn[contactIdx.z];
				+		if (nReducedContacts >= 2)
				+			c->m_worldPosB[1] = pointsIn[contactIdx.y];
				+		c->m_worldPosB[0] = pointsIn[contactIdx.x];
				+	}
				+
				+	// the point count is kept in the w component of the normal
				+	GET_NPOINTS(*c) = nReducedContacts;
				+}
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
@@ -0,0 +1,88 @@
 
				+
			
 
				+
			
 
				+#ifndef B3_QUANTIZED_BVH_NODE_H
			
 
				+#define B3_QUANTIZED_BVH_NODE_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+#define B3_MAX_NUM_PARTS_IN_BITS 10
			
 
				+
			
 
				+/// b3QuantizedBvhNodeData is a compressed AABB node, 16 bytes (12 bytes of quantized bounds plus a 4-byte index).
			
 
				+/// A node is either a leaf or an internal node, told apart by the sign of m_escapeIndexOrTriangleIndex (see b3IsLeaf): a leaf stores a non-negative value whose low bits hold the triangle index (see b3GetTriangleIndex), an internal node stores the negated escape index (see b3GetEscapeIndex).
			
 
				+typedef struct b3QuantizedBvhNodeData b3QuantizedBvhNodeData_t;  // alias for the struct tag
			
 
				+
			
 
				+struct b3QuantizedBvhNodeData
			
 
				+{
			
 
				+	// 12 bytes: quantized AABB corners, three unsigned short components each
			
 
				+	unsigned short int m_quantizedAabbMin[3];  // lower corner, indexed 0..2 per axis
			
 
				+	unsigned short int m_quantizedAabbMax[3];  // upper corner, indexed 0..2 per axis
			
 
				+	// 4 bytes: >= 0 for a leaf (triangle index in the low bits), < 0 for an internal node (negated escape index)
			
 
				+	int m_escapeIndexOrTriangleIndex;
			
 
				+};
			
 
				+
			
 
				+/// Returns the triangle index stored in a leaf node: the value of
				+/// m_escapeIndexOrTriangleIndex with every bit from position
				+/// (31 - B3_MAX_NUM_PARTS_IN_BITS) upwards cleared.
				+inline int b3GetTriangleIndex(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	// all-ones shifted left: covers the upper bits that are not part of the
				+	// triangle index
				+	const unsigned int upperBits = (~0u) << (31 - B3_MAX_NUM_PARTS_IN_BITS);
				+
				+	// Get only the lower bits where the triangle index is stored
				+	return (rootNode->m_escapeIndexOrTriangleIndex & ~upperBits);
				+}
			
 
				+
			
 
				+/// Returns 1 when the node is a leaf (stored index >= 0) and 0 when it is an
				+/// internal node (stored index negative).
				+inline int b3IsLeaf(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	// skipindex is negative (internal node), triangleindex >=0 (leafnode)
				+	if (rootNode->m_escapeIndexOrTriangleIndex >= 0)
				+	{
				+		return 1;
				+	}
				+	return 0;
				+}
			
 
				+
			
 
				+/// Returns the escape index of an internal node, i.e. the negation of the
				+/// value stored in m_escapeIndexOrTriangleIndex.
				+inline int b3GetEscapeIndex(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
				+
				+	return -stored;
				+}
			
 
				+
			
 
				+/// Quantizes a point into three unsigned short values relative to a bounding box.
				+///
				+/// The point is clamped into [bvhAabbMin, bvhAabbMax], offset by bvhAabbMin and
				+/// multiplied component-wise by bvhQuantization before being truncated.
				+///
				+/// @param out              receives the three quantized components
				+/// @param point2           point to quantize
				+/// @param isMax            non-zero: 1 is added before truncation and bit 0 is
				+///                         set (odd result); zero: bit 0 is cleared (even result)
				+/// @param bvhAabbMin       lower corner of the bounding box
				+/// @param bvhAabbMax       upper corner of the bounding box
				+/// @param bvhQuantization  per-component scale factor
				+inline void b3QuantizeWithClamp(unsigned short* out, b3Float4ConstArg point2, int isMax, b3Float4ConstArg bvhAabbMin, b3Float4ConstArg bvhAabbMax, b3Float4ConstArg bvhQuantization)
				+{
				+	// clamp into the box, then rescale relative to its lower corner
				+	b3Float4 clamped = b3MinFloat4(b3MaxFloat4(point2, bvhAabbMin), bvhAabbMax);
				+	b3Float4 scaled = (clamped - bvhAabbMin) * bvhQuantization;
				+
				+	if (!isMax)
				+	{
				+		// lower bound: truncate and clear bit 0
				+		out[0] = (unsigned short)(((unsigned short)(scaled.x) & 0xfffe));
				+		out[1] = (unsigned short)(((unsigned short)(scaled.y) & 0xfffe));
				+		out[2] = (unsigned short)(((unsigned short)(scaled.z) & 0xfffe));
				+		return;
				+	}
				+
				+	// upper bound: add one, truncate and set bit 0
				+	out[0] = (unsigned short)(((unsigned short)(scaled.x + 1.f) | 1));
				+	out[1] = (unsigned short)(((unsigned short)(scaled.y + 1.f) | 1));
				+	out[2] = (unsigned short)(((unsigned short)(scaled.z + 1.f) | 1));
				+}
			
 
				+
			
 
				+/// Overlap test for two quantized AABBs.
				+///
				+/// @param aabbMin1,aabbMax1  corners of the first box (3 components each)
				+/// @param aabbMin2,aabbMax2  corners of the second box (3 components each)
				+/// @return 1 when the boxes overlap on all three axes (touching counts as
				+///         overlap), 0 as soon as one axis separates them
				+inline int b3TestQuantizedAabbAgainstQuantizedAabbSlow(
				+	const unsigned short int* aabbMin1,
				+	const unsigned short int* aabbMax1,
				+	const unsigned short int* aabbMin2,
				+	const unsigned short int* aabbMax2)
				+{
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		// box 1 starts after box 2 ends on this axis
				+		if (aabbMin1[axis] > aabbMax2[axis])
				+			return 0;
				+		// box 1 ends before box 2 starts on this axis
				+		if (aabbMax1[axis] < aabbMin2[axis])
				+			return 0;
				+	}
				+	return 1;
				+}
			
 
				+
			
 
				+#endif  //B3_QUANTIZED_BVH_NODE_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h
@@ -0,0 +1,89 @@
 
				+#ifndef B3_REDUCE_CONTACTS_H
			
 
				+#define B3_REDUCE_CONTACTS_H
			
 
				+
			
 
				+inline int b3ReduceContacts(const b3Float4* p, int nPoints, const b3Float4& nearNormal, b3Int4* contactIdx)
			
 
				+{
			
 
				+	if (nPoints == 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (nPoints <= 4)
			
 
				+		return nPoints;
			
 
				+
			
 
				+	if (nPoints > 64)
			
 
				+		nPoints = 64;
			
 
				+
			
 
				+	b3Float4 center = b3MakeFloat4(0, 0, 0, 0);
			
 
				+	{
			
 
				+		for (int i = 0; i < nPoints; i++)
			
 
				+			center += p[i];
			
 
				+		center /= (float)nPoints;
			
 
				+	}
			
 
				+
			
 
				+	//	sample 4 directions
			
 
				+
			
 
				+	b3Float4 aVector = p[0] - center;
			
 
				+	b3Float4 u = b3Cross3(nearNormal, aVector);
			
 
				+	b3Float4 v = b3Cross3(nearNormal, u);
			
 
				+	u = b3FastNormalized3(u);
			
 
				+	v = b3FastNormalized3(v);
			
 
				+
			
 
				+	//keep point with deepest penetration
			
 
				+	float minW = FLT_MAX;
			
 
				+
			
 
				+	int minIndex = -1;
			
 
				+
			
 
				+	b3Float4 maxDots;
			
 
				+	maxDots.x = FLT_MIN;
			
 
				+	maxDots.y = FLT_MIN;
			
 
				+	maxDots.z = FLT_MIN;
			
 
				+	maxDots.w = FLT_MIN;
			
 
				+
			
 
				+	//	idx, distance
			
 
				+	for (int ie = 0; ie < nPoints; ie++)
			
 
				+	{
			
 
				+		if (p[ie].w < minW)
			
 
				+		{
			
 
				+			minW = p[ie].w;
			
 
				+			minIndex = ie;
			
 
				+		}
			
 
				+		float f;
			
 
				+		b3Float4 r = p[ie] - center;
			
 
				+		f = b3Dot3F4(u, r);
			
 
				+		if (f < maxDots.x)
			
 
				+		{
			
 
				+			maxDots.x = f;
			
 
				+			contactIdx[0].x = ie;
			
 
				+		}
			
 
				+
			
 
				+		f = b3Dot3F4(-u, r);
			
 
				+		if (f < maxDots.y)
			
 
				+		{
			
 
				+			maxDots.y = f;
			
 
				+			contactIdx[0].y = ie;
			
 
				+		}
			
 
				+
			
 
				+		f = b3Dot3F4(v, r);
			
 
				+		if (f < maxDots.z)
			
 
				+		{
			
 
				+			maxDots.z = f;
			
 
				+			contactIdx[0].z = ie;
			
 
				+		}
			
 
				+
			
 
				+		f = b3Dot3F4(-v, r);
			
 
				+		if (f < maxDots.w)
			
 
				+		{
			
 
				+			maxDots.w = f;
			
 
				+			contactIdx[0].w = ie;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
			
 
				+	{
			
 
				+		//replace the first contact with minimum (todo: replace contact with least penetration)
			
 
				+		contactIdx[0].x = minIndex;
			
 
				+	}
			
 
				+
			
 
				+	return 4;
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_REDUCE_CONTACTS_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h
@@ -0,0 +1,31 @@
 
				+#ifndef B3_RIGIDBODY_DATA_H
			
 
				+#define B3_RIGIDBODY_DATA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+typedef struct b3RigidBodyData b3RigidBodyData_t;
			
 
				+
			
 
				+struct b3RigidBodyData
			
 
				+{
			
 
				+	b3Float4 m_pos;
			
 
				+	b3Quat m_quat;
			
 
				+	b3Float4 m_linVel;
			
 
				+	b3Float4 m_angVel;
			
 
				+
			
 
				+	int m_collidableIdx;
			
 
				+	float m_invMass;
			
 
				+	float m_restituitionCoeff;
			
 
				+	float m_frictionCoeff;
			
 
				+};
			
 
				+
			
 
				+typedef struct b3InertiaData b3InertiaData_t;
			
 
				+
			
 
				+struct b3InertiaData
			
 
				+{
			
 
				+	b3Mat3x3 m_invInertiaWorld;
			
 
				+	b3Mat3x3 m_initInvInertia;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_RIGIDBODY_DATA_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h
+++ b/Dependencies/include/bullet3/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h
@@ -0,0 +1,35 @@
 
				+#ifndef B3_UPDATE_AABBS_H
			
 
				+#define B3_UPDATE_AABBS_H
			
 
				+
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+void b3ComputeWorldAabb(int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)
			
 
				+{
			
 
				+	__global const b3RigidBodyData_t* body = &bodies[bodyId];
			
 
				+
			
 
				+	b3Float4 position = body->m_pos;
			
 
				+	b3Quat orientation = body->m_quat;
			
 
				+
			
 
				+	int collidableIndex = body->m_collidableIdx;
			
 
				+	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
			
 
				+
			
 
				+	if (shapeIndex >= 0)
			
 
				+	{
			
 
				+		b3Aabb_t localAabb = localShapeAABB[collidableIndex];
			
 
				+		b3Aabb_t worldAabb;
			
 
				+
			
 
				+		b3Float4 aabbAMinOut, aabbAMaxOut;
			
 
				+		float margin = 0.f;
			
 
				+		b3TransformAabb2(localAabb.m_minVec, localAabb.m_maxVec, margin, position, orientation, &aabbAMinOut, &aabbAMaxOut);
			
 
				+
			
 
				+		worldAabb.m_minVec = aabbAMinOut;
			
 
				+		worldAabb.m_minIndices[3] = bodyId;
			
 
				+		worldAabb.m_maxVec = aabbAMaxOut;
			
 
				+		worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass == 0.f ? 0 : 1;
			
 
				+		worldAabbs[bodyId] = worldAabb;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_UPDATE_AABBS_H
			
--- a/Dependencies/include/bullet3/Bullet3Collision/premake4.lua
+++ b/Dependencies/include/bullet3/Bullet3Collision/premake4.lua
@@ -0,0 +1,16 @@
 
				-- premake4 project definition: builds Bullet3Collision as a static C++ library
				-- from every .cpp/.h file found below this directory.
				project("Bullet3Collision")

				language("C++")

				kind("StaticLib")

				-- headers are included relative to the parent directory
				includedirs({ ".." })

				-- position independent code on Linux
				if os.is("Linux") then
					buildoptions({ "-fPIC" })
				end

				files({
					"**.cpp",
					"**.h",
				})
			
--- a/Dependencies/include/bullet3/Bullet3Common/CMakeLists.txt
+++ b/Dependencies/include/bullet3/Bullet3Common/CMakeLists.txt
@@ -0,0 +1,63 @@
 
				+
			
 
				# Build description for the Bullet3Common library.
				include_directories(
					${BULLET_PHYSICS_SOURCE_DIR}/src
				)

				# Translation units
				set(Bullet3Common_SRCS
					b3AlignedAllocator.cpp
					b3Vector3.cpp
					b3Logging.cpp
				)

				# Headers (also used as PUBLIC_HEADER for framework builds below)
				set(Bullet3Common_HDRS
					b3AlignedAllocator.h
					b3AlignedObjectArray.h
					b3CommandLineArgs.h
					b3HashMap.h
					b3Logging.h
					b3Matrix3x3.h
					b3MinMax.h
					b3PoolAllocator.h
					b3QuadWord.h
					b3Quaternion.h
					b3Random.h
					b3Scalar.h
					b3StackAlloc.h
					b3Transform.h
					b3TransformUtil.h
					b3Vector3.h
					shared/b3Float4.h
					shared/b3Int2.h
					shared/b3Int4.h
					shared/b3Mat3x3.h
					shared/b3PlatformDefinitions.h
					shared/b3Quat.h
				)

				add_library(Bullet3Common ${Bullet3Common_SRCS} ${Bullet3Common_HDRS})
				set_target_properties(Bullet3Common PROPERTIES VERSION ${BULLET_VERSION})
				set_target_properties(Bullet3Common PROPERTIES SOVERSION ${BULLET_VERSION})

				if(INSTALL_LIBS)
					if(NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
						# FILES_MATCHING requires CMake 2.6
						if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
							if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
								install(TARGETS Bullet3Common DESTINATION .)
							else()
								install(TARGETS Bullet3Common
									RUNTIME DESTINATION bin
									LIBRARY DESTINATION lib${LIB_SUFFIX}
									ARCHIVE DESTINATION lib${LIB_SUFFIX})
								install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
									DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h" PATTERN
									".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
							endif()
						endif()

						if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
							set_target_properties(Bullet3Common PROPERTIES FRAMEWORK true)
							set_target_properties(Bullet3Common PROPERTIES PUBLIC_HEADER "${Bullet3Common_HDRS}")
						endif()
					endif()
				endif()
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3AlignedAllocator.cpp
+++ b/Dependencies/include/bullet3/Bullet3Common/b3AlignedAllocator.cpp
@@ -0,0 +1,186 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+int b3g_numAlignedAllocs = 0;
			
 
				+int b3g_numAlignedFree = 0;
			
 
				+int b3g_totalBytesAlignedAllocs = 0;  //detect memory leaks
			
 
				+#endif
			
 
				+
			
 
				/// Default unaligned allocation hook: forwards the request to malloc.
				static void *b3AllocDefault(size_t size)
				{
					void *block = malloc(size);
					return block;
				}

				/// Default unaligned release hook: forwards the pointer to free.
				static void b3FreeDefault(void *ptr)
				{
					free(ptr);
				}
			
 
				+
			
 
				+static b3AllocFunc *b3s_allocFunc = b3AllocDefault;
			
 
				+static b3FreeFunc *b3s_freeFunc = b3FreeDefault;
			
 
				+
			
 
				+#if defined(B3_HAS_ALIGNED_ALLOCATOR)
			
 
				+#include <malloc.h>
			
 
				+static void *b3AlignedAllocDefault(size_t size, int alignment)
			
 
				+{
			
 
				+	return _aligned_malloc(size, (size_t)alignment);
			
 
				+}
			
 
				+
			
 
				+static void b3AlignedFreeDefault(void *ptr)
			
 
				+{
			
 
				+	_aligned_free(ptr);
			
 
				+}
			
 
				+#elif defined(__CELLOS_LV2__)
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+static inline void *b3AlignedAllocDefault(size_t size, int alignment)
			
 
				+{
			
 
				+	return memalign(alignment, size);
			
 
				+}
			
 
				+
			
 
				+static inline void b3AlignedFreeDefault(void *ptr)
			
 
				+{
			
 
				+	free(ptr);
			
 
				+}
			
 
				+#else
			
 
				+
			
 
				+static inline void *b3AlignedAllocDefault(size_t size, int alignment)
			
 
				+{
			
 
				+	void *ret;
			
 
				+	char *real;
			
 
				+	real = (char *)b3s_allocFunc(size + sizeof(void *) + (alignment - 1));
			
 
				+	if (real)
			
 
				+	{
			
 
				+		ret = b3AlignPointer(real + sizeof(void *), alignment);
			
 
				+		*((void **)(ret)-1) = (void *)(real);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		ret = (void *)(real);
			
 
				+	}
			
 
				+	return (ret);
			
 
				+}
			
 
				+
			
 
				+static inline void b3AlignedFreeDefault(void *ptr)
			
 
				+{
			
 
				+	void *real;
			
 
				+
			
 
				+	if (ptr)
			
 
				+	{
			
 
				+		real = *((void **)(ptr)-1);
			
 
				+		b3s_freeFunc(real);
			
 
				+	}
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+static b3AlignedAllocFunc *b3s_alignedAllocFunc = b3AlignedAllocDefault;
			
 
				+static b3AlignedFreeFunc *b3s_alignedFreeFunc = b3AlignedFreeDefault;
			
 
				+
			
 
				+void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc)
			
 
				+{
			
 
				+	b3s_alignedAllocFunc = allocFunc ? allocFunc : b3AlignedAllocDefault;
			
 
				+	b3s_alignedFreeFunc = freeFunc ? freeFunc : b3AlignedFreeDefault;
			
 
				+}
			
 
				+
			
 
				+void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc)
			
 
				+{
			
 
				+	b3s_allocFunc = allocFunc ? allocFunc : b3AllocDefault;
			
 
				+	b3s_freeFunc = freeFunc ? freeFunc : b3FreeDefault;
			
 
				+}
			
 
				+
			
 
				+#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
			
 
				+//this generic allocator provides the total allocated number of bytes
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+void *b3AlignedAllocInternal(size_t size, int alignment, int line, char *filename)
			
 
				+{
			
 
				+	void *ret;
			
 
				+	char *real;
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+	b3g_totalBytesAlignedAllocs += size;
			
 
				+	b3g_numAlignedAllocs++;
			
 
				+#endif
			
 
				+	real = (char *)b3s_allocFunc(size + 2 * sizeof(void *) + (alignment - 1));
			
 
				+	if (real)
			
 
				+	{
			
 
				+		ret = (void *)b3AlignPointer(real + 2 * sizeof(void *), alignment);
			
 
				+		*((void **)(ret)-1) = (void *)(real);
			
 
				+		*((int *)(ret)-2) = size;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		ret = (void *)(real);  //??
			
 
				+	}
			
 
				+
			
 
				+	b3Printf("allocation#%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedAllocs, real, filename, line, size);
			
 
				+
			
 
				+	int *ptr = (int *)ret;
			
 
				+	*ptr = 12;
			
 
				+	return (ret);
			
 
				+}
			
 
				+
			
 
				+void b3AlignedFreeInternal(void *ptr, int line, char *filename)
			
 
				+{
			
 
				+	void *real;
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+	b3g_numAlignedFree++;
			
 
				+#endif
			
 
				+	if (ptr)
			
 
				+	{
			
 
				+		real = *((void **)(ptr)-1);
			
 
				+		int size = *((int *)(ptr)-2);
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+		b3g_totalBytesAlignedAllocs -= size;
			
 
				+#endif
			
 
				+		b3Printf("free #%d at address %x, from %s,line %d, size %d\n", b3g_numAlignedFree, real, filename, line, size);
			
 
				+
			
 
				+		b3s_freeFunc(real);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		b3Printf("NULL ptr\n");
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#else  //B3_DEBUG_MEMORY_ALLOCATIONS
			
 
				+
			
 
				+void *b3AlignedAllocInternal(size_t size, int alignment)
			
 
				+{
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+	b3g_numAlignedAllocs++;
			
 
				+#endif
			
 
				+	void *ptr;
			
 
				+	ptr = b3s_alignedAllocFunc(size, alignment);
			
 
				+	//	b3Printf("b3AlignedAllocInternal %d, %x\n",size,ptr);
			
 
				+	return ptr;
			
 
				+}
			
 
				+
			
 
				+void b3AlignedFreeInternal(void *ptr)
			
 
				+{
			
 
				+	if (!ptr)
			
 
				+	{
			
 
				+		return;
			
 
				+	}
			
 
				+#ifdef B3_ALLOCATOR_STATISTICS
			
 
				+	b3g_numAlignedFree++;
			
 
				+#endif
			
 
				+	//	b3Printf("b3AlignedFreeInternal %x\n",ptr);
			
 
				+	b3s_alignedFreeFunc(ptr);
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_DEBUG_MEMORY_ALLOCATIONS
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3AlignedAllocator.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3AlignedAllocator.h
@@ -0,0 +1,110 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_ALIGNED_ALLOCATOR
			
 
				+#define B3_ALIGNED_ALLOCATOR
			
 
				+
			
 
				+///we probably replace this with our own aligned memory allocator
			
 
				+///so we replace _aligned_malloc and _aligned_free with our own
			
 
				+///that is better portable and more predictable
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+//#define B3_DEBUG_MEMORY_ALLOCATIONS 1
			
 
				+#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
			
 
				+
			
 
				+#define b3AlignedAlloc(a, b) \
			
 
				+	b3AlignedAllocInternal(a, b, __LINE__, __FILE__)
			
 
				+
			
 
				+#define b3AlignedFree(ptr) \
			
 
				+	b3AlignedFreeInternal(ptr, __LINE__, __FILE__)
			
 
				+
			
 
				+void* b3AlignedAllocInternal(size_t size, int alignment, int line, char* filename);
			
 
				+
			
 
				+void b3AlignedFreeInternal(void* ptr, int line, char* filename);
			
 
				+
			
 
				+#else
			
 
				+void* b3AlignedAllocInternal(size_t size, int alignment);
			
 
				+void b3AlignedFreeInternal(void* ptr);
			
 
				+
			
 
				+#define b3AlignedAlloc(size, alignment) b3AlignedAllocInternal(size, alignment)
			
 
				+#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr)
			
 
				+
			
 
				+#endif
			
 
				+typedef int btSizeType;
			
 
				+
			
 
				+typedef void*(b3AlignedAllocFunc)(size_t size, int alignment);
			
 
				+typedef void(b3AlignedFreeFunc)(void* memblock);
			
 
				+typedef void*(b3AllocFunc)(size_t size);
			
 
				+typedef void(b3FreeFunc)(void* memblock);
			
 
				+
			
 
				+///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom
			
 
				+void b3AlignedAllocSetCustom(b3AllocFunc* allocFunc, b3FreeFunc* freeFunc);
			
 
				+///If the developer has already an custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it.
			
 
				+void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc* allocFunc, b3AlignedFreeFunc* freeFunc);
			
 
				+
			
 
				+///The b3AlignedAllocator is a portable class for aligned memory allocations.
			
 
				+///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned.
			
 
				+template <typename T, unsigned Alignment>
			
 
				+class b3AlignedAllocator
			
 
				+{
			
 
				+	typedef b3AlignedAllocator<T, Alignment> self_type;
			
 
				+
			
 
				+public:
			
 
				+	//just going down a list:
			
 
				+	b3AlignedAllocator() {}
			
 
				+	/*
			
 
				+	b3AlignedAllocator( const self_type & ) {}
			
 
				+	*/
			
 
				+
			
 
				+	template <typename Other>
			
 
				+	b3AlignedAllocator(const b3AlignedAllocator<Other, Alignment>&)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	typedef const T* const_pointer;
			
 
				+	typedef const T& const_reference;
			
 
				+	typedef T* pointer;
			
 
				+	typedef T& reference;
			
 
				+	typedef T value_type;
			
 
				+
			
 
				+	pointer address(reference ref) const { return &ref; }
			
 
				+	const_pointer address(const_reference ref) const { return &ref; }
			
 
				+	pointer allocate(btSizeType n, const_pointer* hint = 0)
			
 
				+	{
			
 
				+		(void)hint;
			
 
				+		return reinterpret_cast<pointer>(b3AlignedAlloc(sizeof(value_type) * n, Alignment));
			
 
				+	}
			
 
				+	void construct(pointer ptr, const value_type& value) { new (ptr) value_type(value); }
			
 
				+	void deallocate(pointer ptr)
			
 
				+	{
			
 
				+		b3AlignedFree(reinterpret_cast<void*>(ptr));
			
 
				+	}
			
 
				+	void destroy(pointer ptr) { ptr->~value_type(); }
			
 
				+
			
 
				+	template <typename O>
			
 
				+	struct rebind
			
 
				+	{
			
 
				+		typedef b3AlignedAllocator<O, Alignment> other;
			
 
				+	};
			
 
				+	template <typename O>
			
 
				+	self_type& operator=(const b3AlignedAllocator<O, Alignment>&)
			
 
				+	{
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	friend bool operator==(const self_type&, const self_type&) { return true; }
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_ALIGNED_ALLOCATOR
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3AlignedObjectArray.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3AlignedObjectArray.h
@@ -0,0 +1,522 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OBJECT_ARRAY__
			
 
				+#define B3_OBJECT_ARRAY__
			
 
				+
			
 
				+#include "b3Scalar.h"  // has definitions like B3_FORCE_INLINE
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW
			
 
				+///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors
			
 
				+///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator=
			
 
				+///see discussion here: http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and
			
 
				+///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240
			
 
				+
			
 
				+#define B3_USE_PLACEMENT_NEW 1
			
 
				+//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
			
 
				+#define B3_ALLOW_ARRAY_COPY_OPERATOR  // enabling this can accidently perform deep copies of data if you are not careful
			
 
				+
			
 
				+#ifdef B3_USE_MEMCPY
			
 
				+#include <memory.h>
			
 
				+#include <string.h>
			
 
				+#endif  //B3_USE_MEMCPY
			
 
				+
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+#include <new>  //for placement new
			
 
				+#endif          //B3_USE_PLACEMENT_NEW
			
 
				+
			
 
				+///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods
			
 
				+///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data
			
 
				+template <typename T>
			
 
				+//template <class T>
			
 
				+class b3AlignedObjectArray
			
 
				+{
			
 
				+	b3AlignedAllocator<T, 16> m_allocator;
			
 
				+
			
 
				+	int m_size;
			
 
				+	int m_capacity;
			
 
				+	T* m_data;
			
 
				+	//PCK: added this line
			
 
				+	bool m_ownsMemory;
			
 
				+
			
 
				+#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+public:
			
 
				+	B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other)
			
 
				+	{
			
 
				+		copyFromArray(other);
			
 
				+		return *this;
			
 
				+	}
			
 
				+#else   //B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+private:
			
 
				+	B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T>& other);
			
 
				+#endif  //B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+
			
 
				+protected:
			
 
				+	B3_FORCE_INLINE int allocSize(int size)
			
 
				+	{
			
 
				+		return (size ? size * 2 : 1);
			
 
				+	}
			
 
				+	B3_FORCE_INLINE void copy(int start, int end, T* dest) const
			
 
				+	{
			
 
				+		int i;
			
 
				+		for (i = start; i < end; ++i)
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+			new (&dest[i]) T(m_data[i]);
			
 
				+#else
			
 
				+			dest[i] = m_data[i];
			
 
				+#endif  //B3_USE_PLACEMENT_NEW
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void init()
			
 
				+	{
			
 
				+		//PCK: added this line
			
 
				+		m_ownsMemory = true;
			
 
				+		m_data = 0;
			
 
				+		m_size = 0;
			
 
				+		m_capacity = 0;
			
 
				+	}
			
 
				+	B3_FORCE_INLINE void destroy(int first, int last)
			
 
				+	{
			
 
				+		int i;
			
 
				+		for (i = first; i < last; i++)
			
 
				+		{
			
 
				+			m_data[i].~T();
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void* allocate(int size)
			
 
				+	{
			
 
				+		if (size)
			
 
				+			return m_allocator.allocate(size);
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void deallocate()
			
 
				+	{
			
 
				+		if (m_data)
			
 
				+		{
			
 
				+			//PCK: enclosed the deallocation in this block
			
 
				+			if (m_ownsMemory)
			
 
				+			{
			
 
				+				m_allocator.deallocate(m_data);
			
 
				+			}
			
 
				+			m_data = 0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+public:
			
 
				+	b3AlignedObjectArray()
			
 
				+	{
			
 
				+		init();
			
 
				+	}
			
 
				+
			
 
				+	~b3AlignedObjectArray()
			
 
				+	{
			
 
				+		clear();
			
 
				+	}
			
 
				+
			
 
				+	///Generally it is best to avoid using the copy constructor of an b3AlignedObjectArray, and use a (const) reference to the array instead.
			
 
				+	b3AlignedObjectArray(const b3AlignedObjectArray& otherArray)
			
 
				+	{
			
 
				+		init();
			
 
				+
			
 
				+		int otherSize = otherArray.size();
			
 
				+		resize(otherSize);
			
 
				+		otherArray.copy(0, otherSize, m_data);
			
 
				+	}
			
 
				+
			
 
				+	/// return the number of elements in the array
			
 
				+	B3_FORCE_INLINE int size() const
			
 
				+	{
			
 
				+		return m_size;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE const T& at(int n) const
			
 
				+	{
			
 
				+		b3Assert(n >= 0);
			
 
				+		b3Assert(n < size());
			
 
				+		return m_data[n];
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE T& at(int n)
			
 
				+	{
			
 
				+		b3Assert(n >= 0);
			
 
				+		b3Assert(n < size());
			
 
				+		return m_data[n];
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE const T& operator[](int n) const
			
 
				+	{
			
 
				+		b3Assert(n >= 0);
			
 
				+		b3Assert(n < size());
			
 
				+		return m_data[n];
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE T& operator[](int n)
			
 
				+	{
			
 
				+		b3Assert(n >= 0);
			
 
				+		b3Assert(n < size());
			
 
				+		return m_data[n];
			
 
				+	}
			
 
				+
			
 
				+	///clear the array, deallocated memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations.
			
 
				+	B3_FORCE_INLINE void clear()
			
 
				+	{
			
 
				+		destroy(0, size());
			
 
				+
			
 
				+		deallocate();
			
 
				+
			
 
				+		init();
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void pop_back()
			
 
				+	{
			
 
				+		b3Assert(m_size > 0);
			
 
				+		m_size--;
			
 
				+		m_data[m_size].~T();
			
 
				+	}
			
 
				+
			
 
				+	///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument.
			
 
				+	///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations.
			
 
				+	B3_FORCE_INLINE void resizeNoInitialize(int newsize)
			
 
				+	{
			
 
				+		int curSize = size();
			
 
				+
			
 
				+		if (newsize < curSize)
			
 
				+		{
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (newsize > size())
			
 
				+			{
			
 
				+				reserve(newsize);
			
 
				+			}
			
 
				+			//leave this uninitialized
			
 
				+		}
			
 
				+		m_size = newsize;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void resize(int newsize, const T& fillData = T())
			
 
				+	{
			
 
				+		int curSize = size();
			
 
				+
			
 
				+		if (newsize < curSize)
			
 
				+		{
			
 
				+			for (int i = newsize; i < curSize; i++)
			
 
				+			{
			
 
				+				m_data[i].~T();
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (newsize > size())
			
 
				+			{
			
 
				+				reserve(newsize);
			
 
				+			}
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+			for (int i = curSize; i < newsize; i++)
			
 
				+			{
			
 
				+				new (&m_data[i]) T(fillData);
			
 
				+			}
			
 
				+#endif  //B3_USE_PLACEMENT_NEW
			
 
				+		}
			
 
				+
			
 
				+		m_size = newsize;
			
 
				+	}
			
 
				+	B3_FORCE_INLINE T& expandNonInitializing()
			
 
				+	{
			
 
				+		int sz = size();
			
 
				+		if (sz == capacity())
			
 
				+		{
			
 
				+			reserve(allocSize(size()));
			
 
				+		}
			
 
				+		m_size++;
			
 
				+
			
 
				+		return m_data[sz];
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE T& expand(const T& fillValue = T())
			
 
				+	{
			
 
				+		int sz = size();
			
 
				+		if (sz == capacity())
			
 
				+		{
			
 
				+			reserve(allocSize(size()));
			
 
				+		}
			
 
				+		m_size++;
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+		new (&m_data[sz]) T(fillValue);  //use the in-place new (not really allocating heap memory)
			
 
				+#endif
			
 
				+
			
 
				+		return m_data[sz];
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void push_back(const T& _Val)
			
 
				+	{
			
 
				+		int sz = size();
			
 
				+		if (sz == capacity())
			
 
				+		{
			
 
				+			reserve(allocSize(size()));
			
 
				+		}
			
 
				+
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+		new (&m_data[m_size]) T(_Val);
			
 
				+#else
			
 
				+		m_data[size()] = _Val;
			
 
				+#endif  //B3_USE_PLACEMENT_NEW
			
 
				+
			
 
				+		m_size++;
			
 
				+	}
			
 
				+
			
 
				+	/// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve()
			
 
				+	B3_FORCE_INLINE int capacity() const
			
 
				+	{
			
 
				+		return m_capacity;
			
 
				+	}
			
 
				+
			
 
				+	/// Ensures storage for at least _Count elements. Does nothing when the
				+	/// current capacity already suffices. Otherwise the live elements are copied
				+	/// into a new buffer, the old elements are destroyed and the old buffer is
				+	/// released. If the allocation fails an error is logged and the array ends
				+	/// up empty with zero capacity.
				+	B3_FORCE_INLINE void reserve(int _Count)
				+	{
				+		if (capacity() >= _Count)
				+		{
				+			return;  // already enough room
				+		}
				+
				+		T* s = (T*)allocate(_Count);
				+		b3Assert(s);
				+
				+		// Remember the element count up front: the old elements must be
				+		// destroyed even on the out-of-memory path, where m_size is reset.
				+		const int liveCount = size();
				+		if (s == 0)
				+		{
				+			b3Error("b3AlignedObjectArray reserve out-of-memory\n");
				+			_Count = 0;
				+		}
				+		else
				+		{
				+			copy(0, liveCount, s);
				+		}
				+
				+		destroy(0, liveCount);
				+		if (s == 0)
				+		{
				+			m_size = 0;
				+		}
				+
				+		deallocate();
				+
				+		//PCK: added this line
				+		m_ownsMemory = true;
				+
				+		m_data = s;
				+
				+		m_capacity = _Count;
				+	}
			
 
				+
			
 
				+	/// Default ascending comparator for quickSort()/heapSort(); orders elements
				+	/// with T's operator<.
				+	class less
				+	{
				+	public:
				+		// const-qualified so the functor can be invoked through the
				+		// `const L& CompareFunc` parameter used by the sort routines.
				+		bool operator()(const T& a, const T& b) const
				+		{
				+			return (a < b);
				+		}
				+	};
			
 
				+
			
 
				+	template <typename L>
				+	void quickSortInternal(const L& CompareFunc, int lo, int hi)
				+	{
				+		//  Recursively sorts m_data[lo..hi] in place; lo and hi are both inclusive indices.
				+		//  CompareFunc(a, b) is used as the "a is ordered before b" test.
				+		int i = lo, j = hi;
				+		T x = m_data[(lo + hi) / 2];  // pivot: a copy of the middle element
				+
				+		//  partition: i skips elements ordered before the pivot, j skips elements ordered after it, then the pair is swapped
				+		do
				+		{
				+			while (CompareFunc(m_data[i], x))
				+				i++;
				+			while (CompareFunc(x, m_data[j]))
				+				j--;
				+			if (i <= j)
				+			{
				+				swap(i, j);
				+				i++;
				+				j--;
				+			}
				+		} while (i <= j);
				+
				+		//  recursion: sort the two sub-ranges left on either side of the crossing point
				+		if (lo < j)
				+			quickSortInternal(CompareFunc, lo, j);
				+		if (i < hi)
				+			quickSortInternal(CompareFunc, i, hi);
				+	}
			
 
				+
			
 
				+	/// Sorts the whole array in place with quickSortInternal(), using
				+	/// CompareFunc(a, b) as the ordering test. Arrays of 0 or 1 elements are
				+	/// left untouched.
				+	template <typename L>
				+	void quickSort(const L& CompareFunc)
				+	{
				+		const int count = size();
				+		if (count <= 1)
				+			return;  // nothing to sort
				+
				+		quickSortInternal(CompareFunc, 0, count - 1);
				+	}
			
 
				+
			
 
				+	///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
			
 
				+	template <typename L>
				+	void downHeap(T* pArr, int k, int n, const L& CompareFunc)
				+	{
				+		/*  Sifts the element at 1-based heap position k down within the first n elements of pArr. */
				+		/*  Heap positions are 1-based; the element for position p is stored at pArr[p - 1]. */
				+
				+		T temp = pArr[k - 1];  // element being sifted down
				+		/* loop while position k still has at least one child inside the first n elements */
				+		while (k <= n / 2)
				+		{
				+			int child = 2 * k;  // left child position
				+
				+			if ((child < n) && CompareFunc(pArr[child - 1], pArr[child]))
				+			{
				+				child++;
				+			}
				+			/* child now names the child that CompareFunc orders last (the "larger" one) */
				+			if (CompareFunc(temp, pArr[child - 1]))
				+			{
				+				/* move child up */
				+				pArr[k - 1] = pArr[child - 1];
				+				k = child;
				+			}
				+			else
				+			{
				+				break;
				+			}
				+		}
				+		pArr[k - 1] = temp;  // drop the sifted element into its final position
				+	} /*downHeap*/
			
 
				+
			
 
				+	void swap(int index0, int index1)  // exchanges the elements at index0 and index1 (no bounds checking)
				+	{
				+#ifdef B3_USE_MEMCPY
				+		char temp[sizeof(T)];  // raw byte buffer: the swap bypasses T's copy operations
				+		memcpy(temp, &m_data[index0], sizeof(T));
				+		memcpy(&m_data[index0], &m_data[index1], sizeof(T));
				+		memcpy(&m_data[index1], temp, sizeof(T));
				+#else
				+		T temp = m_data[index0];
				+		m_data[index0] = m_data[index1];
				+		m_data[index1] = temp;
				+#endif  //B3_USE_MEMCPY
				+	}
			
 
				+
			
 
				+	template <typename L>
				+	void heapSort(const L& CompareFunc)
				+	{
				+		/* Sorts m_data[0..m_size-1] in place: build a heap with downHeap(), then repeatedly move its root to the end. */
				+		int k;
				+		int n = m_size;
				+		for (k = n / 2; k > 0; k--)
				+		{
				+			downHeap(m_data, k, n, CompareFunc);
				+		}
				+
				+		/* the first n elements now form a heap (1-based positions, see downHeap) */
				+		while (n >= 1)
				+		{
				+			swap(0, n - 1); /* move the heap root behind the shrinking heap */
				+
				+			n = n - 1;
				+			/* restore the heap property over the remaining n elements */
				+			downHeap(m_data, 1, n, CompareFunc);
				+		}
				+	}
			
 
				+
			
 
				+	/// Non-recursive binary search. The array must already be sorted in
				+	/// ascending order according to T's operator< / operator>.
				+	/// Returns the index of an element that is neither less nor greater than
				+	/// key, or size() when no such element exists.
				+	int findBinarySearch(const T& key) const
				+	{
				+		int first = 0;
				+		int last = size() - 1;
				+
				+		while (first <= last)
				+		{
				+			// Written as first + (last - first) / 2 so the midpoint cannot
				+			// overflow int the way (first + last) / 2 can on very large arrays.
				+			const int mid = first + (last - first) / 2;
				+			if (key > m_data[mid])
				+				first = mid + 1;  // continue in the upper half
				+			else if (key < m_data[mid])
				+				last = mid - 1;  // continue in the lower half
				+			else
				+				return mid;  // found
				+		}
				+		return size();  // failed to find key
				+	}
			
 
				+
			
 
				+	/// Linear scan for the first element equal to key (operator==).
				+	/// Returns its index, or size() when there is no match.
				+	int findLinearSearch(const T& key) const
				+	{
				+		for (int pos = 0; pos < size(); ++pos)
				+		{
				+			if (m_data[pos] == key)
				+				return pos;
				+		}
				+		return size();
				+	}
			
 
				+
			
 
				+	/// Same scan as findLinearSearch(), but reports "not found" as -1
				+	/// instead of size().
				+	int findLinearSearch2(const T& key) const
				+	{
				+		for (int pos = 0; pos < size(); ++pos)
				+		{
				+			if (m_data[pos] == key)
				+				return pos;
				+		}
				+		return -1;
				+	}
			
 
				+
			
 
				+	/// Removes the first element equal to key, if any, by swapping it with the
				+	/// last element and popping the back. Element order is therefore not preserved.
				+	void remove(const T& key)
				+	{
				+		const int hit = findLinearSearch(key);
				+		if (hit >= size())
				+			return;  // findLinearSearch() returns size() for "not found"
				+
				+		swap(hit, size() - 1);
				+		pop_back();
				+	}
			
 
				+
			
 
				+	//PCK: whole function
			
 
				+	/// Clears the array, then makes it a view over caller-provided storage:
				+	/// buffer becomes m_data with the given size and capacity, and the array
				+	/// is marked as not owning that memory.
				+	void initializeFromBuffer(void* buffer, int size, int capacity)
				+	{
				+		clear();
				+
				+		m_data = static_cast<T*>(buffer);
				+		m_capacity = capacity;
				+		m_size = size;
				+		m_ownsMemory = false;
				+	}
			
 
				+
			
 
				+	/// Resizes this array to otherArray's size and then copies all of
				+	/// otherArray's elements into this array's storage.
				+	void copyFromArray(const b3AlignedObjectArray& otherArray)
				+	{
				+		const int count = otherArray.size();
				+		resize(count);
				+		otherArray.copy(0, count, m_data);
				+	}
			
 
				+
			
 
				+	/// Removes the element at index by swapping it with the last element and
				+	/// popping the back (element order is not preserved). Indices outside
				+	/// [0, size()) are ignored.
				+	void removeAtIndex(int index)
				+	{
				+		// The lower bound matters too: a negative index would make swap()
				+		// touch memory in front of the buffer.
				+		if (index >= 0 && index < size())
				+		{
				+			swap(index, size() - 1);
				+			pop_back();
				+		}
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_OBJECT_ARRAY__
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3CommandLineArgs.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3CommandLineArgs.h
@@ -0,0 +1,106 @@
 
				+#ifndef COMMAND_LINE_ARGS_H
			
 
				+#define COMMAND_LINE_ARGS_H
			
 
				+
			
 
				+/******************************************************************************
			
 
				+ * Command-line parsing
			
 
				+ ******************************************************************************/
			
 
				+#include <algorithm>
				+#include <cstdlib>
				+#include <cstring>
				+#include <map>
				+#include <sstream>
				+#include <string>
			
 
				+/// Collects "--key" and "--key=value" command-line arguments into a
				+/// key/value map. Arguments that do not start with "--" are ignored.
				+class b3CommandLineArgs
				+{
				+protected:
				+	/// Parsed arguments: key (without the leading "--") -> value ("" for a bare flag).
				+	std::map<std::string, std::string> pairs;
				+
				+public:
				+	/// Parses argv[1] .. argv[argc - 1]; argv[0] is skipped.
				+	b3CommandLineArgs(int argc, char **argv)
				+	{
				+		addArgs(argc, argv);
				+	}
				+
				+	/// Adds every "--key[=value]" entry of argv[1] .. argv[argc - 1].
				+	/// A key that is already present keeps its first value.
				+	void addArgs(int argc, char **argv)
				+	{
				+		for (int i = 1; i < argc; i++)
				+		{
				+			const std::string arg = argv[i];
				+
				+			// Only arguments that begin with "--" are recognised.
				+			if (arg.compare(0, 2, "--") != 0)
				+			{
				+				continue;
				+			}
				+
				+			std::string key, val;
				+			const std::string::size_type pos = arg.find('=');
				+			if (pos == std::string::npos)
				+			{
				+				key = arg.substr(2);
				+			}
				+			else
				+			{
				+				key = arg.substr(2, pos - 2);
				+				val = arg.substr(pos + 1);  // everything after '='
				+			}
				+
				+			//only add new keys, don't replace existing (map::insert never overwrites)
				+			pairs.insert(std::make_pair(key, val));
				+		}
				+	}
				+
				+	/// Returns true when arg_name (given without the leading "--") was parsed.
				+	bool CheckCmdLineFlag(const char *arg_name) const
				+	{
				+		return pairs.find(arg_name) != pairs.end();
				+	}
				+
				+	/// Parses the value stored for arg_name into val; defined after the class.
				+	template <typename T>
				+	bool GetCmdLineArgument(const char *arg_name, T &val);
				+
				+	/// Number of distinct keys parsed so far.
				+	int ParsedArgc() const
				+	{
				+		return static_cast<int>(pairs.size());
				+	}
				+};
			
 
				+
			
 
				+/// Looks up arg_name and, when present, extracts its stored text into val
				+/// with operator>>. Returns true whenever the key exists (the result of
				+/// the extraction itself is not checked), false when the key is absent.
				+template <typename T>
				+inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
				+{
				+	const std::map<std::string, std::string>::iterator found = pairs.find(arg_name);
				+	if (found == pairs.end())
				+	{
				+		return false;
				+	}
				+
				+	std::istringstream parser(found->second);
				+	parser >> val;
				+	return true;
				+}
			
 
				+
			
 
				+/// Specialisation for C strings. On success val receives a malloc()-allocated,
				+/// NUL-terminated copy of the stored value (the caller releases it with free())
				+/// and true is returned. When the key is absent, or the copy cannot be
				+/// allocated, val is set to NULL and false is returned.
				+template <>
				+inline bool b3CommandLineArgs::GetCmdLineArgument<char *>(const char *arg_name, char *&val)
				+{
				+	val = NULL;
				+
				+	const std::map<std::string, std::string>::iterator itr = pairs.find(arg_name);
				+	if (itr == pairs.end())
				+	{
				+		return false;
				+	}
				+
				+	const std::string &s = itr->second;
				+	char *copy = static_cast<char *>(std::malloc(sizeof(char) * (s.length() + 1)));
				+	if (copy == NULL)
				+	{
				+		// Out of memory: report failure instead of copying through a null pointer.
				+		return false;
				+	}
				+
				+	std::strcpy(copy, s.c_str());
				+	val = copy;
				+	return true;
				+}
			
 
				+
			
 
				+#endif  //COMMAND_LINE_ARGS_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3FileUtils.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3FileUtils.h
@@ -0,0 +1,133 @@
 
				+#ifndef B3_FILE_UTILS_H
			
 
				+#define B3_FILE_UTILS_H
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include "b3Scalar.h"
			
 
				+#include <stddef.h>  //ptrdiff_h
			
 
				+#include <string.h>
			
 
				+
			
 
				+struct b3FileUtils
			
 
				+{
			
 
				+	b3FileUtils()
			
 
				+	{
			
 
				+	}
			
 
				+	virtual ~b3FileUtils()
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	/// Tries to open orgFileName for reading, first exactly as given and then
				+	/// below a fixed list of directories relative to the working directory.
				+	/// On success the path that could be opened is written to relativeFileName
				+	/// and true is returned. Candidates that do not fit into
				+	/// maxRelativeFileNameMaxLen bytes (including the terminator) are skipped,
				+	/// so the output buffer is never overrun. Returns false when nothing could
				+	/// be opened or when an argument is NULL / the buffer size is not positive.
				+	static bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen)
				+	{
				+		if (!orgFileName || !relativeFileName || maxRelativeFileNameMaxLen <= 0)
				+		{
				+			return false;
				+		}
				+
				+		// The empty prefix comes first so the name is tried unmodified before
				+		// the search directories.
				+		const char* prefix[] = {"", "./", "./data/", "../data/", "../../data/", "../../../data/", "../../../../data/"};
				+		const int numPrefixes = sizeof(prefix) / sizeof(prefix[0]);
				+
				+		const size_t nameLen = strlen(orgFileName);
				+		const size_t bufferLen = (size_t)maxRelativeFileNameMaxLen;
				+
				+		for (int i = 0; i < numPrefixes; i++)
				+		{
				+			const size_t prefixLen = strlen(prefix[i]);
				+			if (prefixLen + nameLen + 1 > bufferLen)
				+			{
				+				continue;  // candidate would not fit into the caller's buffer
				+			}
				+
				+			memcpy(relativeFileName, prefix[i], prefixLen);
				+			memmove(relativeFileName + prefixLen, orgFileName, nameLen + 1);  // +1 copies the terminator
				+
				+			FILE* f = fopen(relativeFileName, "rb");
				+			if (f)
				+			{
				+				fclose(f);
				+				return true;
				+			}
				+		}
				+
				+		return false;
				+	}
			
 
				+
			
 
				+	/// Scans name from left to right for non-overlapping occurrences of
				+	/// pattern and returns a pointer to the text that follows the last one
				+	/// found. Returns name itself when pattern does not occur or is empty.
				+	static const char* strip2(const char* name, const char* pattern)
				+	{
				+		const size_t patlen = strlen(pattern);
				+		if (patlen == 0)
				+		{
				+			// strstr() matches an empty pattern without advancing, so the
				+			// scan below would never terminate.
				+			return name;
				+		}
				+
				+		const char* oriptr = name;
				+		for (const char* patloc = strstr(oriptr, pattern); patloc; patloc = strstr(oriptr, pattern))
				+		{
				+			oriptr = patloc + patlen;  // continue right after this match
				+		}
				+		return oriptr;
				+	}
			
 
				+
			
 
				+	/// Copies the directory part of fileName (everything up to and including
				+	/// the position found by stripping "/" and then "\\" with strip2()) into
				+	/// path and returns its length. When there is no directory part, or it
				+	/// does not fit into maxPathLength bytes with its terminator, path becomes
				+	/// the empty string (if maxPathLength > 0) and 0 is returned.
				+	static int extractPath(const char* fileName, char* path, int maxPathLength)
				+	{
				+		const char* baseName = strip2(strip2(fileName, "/"), "\\");
				+		const ptrdiff_t dirLen = baseName - fileName;
				+		b3Assert((dirLen + 1) < maxPathLength);
				+
				+		const bool usable = dirLen && ((dirLen + 1) < maxPathLength);
				+		if (!usable)
				+		{
				+			b3Assert(maxPathLength > 0);
				+			if (maxPathLength > 0)
				+			{
				+				path[0] = 0;
				+			}
				+			return 0;
				+		}
				+
				+		for (int pos = 0; pos < dirLen; pos++)
				+		{
				+			path[pos] = fileName[pos];
				+		}
				+		path[dirLen] = 0;
				+		return dirLen;
				+	}
			
 
				+
			
 
				+	/// Maps 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
				+	static char toLowerChar(const char t)
				+	{
				+		if (t < (char)'A' || t > (char)'Z')
				+		{
				+			return t;  // not an upper-case ASCII letter
				+		}
				+		return t + ((char)'a' - (char)'A');
				+	}
			
 
				+
			
 
				+	/// Lower-cases the NUL-terminated string str in place, one character at a
				+	/// time, using toLowerChar().
				+	static void toLower(char* str)
				+	{
				+		for (char* cursor = str; *cursor; ++cursor)
				+		{
				+			*cursor = toLowerChar(*cursor);
				+		}
				+	}
			
 
				+
			
 
				+	/*static const char* strip2(const char* name, const char* pattern)
			
 
				+	{
			
 
				+		size_t const patlen = strlen(pattern);
			
 
				+		size_t patcnt = 0;
			
 
				+		const char * oriptr;
			
 
				+		const char * patloc;
			
 
				+		// find how many times the pattern occurs in the original string
			
 
				+		for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
			
 
				+		{
			
 
				+			patcnt++;
			
 
				+		}
			
 
				+		return oriptr;
			
 
				+	}
			
 
				+	*/
			
 
				+};
			
 
				+#endif  //B3_FILE_UTILS_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3HashMap.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3HashMap.h
@@ -0,0 +1,462 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_HASH_MAP_H
			
 
				+#define B3_HASH_MAP_H
			
 
				+
			
 
				+#include "b3AlignedObjectArray.h"
			
 
				+
			
 
				+#include <string>
			
 
				+
			
 
				+///very basic hashable string implementation, compatible with b3HashMap
			
 
				+/// String key for b3HashMap: stores the text together with a hash of it
				+/// that is computed once in the constructor.
				+struct b3HashString
				+{
				+	std::string m_string;
				+	unsigned int m_hash;
				+
				+	/// Hash computed in the constructor.
				+	B3_FORCE_INLINE unsigned int getHash() const
				+	{
				+		return m_hash;
				+	}
				+
				+	/// Copies name and hashes it. name must be a valid NUL-terminated string.
				+	b3HashString(const char* name)
				+		: m_string(name)
				+	{
				+		/* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
				+		static const unsigned int InitialFNV = 2166136261u;
				+		static const unsigned int FNVMultiple = 16777619u;
				+
				+		/* Fowler / Noll / Vo (FNV) Hash */
				+		unsigned int hash = InitialFNV;
				+		const std::string::size_type len = m_string.length();
				+		for (std::string::size_type i = 0; i < len; i++)
				+		{
				+			// Go through unsigned char so that bytes >= 0x80 are not sign-extended
				+			// where char is signed: only the low 8 bits may be mixed in.
				+			hash = hash ^ static_cast<unsigned char>(m_string[i]); /* xor  the low 8 bits */
				+			hash = hash * FNVMultiple;                             /* multiply by the magic number */
				+		}
				+		m_hash = hash;
				+	}
				+
				+	/// Byte-wise comparison of two NUL-terminated strings.
				+	/// Returns -1, 0 or 1 when src sorts before, equal to or after dst.
				+	int portableStringCompare(const char* src, const char* dst) const
				+	{
				+		int ret = 0;
				+
				+		while (!(ret = *(const unsigned char*)src - *(const unsigned char*)dst) && *dst)
				+			++src, ++dst;
				+
				+		if (ret < 0)
				+			ret = -1;
				+		else if (ret > 0)
				+			ret = 1;
				+
				+		return (ret);
				+	}
				+
				+	/// Two keys are equal when their strings are equal.
				+	bool equals(const b3HashString& other) const
				+	{
				+		return (m_string == other.m_string);
				+	}
				+};
			
 
				+
			
 
				+const int B3_HASH_NULL = 0xffffffff;
			
 
				+
			
 
				+class b3HashInt  // integer key wrapper providing the getHash()/equals() interface used by b3HashMap
				+{
				+	int m_uid;  // the wrapped integer key
				+
				+public:
				+	b3HashInt(int uid) : m_uid(uid)
				+	{
				+	}
				+
				+	int getUid1() const
				+	{
				+		return m_uid;
				+	}
				+
				+	void setUid1(int uid)
				+	{
				+		m_uid = uid;
				+	}
				+
				+	bool equals(const b3HashInt& other) const
				+	{
				+		return getUid1() == other.getUid1();
				+	}
				+	// Mixes the bits of m_uid into a hash value.
				+	B3_FORCE_INLINE unsigned int getHash() const
				+	{
				+		int key = m_uid;
				+		// Thomas Wang's hash. NOTE(review): done on a signed int, so the shifts/additions rely on the platform's signed-overflow and right-shift behaviour.
				+		key += ~(key << 15);
				+		key ^= (key >> 10);
				+		key += (key << 3);
				+		key ^= (key >> 6);
				+		key += ~(key << 11);
				+		key ^= (key >> 16);
				+		return key;
				+	}
				+};
			
 
				+
			
 
				+class b3HashPtr  // pointer key wrapper providing the getHash()/equals() interface used by b3HashMap
				+{
				+	union {
				+		const void* m_pointer;  // the wrapped pointer
				+		int m_hashValues[2];    // the same storage viewed as ints, read by getHash()
				+	};
				+
				+public:
				+	b3HashPtr(const void* ptr)
				+		: m_pointer(ptr)
				+	{
				+	}
				+
				+	const void* getPointer() const
				+	{
				+		return m_pointer;
				+	}
				+
				+	bool equals(const b3HashPtr& other) const
				+	{
				+		return getPointer() == other.getPointer();
				+	}
				+
				+	// Hashes the pointer value: both int halves are summed when pointers are 8 bytes, otherwise only the first int is used.
				+	B3_FORCE_INLINE unsigned int getHash() const
				+	{
				+		const bool VOID_IS_8 = ((sizeof(void*) == 8));
				+
				+		int key = VOID_IS_8 ? m_hashValues[0] + m_hashValues[1] : m_hashValues[0];
				+
				+		// Thomas Wang's hash. NOTE(review): done on a signed int, so the shifts/additions rely on the platform's signed-overflow and right-shift behaviour.
				+		key += ~(key << 15);
				+		key ^= (key >> 10);
				+		key += (key << 3);
				+		key ^= (key >> 6);
				+		key += ~(key << 11);
				+		key ^= (key >> 16);
				+		return key;
				+	}
				+};
			
 
				+
			
 
				+template <class Value>
				+class b3HashKeyPtr  // integer-uid key; the Value template parameter only appears in the type name and in equals()
				+{
				+	int m_uid;  // the wrapped uid
				+
				+public:
				+	b3HashKeyPtr(int uid) : m_uid(uid)
				+	{
				+	}
				+
				+	int getUid1() const
				+	{
				+		return m_uid;
				+	}
				+
				+	bool equals(const b3HashKeyPtr<Value>& other) const
				+	{
				+		return getUid1() == other.getUid1();
				+	}
				+
				+	// Mixes the bits of m_uid into a hash value.
				+	B3_FORCE_INLINE unsigned int getHash() const
				+	{
				+		int key = m_uid;
				+		// Thomas Wang's hash. NOTE(review): done on a signed int, so the shifts/additions rely on the platform's signed-overflow and right-shift behaviour.
				+		key += ~(key << 15);
				+		key ^= (key >> 10);
				+		key += (key << 3);
				+		key ^= (key >> 6);
				+		key += ~(key << 11);
				+		key ^= (key >> 16);
				+		return key;
				+	}
				+};
			
 
				+
			
 
				+template <class Value>
				+class b3HashKey  // integer-uid key; the Value template parameter only appears in the type name and in equals()
				+{
				+	int m_uid;  // the wrapped uid
				+
				+public:
				+	b3HashKey(int uid) : m_uid(uid)
				+	{
				+	}
				+
				+	int getUid1() const
				+	{
				+		return m_uid;
				+	}
				+
				+	bool equals(const b3HashKey<Value>& other) const
				+	{
				+		return getUid1() == other.getUid1();
				+	}
				+	// Mixes the bits of m_uid into a hash value.
				+	B3_FORCE_INLINE unsigned int getHash() const
				+	{
				+		int key = m_uid;
				+		// Thomas Wang's hash. NOTE(review): done on a signed int, so the shifts/additions rely on the platform's signed-overflow and right-shift behaviour.
				+		key += ~(key << 15);
				+		key ^= (key >> 10);
				+		key += (key << 3);
				+		key ^= (key >> 6);
				+		key += ~(key << 11);
				+		key ^= (key >> 16);
				+		return key;
				+	}
				+};
			
 
				+
			
 
				+///The b3HashMap template class implements a generic and lightweight hashmap.
			
 
				+///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp
			
 
				+template <class Key, class Value>
			
 
				+class b3HashMap
			
 
				+{
			
 
				+protected:
			
 
				+	b3AlignedObjectArray<int> m_hashTable;
			
 
				+	b3AlignedObjectArray<int> m_next;
			
 
				+
			
 
				+	b3AlignedObjectArray<Value> m_valueArray;
			
 
				+	b3AlignedObjectArray<Key> m_keyArray;
			
 
				+
			
 
				+	void growTables(const Key& /*key*/)  // resizes m_hashTable/m_next to the value array's capacity and rebuilds the bucket chains
				+	{
				+		int newCapacity = m_valueArray.capacity();
				+
				+		if (m_hashTable.size() < newCapacity)
				+		{
				+			//grow hashtable and next table
				+			int curHashtableSize = m_hashTable.size();  // table size before growing; bounds the re-insert loop below
				+
				+			m_hashTable.resize(newCapacity);
				+			m_next.resize(newCapacity);
				+
				+			int i;
				+
				+			for (i = 0; i < newCapacity; ++i)
				+			{
				+				m_hashTable[i] = B3_HASH_NULL;  // every bucket starts empty
				+			}
				+			for (i = 0; i < newCapacity; ++i)
				+			{
				+				m_next[i] = B3_HASH_NULL;  // every chain link starts unlinked
				+			}
				+
				+			for (i = 0; i < curHashtableSize; i++)
				+			{
				+				// Re-insert entry i at the head of its bucket's chain.
				+				// NOTE(review): the mask assumes the capacity is a power of two - confirm against allocSize().
				+
				+				int hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity() - 1);  // New hash value with new mask
				+				m_next[i] = m_hashTable[hashValue];
				+				m_hashTable[hashValue] = i;
				+			}
				+		}
				+	}
			
 
				+
			
 
				+public:
			
 
				+	void insert(const Key& key, const Value& value)  // stores value under key, overwriting the value of an existing equal key
				+	{
				+		int hash = key.getHash() & (m_valueArray.capacity() - 1);  // bucket index under the current capacity mask
				+
				+		//replace value if the key is already there
				+		int index = findIndex(key);
				+		if (index != B3_HASH_NULL)
				+		{
				+			m_valueArray[index] = value;
				+			return;
				+		}
				+
				+		int count = m_valueArray.size();  // index the new pair will occupy
				+		int oldCapacity = m_valueArray.capacity();
				+		m_valueArray.push_back(value);
				+		m_keyArray.push_back(key);
				+
				+		int newCapacity = m_valueArray.capacity();
				+		if (oldCapacity < newCapacity)
				+		{
				+			growTables(key);
				+			//hash with new capacity
				+			hash = key.getHash() & (m_valueArray.capacity() - 1);
				+		}
				+		m_next[count] = m_hashTable[hash];  // link the new pair in at the head of its bucket's chain
				+		m_hashTable[hash] = count;
				+	}
			
 
				+
			
 
				+	void remove(const Key& key)  // removes key and its value if present; the last stored pair is moved into the freed slot
				+	{
				+		int hash = key.getHash() & (m_valueArray.capacity() - 1);  // bucket index of the key being removed
				+
				+		int pairIndex = findIndex(key);
				+
				+		if (pairIndex == B3_HASH_NULL)
				+		{
				+			return;  // key not stored: nothing to do
				+		}
				+
				+		// Remove the pair from the hash table.
				+		int index = m_hashTable[hash];
				+		b3Assert(index != B3_HASH_NULL);
				+
				+		int previous = B3_HASH_NULL;
				+		while (index != pairIndex)  // walk the bucket chain to find the link that precedes pairIndex
				+		{
				+			previous = index;
				+			index = m_next[index];
				+		}
				+
				+		if (previous != B3_HASH_NULL)
				+		{
				+			b3Assert(m_next[previous] == pairIndex);
				+			m_next[previous] = m_next[pairIndex];
				+		}
				+		else
				+		{
				+			m_hashTable[hash] = m_next[pairIndex];  // pair was the chain head
				+		}
				+
				+		// We now move the last pair into the spot of the
				+		// pair being removed. We need to fix the hash
				+		// table indices to support the move.
				+
				+		int lastPairIndex = m_valueArray.size() - 1;
				+
				+		// If the removed pair is the last pair, we are done.
				+		if (lastPairIndex == pairIndex)
				+		{
				+			m_valueArray.pop_back();
				+			m_keyArray.pop_back();
				+			return;
				+		}
				+
				+		// Remove the last pair from the hash table.
				+		int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity() - 1);
				+
				+		index = m_hashTable[lastHash];
				+		b3Assert(index != B3_HASH_NULL);
				+
				+		previous = B3_HASH_NULL;
				+		while (index != lastPairIndex)  // same chain walk, this time for the last pair
				+		{
				+			previous = index;
				+			index = m_next[index];
				+		}
				+
				+		if (previous != B3_HASH_NULL)
				+		{
				+			b3Assert(m_next[previous] == lastPairIndex);
				+			m_next[previous] = m_next[lastPairIndex];
				+		}
				+		else
				+		{
				+			m_hashTable[lastHash] = m_next[lastPairIndex];
				+		}
				+
				+		// Copy the last pair into the removed pair's spot.
				+		m_valueArray[pairIndex] = m_valueArray[lastPairIndex];
				+		m_keyArray[pairIndex] = m_keyArray[lastPairIndex];
				+
				+		// Insert the moved pair into the hash table under its new index.
				+		m_next[pairIndex] = m_hashTable[lastHash];
				+		m_hashTable[lastHash] = pairIndex;
				+
				+		m_valueArray.pop_back();
				+		m_keyArray.pop_back();
				+	}
			
 
				+
			
 
				+	/// Number of key/value pairs currently stored.
				+	int size() const { return m_valueArray.size(); }
			
 
				+
			
 
				+	/// Read-only pointer to the value stored at position index of the value
				+	/// array. index is only checked by an assertion (index < size()).
				+	const Value* getAtIndex(int index) const
				+	{
				+		b3Assert(index < m_valueArray.size());
				+		const Value& stored = m_valueArray[index];
				+		return &stored;
				+	}
			
 
				+
			
 
				+	/// Mutable pointer to the value stored at position index of the value
				+	/// array. index is only checked by an assertion (index < size()).
				+	Value* getAtIndex(int index)
				+	{
				+		b3Assert(index < m_valueArray.size());
				+		Value& stored = m_valueArray[index];
				+		return &stored;
				+	}
			
 
				+
			
 
				+	/// Copy of the key stored at position index of the key array.
				+	/// index is only checked by an assertion (index < number of keys).
				+	Key getKeyAtIndex(int index)
				+	{
				+		b3Assert(index < m_keyArray.size());
				+
				+		return m_keyArray[index];
				+	}
			
 
				+
			
 
				+	/// Const overload: copy of the key stored at position index of the key array.
				+	/// index is only checked by an assertion (index < number of keys).
				+	const Key getKeyAtIndex(int index) const
				+	{
				+		b3Assert(index < m_keyArray.size());
				+
				+		return m_keyArray[index];
				+	}
			
 
				+
			
 
				+	/// Shorthand for find(key): pointer to the stored value, or NULL when the
				+	/// key is absent. Unlike std::map, this never inserts.
				+	Value* operator[](const Key& key) { return find(key); }
			
 
				+
			
 
				+	/// Read-only lookup: pointer to the value stored under key, or NULL when
				+	/// the key is absent.
				+	const Value* find(const Key& key) const
				+	{
				+		const int slot = findIndex(key);
				+		if (slot != B3_HASH_NULL)
				+		{
				+			return &m_valueArray[slot];
				+		}
				+		return NULL;
				+	}
			
 
				+
			
 
				+	/// Mutable lookup: pointer to the value stored under key, or NULL when the
				+	/// key is absent.
				+	Value* find(const Key& key)
				+	{
				+		const int slot = findIndex(key);
				+		if (slot != B3_HASH_NULL)
				+		{
				+			return &m_valueArray[slot];
				+		}
				+		return NULL;
				+	}
			
 
				+
			
 
				+	/// Returns the position of key in the key/value arrays, or B3_HASH_NULL
				+	/// when the key is not stored.
				+	int findIndex(const Key& key) const
				+	{
				+		const unsigned int bucket = key.getHash() & (m_valueArray.capacity() - 1);
				+
				+		// The masked hash can lie outside the table (e.g. while the table is
				+		// still empty); treat that as "not found".
				+		if (bucket >= (unsigned int)m_hashTable.size())
				+		{
				+			return B3_HASH_NULL;
				+		}
				+
				+		// Follow the bucket's chain until the key matches or the chain ends.
				+		int cursor = m_hashTable[bucket];
				+		while (cursor != B3_HASH_NULL)
				+		{
				+			if (key.equals(m_keyArray[cursor]))
				+			{
				+				break;
				+			}
				+			cursor = m_next[cursor];
				+		}
				+		return cursor;
				+	}
			
 
				+
			
 
				+	/// Empties the map by clearing all four backing arrays.
				+	void clear()
				+	{
				+		// bucket heads and chain links
				+		m_hashTable.clear();
				+		m_next.clear();
				+
				+		// stored pairs
				+		m_valueArray.clear();
				+		m_keyArray.clear();
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_HASH_MAP_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Logging.cpp
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Logging.cpp
@@ -0,0 +1,145 @@
 
				+/*
			
 
				+Copyright (c) 2013 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#include "b3Logging.h"
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <stdarg.h>
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+#include <windows.h>
			
 
				+#endif  //_WIN32
			
 
				+
			
 
				+void b3PrintfFuncDefault(const char* msg)  // default b3Printf handler: writes msg to stdout (and to OutputDebugStringA on Windows)
				+{
				+#ifdef _WIN32
				+	OutputDebugStringA(msg);
				+#endif
				+	printf("%s", msg);  // "%s" keeps msg from being interpreted as a format string
				+	// flush so the message is not held back in the stdout buffer
				+	fflush(stdout);
				+}
			
 
				+
			
 
				+void b3WarningMessageFuncDefault(const char* msg)  // default b3Warning handler: writes msg to stdout (and to OutputDebugStringA on Windows)
				+{
				+#ifdef _WIN32
				+	OutputDebugStringA(msg);
				+#endif
				+	printf("%s", msg);  // "%s" keeps msg from being interpreted as a format string
				+	// flush so the message is not held back in the stdout buffer
				+	fflush(stdout);
				+}
			
 
				+
			
 
				+void b3ErrorMessageFuncDefault(const char* msg)  // default b3Error handler: writes msg to stdout (and to OutputDebugStringA on Windows)
				+{
				+#ifdef _WIN32
				+	OutputDebugStringA(msg);
				+#endif
				+	printf("%s", msg);  // "%s" keeps msg from being interpreted as a format string
				+
				+	// flush so the message is not held back in the stdout buffer
				+	fflush(stdout);
				+}
			
 
				+
			
 
				+static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault;
			
 
				+static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault;
			
 
				+static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault;
			
 
				+
			
 
				+///The developer can route b3Printf output using their own implementation.
				+///Passing a null pointer restores the default handler (b3PrintfFuncDefault)
				+///instead of leaving a null function pointer to be called later.
				+void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc)
				+{
				+	b3s_printfFunc = printfFunc ? printfFunc : b3PrintfFuncDefault;
				+}
			
 
				+///Routes b3Warning output to warningMessageFunc. Passing a null pointer
				+///restores the default handler (b3WarningMessageFuncDefault) instead of
				+///leaving a null function pointer to be called later.
				+void b3SetCustomWarningMessageFunc(b3PrintfFunc* warningMessageFunc)
				+{
				+	b3s_warningMessageFunc = warningMessageFunc ? warningMessageFunc : b3WarningMessageFuncDefault;
				+}
			
 
				+///Routes b3Error output to errorMessageFunc. Passing a null pointer
				+///restores the default handler (b3ErrorMessageFuncDefault) instead of
				+///leaving a null function pointer to be called later.
				+void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc)
				+{
				+	b3s_errorMessageFunc = errorMessageFunc ? errorMessageFunc : b3ErrorMessageFuncDefault;
				+}
			
 
				+
			
 
				+//#define B3_MAX_DEBUG_STRING_LENGTH 2048
			
 
				+#define B3_MAX_DEBUG_STRING_LENGTH 32768
			
 
				+
			
 
				+///Formats a printf-style message into a fixed-size stack buffer and passes
				+///the result to the current b3Printf handler. Messages longer than
				+///B3_MAX_DEBUG_STRING_LENGTH - 1 characters are truncated.
				+void b3OutputPrintfVarArgsInternal(const char* str, ...)
				+{
				+	char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
				+	va_list argList;
				+	va_start(argList, str);
				+#ifdef _MSC_VER
				+	// _TRUNCATE clips over-long output like vsnprintf below; vsprintf_s would
				+	// invoke the invalid-parameter handler instead.
				+	_vsnprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, _TRUNCATE, str, argList);
				+#else
				+	vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
				+#endif
				+	va_end(argList);
				+	(b3s_printfFunc)(strDebug);
				+}
			
 
				+///Formats a printf-style message into a fixed-size stack buffer and passes
				+///the result to the current b3Warning handler. Messages longer than
				+///B3_MAX_DEBUG_STRING_LENGTH - 1 characters are truncated.
				+void b3OutputWarningMessageVarArgsInternal(const char* str, ...)
				+{
				+	char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
				+	va_list argList;
				+	va_start(argList, str);
				+#ifdef _MSC_VER
				+	// _TRUNCATE clips over-long output like vsnprintf below; vsprintf_s would
				+	// invoke the invalid-parameter handler instead.
				+	_vsnprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, _TRUNCATE, str, argList);
				+#else
				+	vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
				+#endif
				+	va_end(argList);
				+	(b3s_warningMessageFunc)(strDebug);
				+}
			
 
				+///Formats a printf-style message into a fixed-size stack buffer and passes
				+///the result to the current b3Error handler. Messages longer than
				+///B3_MAX_DEBUG_STRING_LENGTH - 1 characters are truncated.
				+void b3OutputErrorMessageVarArgsInternal(const char* str, ...)
				+{
				+	char strDebug[B3_MAX_DEBUG_STRING_LENGTH] = {0};
				+	va_list argList;
				+	va_start(argList, str);
				+#ifdef _MSC_VER
				+	// _TRUNCATE clips over-long output like vsnprintf below; vsprintf_s would
				+	// invoke the invalid-parameter handler instead.
				+	_vsnprintf_s(strDebug, B3_MAX_DEBUG_STRING_LENGTH, _TRUNCATE, str, argList);
				+#else
				+	vsnprintf(strDebug, B3_MAX_DEBUG_STRING_LENGTH, str, argList);
				+#endif
				+	va_end(argList);
				+	(b3s_errorMessageFunc)(strDebug);
				+}
			
 
				+
			
 
				+void b3EnterProfileZoneDefault(const char* name)  // default "enter zone" hook: intentionally does nothing; name is unused
				+{
				+}
			
 
				+void b3LeaveProfileZoneDefault()  // default "leave zone" hook: intentionally does nothing
				+{
				+}
			
 
				+static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault;
			
 
				+static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault;
			
 
				+void b3EnterProfileZone(const char* name)  // forwards name to the currently installed "enter zone" hook
				+{
				+	(b3s_enterFunc)(name);
				+}
			
 
				+void b3LeaveProfileZone()  // calls the currently installed "leave zone" hook
				+{
				+	(b3s_leaveFunc)();
				+}
			
 
				+
			
 
				+///Installs the hook called by b3EnterProfileZone. Passing a null pointer
				+///restores the default no-op hook instead of leaving a null function
				+///pointer to be called later.
				+void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc)
				+{
				+	b3s_enterFunc = enterFunc ? enterFunc : b3EnterProfileZoneDefault;
				+}
			
 
				+///Installs the hook called by b3LeaveProfileZone. Passing a null pointer
				+///restores the default no-op hook instead of leaving a null function
				+///pointer to be called later.
				+void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc)
				+{
				+	b3s_leaveFunc = leaveFunc ? leaveFunc : b3LeaveProfileZoneDefault;
				+}
			
 
				+
			
 
				+#ifndef _MSC_VER
			
 
				+#undef vsprintf_s
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Logging.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Logging.h
@@ -0,0 +1,74 @@
 
				+
			
 
				+#ifndef B3_LOGGING_H
			
 
				+#define B3_LOGGING_H
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+///We add the do/while so that the statement "if (condition) b3Printf("test"); else {...}" would fail
			
 
				+///You can also customize the message by uncommenting a different line below
			
 
				+#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__)
			
 
				+	//#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0)
			
 
				+	//#define b3Printf b3OutputPrintfVarArgsInternal
			
 
				+	//#define b3Printf(...) printf(__VA_ARGS__)
			
 
				+	//#define b3Printf(...)
			
 
				+#define b3Warning(...) do{	b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n", __FILE__, __LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__);} while (0)
			
 
				+#define b3Error(...)do	{b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n", __FILE__, __LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__);} while (0)
			
 
				+#ifndef B3_NO_PROFILE
			
 
				+
			
 
				+	void b3EnterProfileZone(const char* name);
			
 
				+	void b3LeaveProfileZone();
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+	class b3ProfileZone
			
 
				+	{
			
 
				+	public:
			
 
				+		b3ProfileZone(const char* name)
			
 
				+		{
			
 
				+			b3EnterProfileZone(name);
			
 
				+		}
			
 
				+
			
 
				+		~b3ProfileZone()
			
 
				+		{
			
 
				+			b3LeaveProfileZone();
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+#define B3_PROFILE(name) b3ProfileZone __profile(name)
			
 
				+#endif
			
 
				+
			
 
				+#else  //B3_NO_PROFILE
			
 
				+
			
 
				+#define B3_PROFILE(name)
			
 
				+#define b3StartProfile(a)
			
 
				+#define b3StopProfile
			
 
				+
			
 
				+#endif  //#ifndef B3_NO_PROFILE
			
 
				+
			
 
				+	typedef void(b3PrintfFunc)(const char* msg);
			
 
				+	typedef void(b3WarningMessageFunc)(const char* msg);
			
 
				+	typedef void(b3ErrorMessageFunc)(const char* msg);
			
 
				+	typedef void(b3EnterProfileZoneFunc)(const char* msg);
			
 
				+	typedef void(b3LeaveProfileZoneFunc)();
			
 
				+
			
 
				+	///The developer can route b3Printf output using their own implementation
			
 
				+	void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
			
 
				+	void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
			
 
				+	void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);
			
 
				+
			
 
				+	///Set custom profile zone functions (zones can be nested)
			
 
				+	void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc);
			
 
				+	void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc);
			
 
				+
			
 
				+	///Don't use these internal functions directly; use b3Printf or b3SetCustomPrintfFunc instead (or the warning/error versions)
			
 
				+	void b3OutputPrintfVarArgsInternal(const char* str, ...);
			
 
				+	void b3OutputWarningMessageVarArgsInternal(const char* str, ...);
			
 
				+	void b3OutputErrorMessageVarArgsInternal(const char* str, ...);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif  //B3_LOGGING_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Matrix3x3.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Matrix3x3.h
@@ -0,0 +1,1354 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_MATRIX3x3_H
			
 
				+#define B3_MATRIX3x3_H
			
 
				+
			
 
				+#include "b3Vector3.h"
			
 
				+#include "b3Quaternion.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#ifdef B3_USE_SSE
			
 
				+//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
			
 
				+#endif
			
 
				+
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
			
 
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
			
 
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
			
 
				+#endif
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3Matrix3x3Data b3Matrix3x3DoubleData
			
 
				+#else
			
 
				+#define b3Matrix3x3Data b3Matrix3x3FloatData
			
 
				+#endif  //B3_USE_DOUBLE_PRECISION
			
 
				+
			
 
				+/**@brief The b3Matrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with b3Quaternion, b3Transform and b3Vector3.
			
 
				+* Make sure to only include a pure orthogonal matrix without scaling. */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3Matrix3x3
			
 
				+{
			
 
				+	///Data storage for the matrix, each vector is a row of the matrix
			
 
				+	b3Vector3 m_el[3];
			
 
				+
			
 
				+public:
			
 
				+	/** @brief No-initialization constructor */
			
 
				+	b3Matrix3x3() {}
			
 
				+
			
 
				+	//		explicit b3Matrix3x3(const b3Scalar *m) { setFromOpenGLSubMatrix(m); }
			
 
				+
			
 
				+	/**@brief Constructor from Quaternion */
			
 
				+	explicit b3Matrix3x3(const b3Quaternion& q) { setRotation(q); }
			
 
				+	/*
			
 
				+	template <typename b3Scalar>
			
 
				+	Matrix3x3(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{ 
			
 
				+	setEulerYPR(yaw, pitch, roll);
			
 
				+	}
			
 
				+	*/
			
 
				+	/** @brief Constructor with row major formatting */
			
 
				+	b3Matrix3x3(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz,
			
 
				+				const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz,
			
 
				+				const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
			
 
				+	{
			
 
				+		setValue(xx, xy, xz,
			
 
				+				 yx, yy, yz,
			
 
				+				 zx, zy, zz);
			
 
				+	}
			
 
				+
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
			
 
				+	B3_FORCE_INLINE b3Matrix3x3(const b3SimdFloat4 v0, const b3SimdFloat4 v1, const b3SimdFloat4 v2)
			
 
				+	{
			
 
				+		m_el[0].mVec128 = v0;
			
 
				+		m_el[1].mVec128 = v1;
			
 
				+		m_el[2].mVec128 = v2;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Matrix3x3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2)
			
 
				+	{
			
 
				+		m_el[0] = v0;
			
 
				+		m_el[1] = v1;
			
 
				+		m_el[2] = v2;
			
 
				+	}
			
 
				+
			
 
				+	// Copy constructor
			
 
				+	B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& rhs)
			
 
				+	{
			
 
				+		m_el[0].mVec128 = rhs.m_el[0].mVec128;
			
 
				+		m_el[1].mVec128 = rhs.m_el[1].mVec128;
			
 
				+		m_el[2].mVec128 = rhs.m_el[2].mVec128;
			
 
				+	}
			
 
				+
			
 
				+	// Assignment Operator
			
 
				+	B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& m)
			
 
				+	{
			
 
				+		m_el[0].mVec128 = m.m_el[0].mVec128;
			
 
				+		m_el[1].mVec128 = m.m_el[1].mVec128;
			
 
				+		m_el[2].mVec128 = m.m_el[2].mVec128;
			
 
				+
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+	/** @brief Copy constructor */
			
 
				+	B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& other)
			
 
				+	{
			
 
				+		m_el[0] = other.m_el[0];
			
 
				+		m_el[1] = other.m_el[1];
			
 
				+		m_el[2] = other.m_el[2];
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Assignment Operator */
			
 
				+	B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& other)
			
 
				+	{
			
 
				+		m_el[0] = other.m_el[0];
			
 
				+		m_el[1] = other.m_el[1];
			
 
				+		m_el[2] = other.m_el[2];
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+	/** @brief Get a column of the matrix as a vector 
			
 
				+	*  @param i Column number 0 indexed */
			
 
				+	B3_FORCE_INLINE b3Vector3 getColumn(int i) const
			
 
				+	{
			
 
				+		return b3MakeVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Get a row of the matrix as a vector 
			
 
				+	*  @param i Row number 0 indexed */
			
 
				+	B3_FORCE_INLINE const b3Vector3& getRow(int i) const
			
 
				+	{
			
 
				+		b3FullAssert(0 <= i && i < 3);
			
 
				+		return m_el[i];
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Get a mutable reference to a row of the matrix as a vector 
			
 
				+	*  @param i Row number 0 indexed */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator[](int i)
			
 
				+	{
			
 
				+		b3FullAssert(0 <= i && i < 3);
			
 
				+		return m_el[i];
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Get a const reference to a row of the matrix as a vector 
			
 
				+	*  @param i Row number 0 indexed */
			
 
				+	B3_FORCE_INLINE const b3Vector3& operator[](int i) const
			
 
				+	{
			
 
				+		b3FullAssert(0 <= i && i < 3);
			
 
				+		return m_el[i];
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Multiply by the target matrix on the right
			
 
				+	*  @param m Rotation matrix to be applied 
			
 
				+	* Equivalent to this = this * m */
			
 
				+	b3Matrix3x3& operator*=(const b3Matrix3x3& m);
			
 
				+
			
 
				+	/** @brief Adds the target matrix on the right
			
 
				+	*  @param m matrix to be applied 
			
 
				+	* Equivalent to this = this + m */
			
 
				+	b3Matrix3x3& operator+=(const b3Matrix3x3& m);
			
 
				+
			
 
				+	/** @brief Subtracts the target matrix on the right
			
 
				+	*  @param m matrix to be applied 
			
 
				+	* Equivalent to this = this - m */
			
 
				+	b3Matrix3x3& operator-=(const b3Matrix3x3& m);
			
 
				+
			
 
				+	/** @brief Set from the rotational part of a 4x4 OpenGL matrix
			
 
				+	*  @param m A pointer to the beginning of the array of scalars*/
			
 
				+	void setFromOpenGLSubMatrix(const b3Scalar* m)
			
 
				+	{
			
 
				+		m_el[0].setValue(m[0], m[4], m[8]);
			
 
				+		m_el[1].setValue(m[1], m[5], m[9]);
			
 
				+		m_el[2].setValue(m[2], m[6], m[10]);
			
 
				+	}
			
 
				+	/** @brief Set the values of the matrix explicitly (row major)
			
 
				+	*  @param xx Top left
			
 
				+	*  @param xy Top Middle
			
 
				+	*  @param xz Top Right
			
 
				+	*  @param yx Middle Left
			
 
				+	*  @param yy Middle Middle
			
 
				+	*  @param yz Middle Right
			
 
				+	*  @param zx Bottom Left
			
 
				+	*  @param zy Bottom Middle
			
 
				+	*  @param zz Bottom Right*/
			
 
				+	void setValue(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz,
			
 
				+				  const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz,
			
 
				+				  const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
			
 
				+	{
			
 
				+		m_el[0].setValue(xx, xy, xz);
			
 
				+		m_el[1].setValue(yx, yy, yz);
			
 
				+		m_el[2].setValue(zx, zy, zz);
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Set the matrix from a quaternion
			
 
				+	*  @param q The Quaternion to match */
			
 
				+	void setRotation(const b3Quaternion& q)
			
 
				+	{
			
 
				+		b3Scalar d = q.length2();
			
 
				+		b3FullAssert(d != b3Scalar(0.0));
			
 
				+		b3Scalar s = b3Scalar(2.0) / d;
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 vs, Q = q.get128();
			
 
				+		__m128i Qi = b3CastfTo128i(Q);
			
 
				+		__m128 Y, Z;
			
 
				+		__m128 V1, V2, V3;
			
 
				+		__m128 V11, V21, V31;
			
 
				+		__m128 NQ = _mm_xor_ps(Q, b3vMzeroMask);
			
 
				+		__m128i NQi = b3CastfTo128i(NQ);
			
 
				+
			
 
				+		V1 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 2, 3)));  // Y X Z W
			
 
				+		V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0, 0, 1, 3));                 // -X -X  Y  W
			
 
				+		V3 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(2, 1, 0, 3)));  // Z Y X W
			
 
				+		V1 = _mm_xor_ps(V1, b3vMPPP);                                       //	change the sign of the first element
			
 
				+
			
 
				+		V11 = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 1, 0, 3)));  // Y Y X W
			
 
				+		V21 = _mm_unpackhi_ps(Q, Q);                                         //  Z  Z  W  W
			
 
				+		V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(0, 2, 0, 3));                 //  X  Z -X -W
			
 
				+
			
 
				+		V2 = V2 * V1;   //
			
 
				+		V1 = V1 * V11;  //
			
 
				+		V3 = V3 * V31;  //
			
 
				+
			
 
				+		V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2, 3, 1, 3));                //	-Z -W  Y  W
			
 
				+		V11 = V11 * V21;                                                    //
			
 
				+		V21 = _mm_xor_ps(V21, b3vMPPP);                                     //	change the sign of the first element
			
 
				+		V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3, 3, 1, 3));                //	 W  W -Y -W
			
 
				+		V31 = _mm_xor_ps(V31, b3vMPPP);                                     //	change the sign of the first element
			
 
				+		Y = b3CastiTo128f(_mm_shuffle_epi32(NQi, B3_SHUFFLE(3, 2, 0, 3)));  // -W -Z -X -W
			
 
				+		Z = b3CastiTo128f(_mm_shuffle_epi32(Qi, B3_SHUFFLE(1, 0, 1, 3)));   //  Y  X  Y  W
			
 
				+
			
 
				+		vs = _mm_load_ss(&s);
			
 
				+		V21 = V21 * Y;
			
 
				+		V31 = V31 * Z;
			
 
				+
			
 
				+		V1 = V1 + V11;
			
 
				+		V2 = V2 + V21;
			
 
				+		V3 = V3 + V31;
			
 
				+
			
 
				+		vs = b3_splat3_ps(vs, 0);
			
 
				+		//	s ready
			
 
				+		V1 = V1 * vs;
			
 
				+		V2 = V2 * vs;
			
 
				+		V3 = V3 * vs;
			
 
				+
			
 
				+		V1 = V1 + b3v1000;
			
 
				+		V2 = V2 + b3v0100;
			
 
				+		V3 = V3 + b3v0010;
			
 
				+
			
 
				+		m_el[0] = b3MakeVector3(V1);
			
 
				+		m_el[1] = b3MakeVector3(V2);
			
 
				+		m_el[2] = b3MakeVector3(V3);
			
 
				+#else
			
 
				+		b3Scalar xs = q.getX() * s, ys = q.getY() * s, zs = q.getZ() * s;
			
 
				+		b3Scalar wx = q.getW() * xs, wy = q.getW() * ys, wz = q.getW() * zs;
			
 
				+		b3Scalar xx = q.getX() * xs, xy = q.getX() * ys, xz = q.getX() * zs;
			
 
				+		b3Scalar yy = q.getY() * ys, yz = q.getY() * zs, zz = q.getZ() * zs;
			
 
				+		setValue(
			
 
				+			b3Scalar(1.0) - (yy + zz), xy - wz, xz + wy,
			
 
				+			xy + wz, b3Scalar(1.0) - (xx + zz), yz - wx,
			
 
				+			xz - wy, yz + wx, b3Scalar(1.0) - (xx + yy));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Set the matrix from euler angles using YPR around YXZ respectively
			
 
				+	*  @param yaw Yaw about Y axis
			
 
				+	*  @param pitch Pitch about X axis
			
 
				+	*  @param roll Roll about Z axis 
			
 
				+	*/
			
 
				+	void setEulerYPR(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{
			
 
				+		setEulerZYX(roll, pitch, yaw);
			
 
				+	}
			
 
				+
			
 
				+	/** @brief Set the matrix from euler angles YPR around ZYX axes
			
 
				+	* @param eulerX Roll about X axis
			
 
				+	* @param eulerY Pitch around Y axis
			
 
				+	* @param eulerZ Yaw about Z axis
			
 
				+	* 
			
 
				+	* These angles are used to produce a rotation matrix. The euler
			
 
				+	* angles are applied in ZYX order. I.e a vector is first rotated 
			
 
				+	* about X then Y and then Z
			
 
				+	**/
			
 
				+	void setEulerZYX(b3Scalar eulerX, b3Scalar eulerY, b3Scalar eulerZ)
			
 
				+	{
			
 
				+		///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
			
 
				+		b3Scalar ci(b3Cos(eulerX));
			
 
				+		b3Scalar cj(b3Cos(eulerY));
			
 
				+		b3Scalar ch(b3Cos(eulerZ));
			
 
				+		b3Scalar si(b3Sin(eulerX));
			
 
				+		b3Scalar sj(b3Sin(eulerY));
			
 
				+		b3Scalar sh(b3Sin(eulerZ));
			
 
				+		b3Scalar cc = ci * ch;
			
 
				+		b3Scalar cs = ci * sh;
			
 
				+		b3Scalar sc = si * ch;
			
 
				+		b3Scalar ss = si * sh;
			
 
				+
			
 
				+		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
			
 
				+				 cj * sh, sj * ss + cc, sj * cs - sc,
			
 
				+				 -sj, cj * si, cj * ci);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set the matrix to the identity */
			
 
				+	void setIdentity()
			
 
				+	{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
			
 
				+		m_el[0] = b3MakeVector3(b3v1000);
			
 
				+		m_el[1] = b3MakeVector3(b3v0100);
			
 
				+		m_el[2] = b3MakeVector3(b3v0010);
			
 
				+#else
			
 
				+		setValue(b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
			
 
				+				 b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
			
 
				+				 b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	static const b3Matrix3x3& getIdentity()
			
 
				+	{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
			
 
				+		static const b3Matrix3x3
			
 
				+			identityMatrix(b3v1000, b3v0100, b3v0010);
			
 
				+#else
			
 
				+		static const b3Matrix3x3
			
 
				+			identityMatrix(
			
 
				+				b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0),
			
 
				+				b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0),
			
 
				+				b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0));
			
 
				+#endif
			
 
				+		return identityMatrix;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
			
 
				+	* @param m The array to be filled */
			
 
				+	void getOpenGLSubMatrix(b3Scalar * m) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 v0 = m_el[0].mVec128;
			
 
				+		__m128 v1 = m_el[1].mVec128;
			
 
				+		__m128 v2 = m_el[2].mVec128;  //  x2 y2 z2 w2
			
 
				+		__m128* vm = (__m128*)m;
			
 
				+		__m128 vT;
			
 
				+
			
 
				+		v2 = _mm_and_ps(v2, b3vFFF0fMask);  //  x2 y2 z2 0
			
 
				+
			
 
				+		vT = _mm_unpackhi_ps(v0, v1);  //	z0 z1 * *
			
 
				+		v0 = _mm_unpacklo_ps(v0, v1);  //	x0 x1 y0 y1
			
 
				+
			
 
				+		v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
			
 
				+		v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
			
 
				+		v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT)));  // z0 z1 z2 0
			
 
				+
			
 
				+		vm[0] = v0;
			
 
				+		vm[1] = v1;
			
 
				+		vm[2] = v2;
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
			
 
				+		static const uint32x2_t zMask = (const uint32x2_t){-1, 0};
			
 
				+		float32x4_t* vm = (float32x4_t*)m;
			
 
				+		float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
			
 
				+		float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2  0 }, {y2 0}
			
 
				+		float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
			
 
				+		float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
			
 
				+		float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
			
 
				+		float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2  0
			
 
				+
			
 
				+		vm[0] = v0;
			
 
				+		vm[1] = v1;
			
 
				+		vm[2] = v2;
			
 
				+#else
			
 
				+		m[0] = b3Scalar(m_el[0].getX());
			
 
				+		m[1] = b3Scalar(m_el[1].getX());
			
 
				+		m[2] = b3Scalar(m_el[2].getX());
			
 
				+		m[3] = b3Scalar(0.0);
			
 
				+		m[4] = b3Scalar(m_el[0].getY());
			
 
				+		m[5] = b3Scalar(m_el[1].getY());
			
 
				+		m[6] = b3Scalar(m_el[2].getY());
			
 
				+		m[7] = b3Scalar(0.0);
			
 
				+		m[8] = b3Scalar(m_el[0].getZ());
			
 
				+		m[9] = b3Scalar(m_el[1].getZ());
			
 
				+		m[10] = b3Scalar(m_el[2].getZ());
			
 
				+		m[11] = b3Scalar(0.0);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Get the matrix represented as a quaternion 
			
 
				+	* @param q The quaternion which will be set */
			
 
				+	void getRotation(b3Quaternion & q) const
			
 
				+	{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
			
 
				+		b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
			
 
				+		b3Scalar s, x;
			
 
				+
			
 
				+		union {
			
 
				+			b3SimdFloat4 vec;
			
 
				+			b3Scalar f[4];
			
 
				+		} temp;
			
 
				+
			
 
				+		if (trace > b3Scalar(0.0))
			
 
				+		{
			
 
				+			x = trace + b3Scalar(1.0);
			
 
				+
			
 
				+			temp.f[0] = m_el[2].getY() - m_el[1].getZ();
			
 
				+			temp.f[1] = m_el[0].getZ() - m_el[2].getX();
			
 
				+			temp.f[2] = m_el[1].getX() - m_el[0].getY();
			
 
				+			temp.f[3] = x;
			
 
				+			//temp.f[3]= s * b3Scalar(0.5);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			int i, j, k;
			
 
				+			if (m_el[0].getX() < m_el[1].getY())
			
 
				+			{
			
 
				+				if (m_el[1].getY() < m_el[2].getZ())
			
 
				+				{
			
 
				+					i = 2;
			
 
				+					j = 0;
			
 
				+					k = 1;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					i = 1;
			
 
				+					j = 2;
			
 
				+					k = 0;
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (m_el[0].getX() < m_el[2].getZ())
			
 
				+				{
			
 
				+					i = 2;
			
 
				+					j = 0;
			
 
				+					k = 1;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					i = 0;
			
 
				+					j = 1;
			
 
				+					k = 2;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			x = m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0);
			
 
				+
			
 
				+			temp.f[3] = (m_el[k][j] - m_el[j][k]);
			
 
				+			temp.f[j] = (m_el[j][i] + m_el[i][j]);
			
 
				+			temp.f[k] = (m_el[k][i] + m_el[i][k]);
			
 
				+			temp.f[i] = x;
			
 
				+			//temp.f[i] = s * b3Scalar(0.5);
			
 
				+		}
			
 
				+
			
 
				+		s = b3Sqrt(x);
			
 
				+		q.set128(temp.vec);
			
 
				+		s = b3Scalar(0.5) / s;
			
 
				+
			
 
				+		q *= s;
			
 
				+#else
			
 
				+		b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
			
 
				+
			
 
				+		b3Scalar temp[4];
			
 
				+
			
 
				+		if (trace > b3Scalar(0.0))
			
 
				+		{
			
 
				+			b3Scalar s = b3Sqrt(trace + b3Scalar(1.0));
			
 
				+			temp[3] = (s * b3Scalar(0.5));
			
 
				+			s = b3Scalar(0.5) / s;
			
 
				+
			
 
				+			temp[0] = ((m_el[2].getY() - m_el[1].getZ()) * s);
			
 
				+			temp[1] = ((m_el[0].getZ() - m_el[2].getX()) * s);
			
 
				+			temp[2] = ((m_el[1].getX() - m_el[0].getY()) * s);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			int i = m_el[0].getX() < m_el[1].getY() ? (m_el[1].getY() < m_el[2].getZ() ? 2 : 1) : (m_el[0].getX() < m_el[2].getZ() ? 2 : 0);
			
 
				+			int j = (i + 1) % 3;
			
 
				+			int k = (i + 2) % 3;
			
 
				+
			
 
				+			b3Scalar s = b3Sqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0));
			
 
				+			temp[i] = s * b3Scalar(0.5);
			
 
				+			s = b3Scalar(0.5) / s;
			
 
				+
			
 
				+			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
			
 
				+			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
			
 
				+			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
			
 
				+		}
			
 
				+		q.setValue(temp[0], temp[1], temp[2], temp[3]);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
			
 
				+	* @param yaw Yaw around Y axis
			
 
				+	* @param pitch Pitch around X axis
			
 
				+	* @param roll around Z axis */
			
 
				+	void getEulerYPR(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll) const
			
 
				+	{
			
 
				+		// first use the normal calculus
			
 
				+		yaw = b3Scalar(b3Atan2(m_el[1].getX(), m_el[0].getX()));
			
 
				+		pitch = b3Scalar(b3Asin(-m_el[2].getX()));
			
 
				+		roll = b3Scalar(b3Atan2(m_el[2].getY(), m_el[2].getZ()));
			
 
				+
			
 
				+		// on pitch = +/-HalfPI
			
 
				+		if (b3Fabs(pitch) == B3_HALF_PI)
			
 
				+		{
			
 
				+			if (yaw > 0)
			
 
				+				yaw -= B3_PI;
			
 
				+			else
			
 
				+				yaw += B3_PI;
			
 
				+
			
 
				+			if (roll > 0)
			
 
				+				roll -= B3_PI;
			
 
				+			else
			
 
				+				roll += B3_PI;
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	/**@brief Get the matrix represented as euler angles around ZYX
			
 
				+	* @param yaw Yaw around Z axis
			
 
				+	* @param pitch Pitch around Y axis
			
 
				+	* @param roll Roll around X axis
			
 
				+	* @param solution_number Which of the two possible solutions to return: 1 selects the first, any other value selects the second */
			
 
				+	void getEulerZYX(b3Scalar & yaw, b3Scalar & pitch, b3Scalar & roll, unsigned int solution_number = 1) const
			
 
				+	{
			
 
				+		struct Euler
			
 
				+		{
			
 
				+			b3Scalar yaw;
			
 
				+			b3Scalar pitch;
			
 
				+			b3Scalar roll;
			
 
				+		};
			
 
				+
			
 
				+		Euler euler_out;
			
 
				+		Euler euler_out2;  //second solution
			
 
				+		//get the pointer to the raw data
			
 
				+
			
 
				+		// Check that pitch is not at a singularity
			
 
				+		if (b3Fabs(m_el[2].getX()) >= 1)
			
 
				+		{
			
 
				+			euler_out.yaw = 0;
			
 
				+			euler_out2.yaw = 0;
			
 
				+
			
 
				+			// From difference of angles formula
			
 
				+			b3Scalar delta = b3Atan2(m_el[0].getX(), m_el[0].getZ());
			
 
				+			if (m_el[2].getX() > 0)  //gimbal locked up
			
 
				+			{
			
 
				+				euler_out.pitch = B3_PI / b3Scalar(2.0);
			
 
				+				euler_out2.pitch = B3_PI / b3Scalar(2.0);
			
 
				+				euler_out.roll = euler_out.pitch + delta;
			
 
				+				euler_out2.roll = euler_out.pitch + delta;
			
 
				+			}
			
 
				+			else  // gimbal locked down
			
 
				+			{
			
 
				+				euler_out.pitch = -B3_PI / b3Scalar(2.0);
			
 
				+				euler_out2.pitch = -B3_PI / b3Scalar(2.0);
			
 
				+				euler_out.roll = -euler_out.pitch + delta;
			
 
				+				euler_out2.roll = -euler_out.pitch + delta;
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			euler_out.pitch = -b3Asin(m_el[2].getX());
			
 
				+			euler_out2.pitch = B3_PI - euler_out.pitch;
			
 
				+
			
 
				+			euler_out.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out.pitch),
			
 
				+									 m_el[2].getZ() / b3Cos(euler_out.pitch));
			
 
				+			euler_out2.roll = b3Atan2(m_el[2].getY() / b3Cos(euler_out2.pitch),
			
 
				+									  m_el[2].getZ() / b3Cos(euler_out2.pitch));
			
 
				+
			
 
				+			euler_out.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out.pitch),
			
 
				+									m_el[0].getX() / b3Cos(euler_out.pitch));
			
 
				+			euler_out2.yaw = b3Atan2(m_el[1].getX() / b3Cos(euler_out2.pitch),
			
 
				+									 m_el[0].getX() / b3Cos(euler_out2.pitch));
			
 
				+		}
			
 
				+
			
 
				+		if (solution_number == 1)
			
 
				+		{
			
 
				+			yaw = euler_out.yaw;
			
 
				+			pitch = euler_out.pitch;
			
 
				+			roll = euler_out.roll;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			yaw = euler_out2.yaw;
			
 
				+			pitch = euler_out2.pitch;
			
 
				+			roll = euler_out2.roll;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Create a scaled copy of the matrix 
			
 
				+	* @param s Scaling vector; each element of the vector scales the corresponding column */
			
 
				+
			
 
				+	b3Matrix3x3 scaled(const b3Vector3& s) const
			
 
				+	{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
			
 
				+		return b3Matrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
			
 
				+#else
			
 
				+		return b3Matrix3x3(
			
 
				+			m_el[0].getX() * s.getX(), m_el[0].getY() * s.getY(), m_el[0].getZ() * s.getZ(),
			
 
				+			m_el[1].getX() * s.getX(), m_el[1].getY() * s.getY(), m_el[1].getZ() * s.getZ(),
			
 
				+			m_el[2].getX() * s.getX(), m_el[2].getY() * s.getY(), m_el[2].getZ() * s.getZ());
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the determinant of the matrix */
			
 
				+	b3Scalar determinant() const;
			
 
				+	/**@brief Return the adjoint of the matrix */
			
 
				+	b3Matrix3x3 adjoint() const;
			
 
				+	/**@brief Return the matrix with all values non negative */
			
 
				+	b3Matrix3x3 absolute() const;
			
 
				+	/**@brief Return the transpose of the matrix */
			
 
				+	b3Matrix3x3 transpose() const;
			
 
				+	/**@brief Return the inverse of the matrix */
			
 
				+	b3Matrix3x3 inverse() const;
			
 
				+
			
 
				+	b3Matrix3x3 transposeTimes(const b3Matrix3x3& m) const;
			
 
				+	b3Matrix3x3 timesTranspose(const b3Matrix3x3& m) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Scalar tdotx(const b3Vector3& v) const
			
 
				+	{
			
 
				+		return m_el[0].getX() * v.getX() + m_el[1].getX() * v.getY() + m_el[2].getX() * v.getZ();
			
 
				+	}
			
 
				+	B3_FORCE_INLINE b3Scalar tdoty(const b3Vector3& v) const
			
 
				+	{
			
 
				+		return m_el[0].getY() * v.getX() + m_el[1].getY() * v.getY() + m_el[2].getY() * v.getZ();
			
 
				+	}
			
 
				+	B3_FORCE_INLINE b3Scalar tdotz(const b3Vector3& v) const
			
 
				+	{
			
 
				+		return m_el[0].getZ() * v.getX() + m_el[1].getZ() * v.getY() + m_el[2].getZ() * v.getZ();
			
 
				+	}
			
 
				+
			
 
				+	/**@brief diagonalizes this matrix by the Jacobi method.
			
 
				+	* @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
			
 
				+	* coordinate system, i.e., old_this = rot * new_this * rot^T. 
			
 
				+	* @param threshold Convergence tolerance, relative to the sum of the absolute values of the diagonal (see the stopping rule below)
			
 
				+	* @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied 
			
 
				+	* by the sum of the absolute values of the diagonal, or when maxSteps have been executed. 
			
 
				+	* 
			
 
				+	* Note that this matrix is assumed to be symmetric. 
			
 
				+	*/
			
 
				+	void diagonalize(b3Matrix3x3 & rot, b3Scalar threshold, int maxSteps)
			
 
				+	{
			
 
				+		rot.setIdentity();
			
 
				+		for (int step = maxSteps; step > 0; step--)
			
 
				+		{
			
 
				+			// find off-diagonal element [p][q] with largest magnitude
			
 
				+			int p = 0;
			
 
				+			int q = 1;
			
 
				+			int r = 2;
			
 
				+			b3Scalar max = b3Fabs(m_el[0][1]);
			
 
				+			b3Scalar v = b3Fabs(m_el[0][2]);
			
 
				+			if (v > max)
			
 
				+			{
			
 
				+				q = 2;
			
 
				+				r = 1;
			
 
				+				max = v;
			
 
				+			}
			
 
				+			v = b3Fabs(m_el[1][2]);
			
 
				+			if (v > max)
			
 
				+			{
			
 
				+				p = 1;
			
 
				+				q = 2;
			
 
				+				r = 0;
			
 
				+				max = v;
			
 
				+			}
			
 
				+
			
 
				+			b3Scalar t = threshold * (b3Fabs(m_el[0][0]) + b3Fabs(m_el[1][1]) + b3Fabs(m_el[2][2]));
			
 
				+			if (max <= t)
			
 
				+			{
			
 
				+				if (max <= B3_EPSILON * t)
			
 
				+				{
			
 
				+					return;
			
 
				+				}
			
 
				+				step = 1;
			
 
				+			}
			
 
				+
			
 
				+			// compute Jacobi rotation J which leads to a zero for element [p][q]
			
 
				+			b3Scalar mpq = m_el[p][q];
			
 
				+			b3Scalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
			
 
				+			b3Scalar theta2 = theta * theta;
			
 
				+			b3Scalar cos;
			
 
				+			b3Scalar sin;
			
 
				+			if (theta2 * theta2 < b3Scalar(10 / B3_EPSILON))
			
 
				+			{
			
 
				+				t = (theta >= 0) ? 1 / (theta + b3Sqrt(1 + theta2))
			
 
				+								 : 1 / (theta - b3Sqrt(1 + theta2));
			
 
				+				cos = 1 / b3Sqrt(1 + t * t);
			
 
				+				sin = cos * t;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// approximation for large theta-value, i.e., a nearly diagonal matrix
			
 
				+				t = 1 / (theta * (2 + b3Scalar(0.5) / theta2));
			
 
				+				cos = 1 - b3Scalar(0.5) * t * t;
			
 
				+				sin = cos * t;
			
 
				+			}
			
 
				+
			
 
				+			// apply rotation to matrix (this = J^T * this * J)
			
 
				+			m_el[p][q] = m_el[q][p] = 0;
			
 
				+			m_el[p][p] -= t * mpq;
			
 
				+			m_el[q][q] += t * mpq;
			
 
				+			b3Scalar mrp = m_el[r][p];
			
 
				+			b3Scalar mrq = m_el[r][q];
			
 
				+			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
			
 
				+			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
			
 
				+
			
 
				+			// apply rotation to rot (rot = rot * J)
			
 
				+			for (int i = 0; i < 3; i++)
			
 
				+			{
			
 
				+				b3Vector3& row = rot[i];
			
 
				+				mrp = row[p];
			
 
				+				mrq = row[q];
			
 
				+				row[p] = cos * mrp - sin * mrq;
			
 
				+				row[q] = cos * mrq + sin * mrp;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Calculate the matrix cofactor (a 2x2 sub-determinant built from two rows and two columns)
				+	* @param r1 The first row to use for calculating the cofactor
				+	* @param c1 The first column to use for calculating the cofactor
				+	* @param r2 The second row to use for calculating the cofactor
				+	* @param c2 The second column to use for calculating the cofactor
				+	* @return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1]
				+	* See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
				+	*/
				+	b3Scalar cofac(int r1, int c1, int r2, int c2) const
				+	{
				+		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
				+	}
			
 
				+
			
 
				+	void serialize(struct b3Matrix3x3Data & dataOut) const;
			
 
				+
			
 
				+	void serializeFloat(struct b3Matrix3x3FloatData & dataOut) const;
			
 
				+
			
 
				+	void deSerialize(const struct b3Matrix3x3Data& dataIn);
			
 
				+
			
 
				+	void deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn);
			
 
				+
			
 
				+	void deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn);
			
 
				+};
			
 
				+/**@brief Multiply this matrix by m on the right, in place (this = this * m)
				+* @param m The right-hand factor
				+* @return A reference to this matrix
				+* The SIMD branches form row i of the result as the sum over j of this[i][j] times row j of m.
				+*/
				+B3_FORCE_INLINE b3Matrix3x3&
				+b3Matrix3x3::operator*=(const b3Matrix3x3& m)
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+	__m128 rv00, rv01, rv02;
				+	__m128 rv10, rv11, rv12;
				+	__m128 rv20, rv21, rv22;
				+	__m128 mv0, mv1, mv2;
				+
				+	rv02 = m_el[0].mVec128;  // rv02/rv12/rv22 first hold the original rows of this matrix
				+	rv12 = m_el[1].mVec128;
				+	rv22 = m_el[2].mVec128;
				+
				+	mv0 = _mm_and_ps(m[0].mVec128, b3vFFF0fMask);  // rows of m; the mask presumably clears the w lane - confirm against b3vFFF0fMask
				+	mv1 = _mm_and_ps(m[1].mVec128, b3vFFF0fMask);
				+	mv2 = _mm_and_ps(m[2].mVec128, b3vFFF0fMask);
				+
				+	// rv0
				+	rv00 = b3_splat_ps(rv02, 0);
				+	rv01 = b3_splat_ps(rv02, 1);
				+	rv02 = b3_splat_ps(rv02, 2);  // overwritten last because it is the source of the two splats above
				+
				+	rv00 = _mm_mul_ps(rv00, mv0);
				+	rv01 = _mm_mul_ps(rv01, mv1);
				+	rv02 = _mm_mul_ps(rv02, mv2);
				+
				+	// rv1
				+	rv10 = b3_splat_ps(rv12, 0);
				+	rv11 = b3_splat_ps(rv12, 1);
				+	rv12 = b3_splat_ps(rv12, 2);
				+
				+	rv10 = _mm_mul_ps(rv10, mv0);
				+	rv11 = _mm_mul_ps(rv11, mv1);
				+	rv12 = _mm_mul_ps(rv12, mv2);
				+
				+	// rv2
				+	rv20 = b3_splat_ps(rv22, 0);
				+	rv21 = b3_splat_ps(rv22, 1);
				+	rv22 = b3_splat_ps(rv22, 2);
				+
				+	rv20 = _mm_mul_ps(rv20, mv0);
				+	rv21 = _mm_mul_ps(rv21, mv1);
				+	rv22 = _mm_mul_ps(rv22, mv2);
				+
				+	rv00 = _mm_add_ps(rv00, rv01);
				+	rv10 = _mm_add_ps(rv10, rv11);
				+	rv20 = _mm_add_ps(rv20, rv21);
				+
				+	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);  // rows are only written back after every product has been formed
				+	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
				+	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	float32x4_t rv0, rv1, rv2;
				+	float32x4_t v0, v1, v2;
				+	float32x4_t mv0, mv1, mv2;
				+
				+	v0 = m_el[0].mVec128;
				+	v1 = m_el[1].mVec128;
				+	v2 = m_el[2].mVec128;
				+
				+	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask);
				+	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask);
				+	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask);
				+
				+	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);  // row 0 of m scaled by element 0 of each row of this
				+	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
				+	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
				+
				+	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);  // accumulate row 1 of m scaled by element 1
				+	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
				+	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
				+
				+	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);  // accumulate row 2 of m scaled by element 2
				+	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
				+	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
				+
				+	m_el[0].mVec128 = rv0;
				+	m_el[1].mVec128 = rv1;
				+	m_el[2].mVec128 = rv2;
				+#else
				+	setValue(  // scalar path: tdotz dots its argument with the z column of m; tdotx/tdoty presumably do the same for x and y
				+		m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
				+		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
				+		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
				+#endif
				+	return *this;
				+}
			
 
				+
			
 
				+/**@brief Add m to this matrix element by element, in place
				+* @param m The matrix to add
				+* @return A reference to this matrix */
				+B3_FORCE_INLINE b3Matrix3x3&
				+b3Matrix3x3::operator+=(const b3Matrix3x3& m)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	// SIMD build: one vector addition per row.
				+	for (int row = 0; row < 3; ++row)
				+	{
				+		m_el[row].mVec128 = m_el[row].mVec128 + m.m_el[row].mVec128;
				+	}
				+#else
				+	// Scalar build: all nine sums are evaluated before setValue stores any of them.
				+	setValue(
				+		m_el[0].getX() + m.m_el[0].getX(), m_el[0].getY() + m.m_el[0].getY(), m_el[0].getZ() + m.m_el[0].getZ(),
				+		m_el[1].getX() + m.m_el[1].getX(), m_el[1].getY() + m.m_el[1].getY(), m_el[1].getZ() + m.m_el[1].getZ(),
				+		m_el[2].getX() + m.m_el[2].getX(), m_el[2].getY() + m.m_el[2].getY(), m_el[2].getZ() + m.m_el[2].getZ());
				+#endif
				+	return *this;
				+}
			
 
				+/**@brief Multiply every element of a matrix by a scalar
				+* @param m The matrix to scale
				+* @param k The scale factor
				+* @return A new matrix holding m scaled by k
				+*/
				+B3_FORCE_INLINE b3Matrix3x3
				+operator*(const b3Matrix3x3& m, const b3Scalar& k)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+	__m128 vk = b3_splat_ps(_mm_load_ss((float*)&k), 0x80);  // NOTE(review): selector 0x80 presumably broadcasts k to x/y/z and leaves w zero - confirm against b3_splat_ps; the cast assumes b3Scalar is float here
				+	return b3Matrix3x3(
				+		_mm_mul_ps(m[0].mVec128, vk),
				+		_mm_mul_ps(m[1].mVec128, vk),
				+		_mm_mul_ps(m[2].mVec128, vk));
				+#elif defined(B3_USE_NEON)
				+	return b3Matrix3x3(
				+		vmulq_n_f32(m[0].mVec128, k),
				+		vmulq_n_f32(m[1].mVec128, k),
				+		vmulq_n_f32(m[2].mVec128, k));
				+#else
				+	return b3Matrix3x3(
				+		m[0].getX() * k, m[0].getY() * k, m[0].getZ() * k,
				+		m[1].getX() * k, m[1].getY() * k, m[1].getZ() * k,
				+		m[2].getX() * k, m[2].getY() * k, m[2].getZ() * k);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Element-wise sum of two matrices
				+* @param m1 The left-hand operand
				+* @param m2 The right-hand operand
				+* @return A new matrix holding m1 + m2 */
				+B3_FORCE_INLINE b3Matrix3x3
				+operator+(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+	// Bind each row once; both branches below work row by row.
				+	const b3Vector3& a0 = m1[0];
				+	const b3Vector3& a1 = m1[1];
				+	const b3Vector3& a2 = m1[2];
				+	const b3Vector3& b0 = m2[0];
				+	const b3Vector3& b1 = m2[1];
				+	const b3Vector3& b2 = m2[2];
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	return b3Matrix3x3(
				+		a0.mVec128 + b0.mVec128,
				+		a1.mVec128 + b1.mVec128,
				+		a2.mVec128 + b2.mVec128);
				+#else
				+	return b3Matrix3x3(
				+		a0.getX() + b0.getX(), a0.getY() + b0.getY(), a0.getZ() + b0.getZ(),
				+		a1.getX() + b1.getX(), a1.getY() + b1.getY(), a1.getZ() + b1.getZ(),
				+		a2.getX() + b2.getX(), a2.getY() + b2.getY(), a2.getZ() + b2.getZ());
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Element-wise difference of two matrices
				+* @param m1 The matrix to subtract from
				+* @param m2 The matrix to subtract
				+* @return A new matrix holding m1 - m2 */
				+B3_FORCE_INLINE b3Matrix3x3
				+operator-(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+	// Bind each row once; both branches below work row by row.
				+	const b3Vector3& a0 = m1[0];
				+	const b3Vector3& a1 = m1[1];
				+	const b3Vector3& a2 = m1[2];
				+	const b3Vector3& b0 = m2[0];
				+	const b3Vector3& b1 = m2[1];
				+	const b3Vector3& b2 = m2[2];
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	return b3Matrix3x3(
				+		a0.mVec128 - b0.mVec128,
				+		a1.mVec128 - b1.mVec128,
				+		a2.mVec128 - b2.mVec128);
				+#else
				+	return b3Matrix3x3(
				+		a0.getX() - b0.getX(), a0.getY() - b0.getY(), a0.getZ() - b0.getZ(),
				+		a1.getX() - b1.getX(), a1.getY() - b1.getY(), a1.getZ() - b1.getZ(),
				+		a2.getX() - b2.getX(), a2.getY() - b2.getY(), a2.getZ() - b2.getZ());
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Subtract m from this matrix element by element, in place
				+* @param m The matrix to subtract
				+* @return A reference to this matrix */
				+B3_FORCE_INLINE b3Matrix3x3&
				+b3Matrix3x3::operator-=(const b3Matrix3x3& m)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	// SIMD build: one vector subtraction per row.
				+	for (int row = 0; row < 3; ++row)
				+	{
				+		m_el[row].mVec128 = m_el[row].mVec128 - m.m_el[row].mVec128;
				+	}
				+#else
				+	// Scalar build: all nine differences are evaluated before setValue stores any of them.
				+	setValue(
				+		m_el[0].getX() - m.m_el[0].getX(), m_el[0].getY() - m.m_el[0].getY(), m_el[0].getZ() - m.m_el[0].getZ(),
				+		m_el[1].getX() - m.m_el[1].getX(), m_el[1].getY() - m.m_el[1].getY(), m_el[1].getZ() - m.m_el[1].getZ(),
				+		m_el[2].getX() - m.m_el[2].getX(), m_el[2].getY() - m.m_el[2].getY(), m_el[2].getZ() - m.m_el[2].getZ());
				+#endif
				+	return *this;
				+}
			
 
				+
			
 
				+/**@brief Return the determinant of this matrix
				+* The value is obtained by passing the three rows to b3Triple. */
				+B3_FORCE_INLINE b3Scalar
				+b3Matrix3x3::determinant() const
				+{
				+	const b3Matrix3x3& self = *this;
				+	const b3Vector3& row0 = self[0];
				+	const b3Vector3& row1 = self[1];
				+	const b3Vector3& row2 = self[2];
				+	return b3Triple(row0, row1, row2);
				+}
			
 
				+
			
 
				+/**@brief Return a matrix whose elements are the absolute values of this matrix's elements
				+* The SIMD branches AND each row with a mask; the scalar branch calls b3Fabs per element. */
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::absolute() const
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+	const __m128 abs0 = _mm_and_ps(m_el[0].mVec128, b3vAbsfMask);
				+	const __m128 abs1 = _mm_and_ps(m_el[1].mVec128, b3vAbsfMask);
				+	const __m128 abs2 = _mm_and_ps(m_el[2].mVec128, b3vAbsfMask);
				+	return b3Matrix3x3(abs0, abs1, abs2);
				+#elif defined(B3_USE_NEON)
				+	const float32x4_t abs0 = (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, b3v3AbsMask);
				+	const float32x4_t abs1 = (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, b3v3AbsMask);
				+	const float32x4_t abs2 = (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, b3v3AbsMask);
				+	return b3Matrix3x3(abs0, abs1, abs2);
				+#else
				+	const b3Vector3& row0 = m_el[0];
				+	const b3Vector3& row1 = m_el[1];
				+	const b3Vector3& row2 = m_el[2];
				+	return b3Matrix3x3(
				+		b3Fabs(row0.getX()), b3Fabs(row0.getY()), b3Fabs(row0.getZ()),
				+		b3Fabs(row1.getX()), b3Fabs(row1.getY()), b3Fabs(row1.getZ()),
				+		b3Fabs(row2.getX()), b3Fabs(row2.getY()), b3Fabs(row2.getZ()));
				+#endif
				+}
			
 
				+/**@brief Return the transpose of this matrix
				+* @return A new matrix whose rows are the columns of this one
				+* As the inline lane comments show, both SIMD branches produce rows whose w lane is zero.
				+*/
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::transpose() const
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+	__m128 v0 = m_el[0].mVec128;
				+	__m128 v1 = m_el[1].mVec128;
				+	__m128 v2 = m_el[2].mVec128;  //  x2 y2 z2 w2
				+	__m128 vT;
				+
				+	v2 = _mm_and_ps(v2, b3vFFF0fMask);  //  x2 y2 z2 0
				+
				+	vT = _mm_unpackhi_ps(v0, v1);  //	z0 z1 * *
				+	v0 = _mm_unpacklo_ps(v0, v1);  //	x0 x1 y0 y1
				+
				+	v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
				+	v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
				+	v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT)));  // z0 z1 z2 0
				+
				+	return b3Matrix3x3(v0, v1, v2);
				+#elif defined(B3_USE_NEON)
				+	// note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
				+	static const uint32x2_t zMask = (const uint32x2_t){-1, 0};
				+	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
				+	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2  0 }, {y2 0}
				+	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);  // x0 x1 x2 0
				+	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);  // y0 y1 y2 0
				+	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);  // z2 0
				+	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2  0
				+	return b3Matrix3x3(v0, v1, v2);
				+#else
				+	return b3Matrix3x3(m_el[0].getX(), m_el[1].getX(), m_el[2].getX(),
				+					   m_el[0].getY(), m_el[1].getY(), m_el[2].getY(),
				+					   m_el[0].getZ(), m_el[1].getZ(), m_el[2].getZ());
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the adjoint of this matrix
				+* Each of the nine entries is one cofac() value; no scaling by the determinant is applied here. */
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::adjoint() const
				+{
				+	// first row of the result
				+	const b3Scalar a00 = cofac(1, 1, 2, 2);
				+	const b3Scalar a01 = cofac(0, 2, 2, 1);
				+	const b3Scalar a02 = cofac(0, 1, 1, 2);
				+	// second row of the result
				+	const b3Scalar a10 = cofac(1, 2, 2, 0);
				+	const b3Scalar a11 = cofac(0, 0, 2, 2);
				+	const b3Scalar a12 = cofac(0, 2, 1, 0);
				+	// third row of the result
				+	const b3Scalar a20 = cofac(1, 0, 2, 1);
				+	const b3Scalar a21 = cofac(0, 1, 2, 0);
				+	const b3Scalar a22 = cofac(0, 0, 1, 1);
				+
				+	return b3Matrix3x3(a00, a01, a02,
				+					   a10, a11, a12,
				+					   a20, a21, a22);
				+}
			
 
				+
			
 
				+/**@brief Return the inverse of this matrix
				+* The determinant is the dot product of the first row with three cofactors; every cofactor is
				+* then scaled by its reciprocal. A zero determinant is only caught by b3FullAssert. */
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::inverse() const
				+{
				+	// These three cofactors serve twice: for the determinant and as the first result column.
				+	const b3Vector3 firstColumn = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
				+	const b3Scalar det = (*this)[0].dot(firstColumn);
				+	b3FullAssert(det != b3Scalar(0.0));
				+	const b3Scalar invDet = b3Scalar(1.0) / det;
				+
				+	return b3Matrix3x3(
				+		firstColumn.getX() * invDet, cofac(0, 2, 2, 1) * invDet, cofac(0, 1, 1, 2) * invDet,
				+		firstColumn.getY() * invDet, cofac(0, 0, 2, 2) * invDet, cofac(0, 2, 1, 0) * invDet,
				+		firstColumn.getZ() * invDet, cofac(0, 1, 2, 0) * invDet, cofac(0, 0, 1, 1) * invDet);
				+}
			
 
				+/**@brief Return the product of the transpose of this matrix with m (this^T * m)
				+* @param m The right-hand factor
				+* @return A new matrix; this matrix is not modified
				+* Row i of the result is the sum over k of this[k][i] times row k of m.
				+*/
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::transposeTimes(const b3Matrix3x3& m) const
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+	// zeros w
				+	//    static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
				+	__m128 row = m_el[0].mVec128;
				+	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, b3vFFF0fMask);
				+	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, b3vFFF0fMask);
				+	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, b3vFFF0fMask);
				+	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));  // selector 0 broadcasts lane 0 (x) of row
				+	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));  // 0x55 broadcasts lane 1 (y)
				+	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));  // 0xaa broadcasts lane 2 (z)
				+	row = m_el[1].mVec128;
				+	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
				+	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
				+	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
				+	row = m_el[2].mVec128;
				+	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
				+	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
				+	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
				+	return b3Matrix3x3(r0, r1, r2);
				+
				+#elif defined B3_USE_NEON
				+	// zeros w
				+	static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
				+	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
				+	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
				+	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
				+	float32x4_t row = m_el[0].mVec128;
				+	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);  // scale by x of the current row of this
				+	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);  // scale by y
				+	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);  // scale by z
				+	row = m_el[1].mVec128;
				+	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
				+	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
				+	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
				+	row = m_el[2].mVec128;
				+	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
				+	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
				+	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
				+	return b3Matrix3x3(r0, r1, r2);
				+#else
				+	return b3Matrix3x3(
				+		m_el[0].getX() * m[0].getX() + m_el[1].getX() * m[1].getX() + m_el[2].getX() * m[2].getX(),
				+		m_el[0].getX() * m[0].getY() + m_el[1].getX() * m[1].getY() + m_el[2].getX() * m[2].getY(),
				+		m_el[0].getX() * m[0].getZ() + m_el[1].getX() * m[1].getZ() + m_el[2].getX() * m[2].getZ(),
				+		m_el[0].getY() * m[0].getX() + m_el[1].getY() * m[1].getX() + m_el[2].getY() * m[2].getX(),
				+		m_el[0].getY() * m[0].getY() + m_el[1].getY() * m[1].getY() + m_el[2].getY() * m[2].getY(),
				+		m_el[0].getY() * m[0].getZ() + m_el[1].getY() * m[1].getZ() + m_el[2].getY() * m[2].getZ(),
				+		m_el[0].getZ() * m[0].getX() + m_el[1].getZ() * m[1].getX() + m_el[2].getZ() * m[2].getX(),
				+		m_el[0].getZ() * m[0].getY() + m_el[1].getZ() * m[1].getY() + m_el[2].getZ() * m[2].getY(),
				+		m_el[0].getZ() * m[0].getZ() + m_el[1].getZ() * m[1].getZ() + m_el[2].getZ() * m[2].getZ());
				+#endif
				+}
			
 
				+/**@brief Return the product of this matrix with the transpose of m (this * m^T)
				+* @param m The matrix whose transpose is the right-hand factor
				+* @return A new matrix; this matrix is not modified
				+* Element [i][j] of the result is the dot product of row i of this matrix with row j of m.
				+*/
				+B3_FORCE_INLINE b3Matrix3x3
				+b3Matrix3x3::timesTranspose(const b3Matrix3x3& m) const
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+	__m128 a0 = m_el[0].mVec128;
				+	__m128 a1 = m_el[1].mVec128;
				+	__m128 a2 = m_el[2].mVec128;
				+
				+	b3Matrix3x3 mT = m.transpose();  // we rely on transpose() zeroing w channel so that we don't have to do it here
				+	__m128 mx = mT[0].mVec128;
				+	__m128 my = mT[1].mVec128;
				+	__m128 mz = mT[2].mVec128;
				+
				+	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));  // 0x00 broadcasts lane 0 (x) of the row of this
				+	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
				+	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
				+	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));  // 0x55 broadcasts lane 1 (y)
				+	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
				+	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
				+	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));  // 0xaa broadcasts lane 2 (z)
				+	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
				+	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
				+	return b3Matrix3x3(r0, r1, r2);
				+
				+#elif defined B3_USE_NEON
				+	float32x4_t a0 = m_el[0].mVec128;
				+	float32x4_t a1 = m_el[1].mVec128;
				+	float32x4_t a2 = m_el[2].mVec128;
				+
				+	b3Matrix3x3 mT = m.transpose();  // we rely on transpose() zeroing w channel so that we don't have to do it here
				+	float32x4_t mx = mT[0].mVec128;
				+	float32x4_t my = mT[1].mVec128;
				+	float32x4_t mz = mT[2].mVec128;
				+
				+	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
				+	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
				+	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
				+	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
				+	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
				+	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
				+	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
				+	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
				+	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
				+	return b3Matrix3x3(r0, r1, r2);
				+
				+#else
				+	return b3Matrix3x3(
				+		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
				+		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
				+		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Multiply a matrix by a column vector (m * v)
				+* @param m The matrix
				+* @param v The vector
				+* @return A vector whose components are built from each row of m and v */
				+B3_FORCE_INLINE b3Vector3
				+operator*(const b3Matrix3x3& m, const b3Vector3& v)
				+{
				+	const b3Vector3& row0 = m[0];
				+	const b3Vector3& row1 = m[1];
				+	const b3Vector3& row2 = m[2];
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	// SIMD build: the three row products are delegated to a single dot3 call.
				+	return v.dot3(row0, row1, row2);
				+#else
				+	return b3MakeVector3(row0.dot(v), row1.dot(v), row2.dot(v));
				+#endif
				+}
			
 
				+/**@brief Multiply a row vector by a matrix (v * m)
				+* @param v The vector, used as the left-hand factor
				+* @param m The matrix
				+* @return A new vector; the SIMD branches compute v.x * row0 + v.y * row1 + v.z * row2 of m
				+*/
				+B3_FORCE_INLINE b3Vector3
				+operator*(const b3Vector3& v, const b3Matrix3x3& m)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+
				+	const __m128 vv = v.mVec128;
				+
				+	__m128 c0 = b3_splat_ps(vv, 0);
				+	__m128 c1 = b3_splat_ps(vv, 1);
				+	__m128 c2 = b3_splat_ps(vv, 2);
				+
				+	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, b3vFFF0fMask));  // rows of m are masked first; presumably this clears w - confirm against b3vFFF0fMask
				+	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, b3vFFF0fMask));
				+	c0 = _mm_add_ps(c0, c1);
				+	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, b3vFFF0fMask));
				+
				+	return b3MakeVector3(_mm_add_ps(c0, c2));
				+#elif defined(B3_USE_NEON)
				+	const float32x4_t vv = v.mVec128;
				+	const float32x2_t vlo = vget_low_f32(vv);  // x, y of v
				+	const float32x2_t vhi = vget_high_f32(vv);  // z, w of v
				+
				+	float32x4_t c0, c1, c2;
				+
				+	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask);
				+	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask);
				+	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask);
				+
				+	c0 = vmulq_lane_f32(c0, vlo, 0);
				+	c1 = vmulq_lane_f32(c1, vlo, 1);
				+	c2 = vmulq_lane_f32(c2, vhi, 0);
				+	c0 = vaddq_f32(c0, c1);
				+	c0 = vaddq_f32(c0, c2);
				+
				+	return b3MakeVector3(c0);
				+#else
				+	return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
				+#endif
				+}
			
 
				+/**@brief Multiply two matrices (m1 * m2)
				+* @param m1 The left-hand factor
				+* @param m2 The right-hand factor
				+* @return A new matrix; the SIMD branches build row i as the sum over j of m1[i][j] times row j of m2
				+*/
				+B3_FORCE_INLINE b3Matrix3x3
				+operator*(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+
				+	__m128 m10 = m1[0].mVec128;
				+	__m128 m11 = m1[1].mVec128;
				+	__m128 m12 = m1[2].mVec128;
				+
				+	__m128 m2v = _mm_and_ps(m2[0].mVec128, b3vFFF0fMask);  // m2v holds one masked row of m2 at a time
				+
				+	__m128 c0 = b3_splat_ps(m10, 0);
				+	__m128 c1 = b3_splat_ps(m11, 0);
				+	__m128 c2 = b3_splat_ps(m12, 0);
				+
				+	c0 = _mm_mul_ps(c0, m2v);
				+	c1 = _mm_mul_ps(c1, m2v);
				+	c2 = _mm_mul_ps(c2, m2v);
				+
				+	m2v = _mm_and_ps(m2[1].mVec128, b3vFFF0fMask);
				+
				+	__m128 c0_1 = b3_splat_ps(m10, 1);
				+	__m128 c1_1 = b3_splat_ps(m11, 1);
				+	__m128 c2_1 = b3_splat_ps(m12, 1);
				+
				+	c0_1 = _mm_mul_ps(c0_1, m2v);
				+	c1_1 = _mm_mul_ps(c1_1, m2v);
				+	c2_1 = _mm_mul_ps(c2_1, m2v);
				+
				+	m2v = _mm_and_ps(m2[2].mVec128, b3vFFF0fMask);
				+
				+	c0 = _mm_add_ps(c0, c0_1);
				+	c1 = _mm_add_ps(c1, c1_1);
				+	c2 = _mm_add_ps(c2, c2_1);
				+
				+	m10 = b3_splat_ps(m10, 2);  // the row registers of m1 are reused for the third partial products
				+	m11 = b3_splat_ps(m11, 2);
				+	m12 = b3_splat_ps(m12, 2);
				+
				+	m10 = _mm_mul_ps(m10, m2v);
				+	m11 = _mm_mul_ps(m11, m2v);
				+	m12 = _mm_mul_ps(m12, m2v);
				+
				+	c0 = _mm_add_ps(c0, m10);
				+	c1 = _mm_add_ps(c1, m11);
				+	c2 = _mm_add_ps(c2, m12);
				+
				+	return b3Matrix3x3(c0, c1, c2);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	float32x4_t rv0, rv1, rv2;
				+	float32x4_t v0, v1, v2;
				+	float32x4_t mv0, mv1, mv2;
				+
				+	v0 = m1[0].mVec128;
				+	v1 = m1[1].mVec128;
				+	v2 = m1[2].mVec128;
				+
				+	mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, b3vFFF0Mask);
				+	mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, b3vFFF0Mask);
				+	mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, b3vFFF0Mask);
				+
				+	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
				+	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
				+	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
				+
				+	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
				+	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
				+	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
				+
				+	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
				+	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
				+	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
				+
				+	return b3Matrix3x3(rv0, rv1, rv2);
				+
				+#else
				+	return b3Matrix3x3(  // scalar path: tdotz dots its argument with the z column of m2; tdotx/tdoty presumably do the same for x and y
				+		m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
				+		m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
				+		m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
				+#endif
				+}
			
 
				+
			
 
				+/*
			
 
				+B3_FORCE_INLINE b3Matrix3x3 b3MultTransposeLeft(const b3Matrix3x3& m1, const b3Matrix3x3& m2) {
			
 
				+return b3Matrix3x3(
			
 
				+m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
			
 
				+m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
			
 
				+m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
			
 
				+m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
			
 
				+m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
			
 
				+m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
			
 
				+m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
			
 
				+m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
			
 
				+m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+/**@brief Equality operator between two matrices
				+* It tests that all nine x/y/z elements are equal; the fourth (w) lane of each row is ignored.
				+* @param m1 The left-hand matrix
				+* @param m2 The right-hand matrix
				+* @return true when every element of m1 equals the corresponding element of m2 */
				+B3_FORCE_INLINE bool operator==(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
				+
				+	__m128 c0, c1, c2;
				+
				+	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
				+	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
				+	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
				+
				+	c0 = _mm_and_ps(c0, c1);
				+	c0 = _mm_and_ps(c0, c2);
				+
				+	// _mm_movemask_ps packs the sign bits of all four lanes, so bit 3 reflects the w lane.
				+	// Keep only bits 0..2: otherwise matrices whose w lanes also compare equal produce 0xF
				+	// and identical matrices would be reported as different.
				+	const int laneMask = _mm_movemask_ps(c0);
				+	return (0x7 == (laneMask & 0x7));
				+#else
				+	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
				+			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
				+			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
				+#endif
				+}
			
 
				+
			
 
				+///for serialization: holds a b3Matrix3x3 as three b3Vector3FloatData entries, one per m_el row
				+struct b3Matrix3x3FloatData
				+{
				+	b3Vector3FloatData m_el[3];
				+};
			
 
				+
			
 
				+///for serialization: holds a b3Matrix3x3 as three b3Vector3DoubleData entries, one per m_el row
				+struct b3Matrix3x3DoubleData
				+{
				+	b3Vector3DoubleData m_el[3];
				+};
			
 
				+
			
 
				+/**@brief Write this matrix into dataOut, one row at a time
				+* @param dataOut Receives the rows through b3Vector3::serialize */
				+B3_FORCE_INLINE void b3Matrix3x3::serialize(struct b3Matrix3x3Data& dataOut) const
				+{
				+	m_el[0].serialize(dataOut.m_el[0]);
				+	m_el[1].serialize(dataOut.m_el[1]);
				+	m_el[2].serialize(dataOut.m_el[2]);
				+}
			
 
				+
			
 
				+/**@brief Write this matrix into a float serialization record, one row at a time
				+* @param dataOut Receives the rows through b3Vector3::serializeFloat */
				+B3_FORCE_INLINE void b3Matrix3x3::serializeFloat(struct b3Matrix3x3FloatData& dataOut) const
				+{
				+	m_el[0].serializeFloat(dataOut.m_el[0]);
				+	m_el[1].serializeFloat(dataOut.m_el[1]);
				+	m_el[2].serializeFloat(dataOut.m_el[2]);
				+}
			
 
				+
			
 
				+/**@brief Load this matrix from dataIn, one row at a time
				+* @param dataIn Supplies the rows through b3Vector3::deSerialize */
				+B3_FORCE_INLINE void b3Matrix3x3::deSerialize(const struct b3Matrix3x3Data& dataIn)
				+{
				+	m_el[0].deSerialize(dataIn.m_el[0]);
				+	m_el[1].deSerialize(dataIn.m_el[1]);
				+	m_el[2].deSerialize(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+/**@brief Load this matrix from a float serialization record, one row at a time
				+* @param dataIn Supplies the rows through b3Vector3::deSerializeFloat */
				+B3_FORCE_INLINE void b3Matrix3x3::deSerializeFloat(const struct b3Matrix3x3FloatData& dataIn)
				+{
				+	m_el[0].deSerializeFloat(dataIn.m_el[0]);
				+	m_el[1].deSerializeFloat(dataIn.m_el[1]);
				+	m_el[2].deSerializeFloat(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+/**@brief Load this matrix from a double serialization record, one row at a time
				+* @param dataIn Supplies the rows through b3Vector3::deSerializeDouble */
				+B3_FORCE_INLINE void b3Matrix3x3::deSerializeDouble(const struct b3Matrix3x3DoubleData& dataIn)
				+{
				+	m_el[0].deSerializeDouble(dataIn.m_el[0]);
				+	m_el[1].deSerializeDouble(dataIn.m_el[1]);
				+	m_el[2].deSerializeDouble(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+#endif  //B3_MATRIX3x3_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3MinMax.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3MinMax.h
@@ -0,0 +1,69 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_GEN_MINMAX_H
			
 
				+#define B3_GEN_MINMAX_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+
			
 
				+/**@brief Return a reference to the smaller of two values
				+* @return a when a < b, otherwise b (so b is returned when the two compare equal) */
				+template <class T>
				+B3_FORCE_INLINE const T& b3Min(const T& a, const T& b)
				+{
				+	if (a < b)
				+	{
				+		return a;
				+	}
				+	return b;
				+}
			
 
				+
			
 
				+/**@brief Return a reference to the larger of two values
				+* @return a when a > b, otherwise b (so b is returned when the two compare equal) */
				+template <class T>
				+B3_FORCE_INLINE const T& b3Max(const T& a, const T& b)
				+{
				+	if (a > b)
				+	{
				+		return a;
				+	}
				+	return b;
				+}
			
 
				+
			
 
				+/**@brief Return a clamped to the range [lb, ub] without modifying it
				+* @param a The value to clamp
				+* @param lb The lower bound, checked first
				+* @param ub The upper bound
				+* @return A reference to lb, ub or a */
				+template <class T>
				+B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub)
				+{
				+	if (a < lb)
				+	{
				+		return lb;
				+	}
				+	if (ub < a)
				+	{
				+		return ub;
				+	}
				+	return a;
				+}
			
 
				+
			
 
				+/**@brief Lower a to b when b is smaller
				+* @param a The value updated in place; left untouched unless b < a
				+* @param b The candidate minimum */
				+template <class T>
				+B3_FORCE_INLINE void b3SetMin(T& a, const T& b)
				+{
				+	if (!(b < a))
				+	{
				+		return;
				+	}
				+	a = b;
				+}
			
 
				+
			
 
				+/**@brief Raise a to b when b is larger
				+* @param a The value updated in place; left untouched unless a < b
				+* @param b The candidate maximum */
				+template <class T>
				+B3_FORCE_INLINE void b3SetMax(T& a, const T& b)
				+{
				+	if (!(a < b))
				+	{
				+		return;
				+	}
				+	a = b;
				+}
			
 
				+
			
 
				+/**@brief Clamp a in place to the range [lb, ub]
				+* @param a The value updated in place
				+* @param lb The lower bound, checked first
				+* @param ub The upper bound, only checked when a is not below lb */
				+template <class T>
				+B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub)
				+{
				+	if (a < lb)
				+	{
				+		a = lb;
				+		return;
				+	}
				+	if (ub < a)
				+	{
				+		a = ub;
				+	}
				+}
			
 
				+
			
 
				+#endif  //B3_GEN_MINMAX_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3PoolAllocator.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3PoolAllocator.h
@@ -0,0 +1,121 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef _BT_POOL_ALLOCATOR_H
			
 
				+#define _BT_POOL_ALLOCATOR_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately.
				+///
				+///One 16-byte aligned buffer of elemSize * maxElements bytes is split into fixed-size slots.
				+///Free slots form an intrusive singly linked list: the first sizeof(void*) bytes of every free
				+///slot store the address of the next free slot, so elemSize must be at least sizeof(void*).
				+///The destructor releases the buffer and no copy operations are declared, so a copied instance
				+///would release the same buffer a second time; do not copy instances.
				+class b3PoolAllocator
				+{
				+	int m_elemSize;         // size in bytes of one slot
				+	int m_maxElements;      // number of slots requested at construction
				+	int m_freeCount;        // number of slots currently on the free list
				+	void* m_firstFree;      // head of the free list, 0 when no slot is available
				+	unsigned char* m_pool;  // start of the backing buffer, 0 when nothing was allocated
				+
				+public:
				+	/**@brief Allocate the backing buffer and thread every slot onto the free list
				+   * @param elemSize Size in bytes of one slot; must be at least sizeof(void*)
				+   * @param maxElements Number of slots; a value <= 0 yields an empty pool
				+   * If maxElements <= 0 or the buffer cannot be allocated, the pool is left empty
				+   * (getFreeCount() == 0 and allocate() returns 0). */
				+	b3PoolAllocator(int elemSize, int maxElements)
				+		: m_elemSize(elemSize),
				+		  m_maxElements(maxElements),
				+		  m_freeCount(0),
				+		  m_firstFree(0),
				+		  m_pool(0)
				+	{
				+		// Each free slot stores a void* link in its first bytes.
				+		b3Assert(m_elemSize >= (int)sizeof(void*));
				+
				+		// The previous "while (--count)" loop never terminated inside the buffer for a
				+		// non-positive element count; bail out before touching any memory instead.
				+		if (m_maxElements <= 0)
				+		{
				+			return;
				+		}
				+
				+		m_pool = (unsigned char*)b3AlignedAlloc(static_cast<unsigned int>(m_elemSize * m_maxElements), 16);
				+		if (!m_pool)
				+		{
				+			return;
				+		}
				+
				+		// Link slot i to slot i + 1; the last slot terminates the list.
				+		unsigned char* p = m_pool;
				+		for (int i = 1; i < m_maxElements; ++i)
				+		{
				+			*(void**)p = (p + m_elemSize);
				+			p += m_elemSize;
				+		}
				+		*(void**)p = 0;
				+
				+		m_firstFree = m_pool;
				+		m_freeCount = m_maxElements;
				+	}
				+
				+	/**@brief Release the backing buffer (if one was allocated) */
				+	~b3PoolAllocator()
				+	{
				+		if (m_pool)
				+		{
				+			b3AlignedFree(m_pool);
				+		}
				+	}
				+
				+	/**@brief Return the number of slots currently available */
				+	int getFreeCount() const
				+	{
				+		return m_freeCount;
				+	}
				+
				+	/**@brief Return the number of slots currently handed out (max count minus free count) */
				+	int getUsedCount() const
				+	{
				+		return m_maxElements - m_freeCount;
				+	}
				+
				+	/**@brief Return the slot count passed to the constructor */
				+	int getMaxCount() const
				+	{
				+		return m_maxElements;
				+	}
				+
				+	/**@brief Pop one slot from the free list
				+   * @param size Requested size in bytes; only checked by assertion against the slot size
				+   * @return Pointer to the slot, or 0 when no free slot is left */
				+	void* allocate(int size)
				+	{
				+		// release mode fix
				+		(void)size;
				+		b3Assert(!size || size <= m_elemSize);
				+		b3Assert(m_freeCount > 0);
				+		// With assertions compiled out an exhausted pool used to dereference a null
				+		// list head here; report the failure to the caller instead.
				+		if (!m_firstFree)
				+		{
				+			return 0;
				+		}
				+		void* result = m_firstFree;
				+		m_firstFree = *(void**)m_firstFree;
				+		--m_freeCount;
				+		return result;
				+	}
				+
				+	/**@brief Tell whether ptr lies inside the backing buffer
				+   * @param ptr Pointer to test; 0 is never valid
				+   * Only the address range is checked, not slot alignment or whether the slot is in use. */
				+	bool validPtr(void* ptr)
				+	{
				+		if (!ptr || !m_pool)
				+		{
				+			return false;
				+		}
				+		unsigned char* bytePtr = (unsigned char*)ptr;
				+		return bytePtr >= m_pool && bytePtr < m_pool + m_maxElements * m_elemSize;
				+	}
				+
				+	/**@brief Push a slot back onto the free list
				+   * @param ptr Slot previously returned by allocate(); 0 is ignored
				+   * The range check is an assertion only, and a slot freed twice is not detected. */
				+	void freeMemory(void* ptr)
				+	{
				+		if (ptr)
				+		{
				+			b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);
				+
				+			*(void**)ptr = m_firstFree;
				+			m_firstFree = ptr;
				+			++m_freeCount;
				+		}
				+	}
				+
				+	/**@brief Return the slot size in bytes passed to the constructor */
				+	int getElementSize() const
				+	{
				+		return m_elemSize;
				+	}
				+
				+	/**@brief Return the start of the backing buffer (0 for an empty pool) */
				+	unsigned char* getPoolAddress()
				+	{
				+		return m_pool;
				+	}
				+
				+	/**@brief Return the start of the backing buffer (0 for an empty pool), read-only */
				+	const unsigned char* getPoolAddress() const
				+	{
				+		return m_pool;
				+	}
				+};
			
 
				+
			
 
				+#endif  //_BT_POOL_ALLOCATOR_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3QuadWord.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3QuadWord.h
@@ -0,0 +1,242 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SIMD_QUADWORD_H
			
 
				+#define B3_SIMD_QUADWORD_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+#include "b3MinMax.h"
			
 
				+
			
 
				+#if defined(__CELLOS_LV2) && defined(__SPU__)
			
 
				+#include <altivec.h>
			
 
				+#endif
			
 
				+
			
 
				+/**@brief The b3QuadWord class is base class for b3Vector3 and b3Quaternion.
				+ * It stores four b3Scalar components (m_floats[0..3]); depending on the build configuration the
				+ * same storage is also exposed as a SIMD register (mVec128) and as named members x, y, z, w.
				+ * Unless USE_LIBSPE2 is defined the class is declared through B3_ATTRIBUTE_ALIGNED16.
				+ * Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword.
				+ */
				+#ifndef USE_LIBSPE2
				+B3_ATTRIBUTE_ALIGNED16(class)
				+b3QuadWord
				+#else
				+class b3QuadWord
				+#endif
				+{
				+protected:
				+#if defined(__SPU__) && defined(__CELLOS_LV2__)
				+	union {
				+		vec_float4 mVec128;
				+		b3Scalar m_floats[4];
				+	};
				+
				+public:
				+	vec_float4 get128() const
				+	{
				+		return mVec128;
				+	}
				+
				+#else  //__CELLOS_LV2__ __SPU__
				+
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+public:
				+	union {
				+		b3SimdFloat4 mVec128;
				+		b3Scalar m_floats[4];
				+		struct
				+		{
				+			b3Scalar x, y, z, w;
				+		};
				+	};
				+
				+public:
				+	B3_FORCE_INLINE b3SimdFloat4 get128() const
				+	{
				+		return mVec128;
				+	}
				+	B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
				+	{
				+		mVec128 = v128;
				+	}
				+#else
				+public:
				+	union {
				+		b3Scalar m_floats[4];
				+		struct
				+		{
				+			b3Scalar x, y, z, w;
				+		};
				+	};
				+#endif  // B3_USE_SSE
				+
				+#endif  //__CELLOS_LV2__ __SPU__
				+
				+public:
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+
				+	// Construct from a SIMD register: all four components are copied from vec
				+	B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec)
				+	{
				+		mVec128 = vec;
				+	}
				+
				+	// Copy constructor (copies the whole SIMD register)
				+	B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs)
				+	{
				+		mVec128 = rhs.mVec128;
				+	}
				+
				+	// Assignment Operator (copies the whole SIMD register)
				+	B3_FORCE_INLINE b3QuadWord&
				+	operator=(const b3QuadWord& v)
				+	{
				+		mVec128 = v.mVec128;
				+
				+		return *this;
				+	}
				+
				+#endif
				+
				+	/**@brief Return the x value */
				+	B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
				+	/**@brief Return the y value */
				+	B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
				+	/**@brief Return the z value */
				+	B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
				+	/**@brief Set the x value */
				+	B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
				+	/**@brief Set the y value */
				+	B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
				+	/**@brief Set the z value */
				+	B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
				+	/**@brief Set the w value */
				+	B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
				+	/**@brief Indexed access is provided through the pointer conversion operators below */
				+
				+	//B3_FORCE_INLINE b3Scalar&       operator[](int i)       { return (&m_floats[0])[i];	}
				+	//B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
				+	///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
				+	B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
				+	B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
				+
				+	B3_FORCE_INLINE bool operator==(const b3QuadWord& other) const
				+	{
				+#ifdef B3_USE_SSE
				+		return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
				+#else
				+		return ((m_floats[3] == other.m_floats[3]) &&
				+				(m_floats[2] == other.m_floats[2]) &&
				+				(m_floats[1] == other.m_floats[1]) &&
				+				(m_floats[0] == other.m_floats[0]));
				+#endif
				+	}
				+
				+	B3_FORCE_INLINE bool operator!=(const b3QuadWord& other) const
				+	{
				+		return !(*this == other);
				+	}
				+
				+	/**@brief Set x,y,z and zero w 
				+   * @param _x Value of x
				+   * @param _y Value of y
				+   * @param _z Value of z
				+   */
				+	B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
				+	{
				+		m_floats[0] = _x;
				+		m_floats[1] = _y;
				+		m_floats[2] = _z;
				+		m_floats[3] = 0.f;
				+	}
				+
				+	/*		void getValue(b3Scalar *m) const 
				+		{
				+			m[0] = m_floats[0];
				+			m[1] = m_floats[1];
				+			m[2] = m_floats[2];
				+		}
				+*/
				+	/**@brief Set the values 
				+   * @param _x Value of x
				+   * @param _y Value of y
				+   * @param _z Value of z
				+   * @param _w Value of w
				+   */
				+	B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
				+	{
				+		m_floats[0] = _x;
				+		m_floats[1] = _y;
				+		m_floats[2] = _z;
				+		m_floats[3] = _w;
				+	}
				+	/**@brief No initialization constructor: the body is empty and assigns nothing to the components */
				+	B3_FORCE_INLINE b3QuadWord()
				+	//	:m_floats[0](b3Scalar(0.)),m_floats[1](b3Scalar(0.)),m_floats[2](b3Scalar(0.)),m_floats[3](b3Scalar(0.))
				+	{
				+	}
				+
				+	/**@brief Three argument constructor (zeros w)
				+   * @param _x Value of x
				+   * @param _y Value of y
				+   * @param _z Value of z
				+   */
				+	B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
				+	{
				+		m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f;
				+	}
				+
				+	/**@brief Initializing constructor
				+   * @param _x Value of x
				+   * @param _y Value of y
				+   * @param _z Value of z
				+   * @param _w Value of w
				+   */
				+	B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
				+	{
				+		m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w;
				+	}
				+
				+	/**@brief Set each element to the max of the current values and the values of another b3QuadWord
				+   * All four components, including w, take part.
				+   * @param other The other b3QuadWord to compare with 
				+   */
				+	B3_FORCE_INLINE void setMax(const b3QuadWord& other)
				+	{
				+#ifdef B3_USE_SSE
				+		mVec128 = _mm_max_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vmaxq_f32(mVec128, other.mVec128);
				+#else
				+		b3SetMax(m_floats[0], other.m_floats[0]);
				+		b3SetMax(m_floats[1], other.m_floats[1]);
				+		b3SetMax(m_floats[2], other.m_floats[2]);
				+		b3SetMax(m_floats[3], other.m_floats[3]);
				+#endif
				+	}
				+	/**@brief Set each element to the min of the current values and the values of another b3QuadWord
				+   * All four components, including w, take part.
				+   * @param other The other b3QuadWord to compare with 
				+   */
				+	B3_FORCE_INLINE void setMin(const b3QuadWord& other)
				+	{
				+#ifdef B3_USE_SSE
				+		mVec128 = _mm_min_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vminq_f32(mVec128, other.mVec128);
				+#else
				+		b3SetMin(m_floats[0], other.m_floats[0]);
				+		b3SetMin(m_floats[1], other.m_floats[1]);
				+		b3SetMin(m_floats[2], other.m_floats[2]);
				+		b3SetMin(m_floats[3], other.m_floats[3]);
				+#endif
				+	}
				+};
			
 
				+
			
 
				+#endif  //B3_SIMD_QUADWORD_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Quaternion.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Quaternion.h
@@ -0,0 +1,908 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SIMD__QUATERNION_H_
			
 
				+#define B3_SIMD__QUATERNION_H_
			
 
				+
			
 
				+#include "b3Vector3.h"
			
 
				+#include "b3QuadWord.h"
			
 
				+
			
 
				+#ifdef B3_USE_SSE
				+// All four lanes hold 1.0f; b3Quaternion::normalize() divides it by the length to get the reciprocal.
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
				+
				+#endif
				+
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+// Sign-bit masks for XOR: b3vQInv flips the sign of x, y, z (see inverse()), b3vPPPM flips only w (see operator*=).
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
				+
				+#endif
			
 
				+
			
 
				+/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
			
 
				+class b3Quaternion : public b3QuadWord
			
 
				+{
			
 
				+public:
			
 
				+	/**@brief No initialization constructor: the body is empty, so nothing is assigned
				+   * to the four components here. */
				+	b3Quaternion() {}
			
 
				+
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
				+	// Construct from a SIMD register: all four components are copied from vec unchanged
				+	B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
				+	{
				+		mVec128 = vec;
				+	}
				+
				+	// Copy constructor (copies the whole SIMD register)
				+	B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
				+	{
				+		mVec128 = rhs.mVec128;
				+	}
				+
				+	// Assignment Operator (copies the whole SIMD register and returns *this)
				+	B3_FORCE_INLINE b3Quaternion&
				+	operator=(const b3Quaternion& v)
				+	{
				+		mVec128 = v.mVec128;
				+
				+		return *this;
				+	}
				+
				+#endif
			
 
				+
			
 
				+	//		template <typename b3Scalar>
			
 
				+	//		explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
			
 
				+	/**@brief Constructor from scalars
				+   * The four values are forwarded to the b3QuadWord constructor and stored as given;
				+   * no normalization is performed.
				+   * @param _x Value of x
				+   * @param _y Value of y
				+   * @param _z Value of z
				+   * @param _w Value of w */
				+	b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
				+		: b3QuadWord(_x, _y, _z, _w)
				+	{
				+		//b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
				+	}
			
 
				+	/**@brief Axis angle Constructor (delegates to setRotation)
				+   * @param _axis The axis which the rotation is around
				+   * @param _angle The magnitude of the rotation around the axis (Radians) */
				+	b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
				+	{
				+		setRotation(_axis, _angle);
				+	}
			
 
				+	/**@brief Constructor from Euler angles
				+   * Calls setEuler() by default, or setEulerZYX() when B3_EULER_DEFAULT_ZYX is defined.
				+   * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
				+   * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
				+   * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
				+	b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
				+	{
				+#ifndef B3_EULER_DEFAULT_ZYX
				+		setEuler(yaw, pitch, roll);
				+#else
				+		setEulerZYX(yaw, pitch, roll);
				+#endif
				+	}
			
 
				+	/**@brief Set the rotation using axis angle notation 
				+   * A copy of the axis is passed through safeNormalize() before use, so axis1 itself is not modified.
				+   * If the length of that copy is below B3_EPSILON the quaternion is set to (0, 0, 0, 1);
				+   * otherwise x, y, z are the axis scaled by sin(angle / 2) / length and w is cos(angle / 2).
				+   * @param axis1 The axis around which to rotate
				+   * @param _angle The magnitude of the rotation in Radians */
				+	void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
				+	{
				+		b3Vector3 axis = axis1;
				+		axis.safeNormalize();
				+		
				+		b3Scalar d = axis.length();
				+		b3Assert(d != b3Scalar(0.0));
				+		if (d < B3_EPSILON)
				+		{
				+			setValue(0, 0, 0, 1);
				+		}
				+		else
				+		{
				+			b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
				+			setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
				+				b3Cos(_angle * b3Scalar(0.5)));
				+		}
				+	}
			
 
				+	/**@brief Set the quaternion from three Euler angles
				+   * Each angle is halved, its sine and cosine are taken, and the four components are
				+   * assembled from products of those terms. No normalization step follows.
				+   * @param yaw Angle around Y
				+   * @param pitch Angle around X
				+   * @param roll Angle around Z */
				+	void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
				+	{
				+		const b3Scalar hy = b3Scalar(yaw) * b3Scalar(0.5);
				+		const b3Scalar hp = b3Scalar(pitch) * b3Scalar(0.5);
				+		const b3Scalar hr = b3Scalar(roll) * b3Scalar(0.5);
				+
				+		const b3Scalar cy = b3Cos(hy);
				+		const b3Scalar sy = b3Sin(hy);
				+		const b3Scalar cp = b3Cos(hp);
				+		const b3Scalar sp = b3Sin(hp);
				+		const b3Scalar cr = b3Cos(hr);
				+		const b3Scalar sr = b3Sin(hr);
				+
				+		const b3Scalar qx = cr * sp * cy + sr * cp * sy;
				+		const b3Scalar qy = cr * cp * sy - sr * sp * cy;
				+		const b3Scalar qz = sr * cp * cy - cr * sp * sy;
				+		const b3Scalar qw = cr * cp * cy + sr * sp * sy;
				+		setValue(qx, qy, qz, qw);
				+	}
			
 
				+
			
 
				+	/**@brief Set the quaternion from Z-Y-X Euler angles, then normalize it
				+   * @param yawZ Angle around Z
				+   * @param pitchY Angle around Y
				+   * @param rollX Angle around X */
				+	void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
				+	{
				+		const b3Scalar hy = b3Scalar(yawZ) * b3Scalar(0.5);
				+		const b3Scalar hp = b3Scalar(pitchY) * b3Scalar(0.5);
				+		const b3Scalar hr = b3Scalar(rollX) * b3Scalar(0.5);
				+
				+		const b3Scalar cy = b3Cos(hy);
				+		const b3Scalar sy = b3Sin(hy);
				+		const b3Scalar cp = b3Cos(hp);
				+		const b3Scalar sp = b3Sin(hp);
				+		const b3Scalar cr = b3Cos(hr);
				+		const b3Scalar sr = b3Sin(hr);
				+
				+		const b3Scalar qx = sr * cp * cy - cr * sp * sy;
				+		const b3Scalar qy = cr * sp * cy + sr * cp * sy;
				+		const b3Scalar qz = cr * cp * sy - sr * sp * cy;
				+		const b3Scalar qw = cr * cp * cy + sr * sp * sy;
				+		setValue(qx, qy, qz, qw);
				+		normalize();
				+	}
			
 
				+
			
 
				+	/**@brief Get the Z-Y-X euler angles from this quaternion
				+   * The asin argument for the pitch is clamped: values <= -1 give -PI/2 and values >= 1 give +PI/2.
				+   * @param yawZ Receives the angle around Z
				+   * @param pitchY Receives the angle around Y
				+   * @param rollX Receives the angle around X */
				+	void getEulerZYX(b3Scalar& yawZ, b3Scalar& pitchY, b3Scalar& rollX) const
				+	{
				+		const b3Scalar sqx = m_floats[0] * m_floats[0];
				+		const b3Scalar sqy = m_floats[1] * m_floats[1];
				+		const b3Scalar sqz = m_floats[2] * m_floats[2];
				+		const b3Scalar squ = m_floats[3] * m_floats[3];
				+
				+		rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
				+
				+		const b3Scalar sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
				+		if (sarg <= b3Scalar(-1.0))
				+		{
				+			pitchY = b3Scalar(-0.5) * B3_PI;
				+		}
				+		else if (sarg >= b3Scalar(1.0))
				+		{
				+			pitchY = b3Scalar(0.5) * B3_PI;
				+		}
				+		else
				+		{
				+			pitchY = b3Asin(sarg);
				+		}
				+
				+		yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
				+	}
			
 
				+
			
 
				+	/**@brief Add two quaternions
				+   * Component-wise addition of all four components, in place.
				+   * @param q The quaternion to add to this one
				+   * @return Reference to this quaternion */
				+	B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		mVec128 = _mm_add_ps(mVec128, q.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vaddq_f32(mVec128, q.mVec128);
				+#else
				+		m_floats[0] += q.getX();
				+		m_floats[1] += q.getY();
				+		m_floats[2] += q.getZ();
				+		m_floats[3] += q.m_floats[3];
				+#endif
				+		return *this;
				+	}
			
 
				+
			
 
				+	/**@brief Subtract out a quaternion
				+   * Component-wise subtraction of all four components, in place.
				+   * @param q The quaternion to subtract from this one
				+   * @return Reference to this quaternion */
				+	b3Quaternion& operator-=(const b3Quaternion& q)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vsubq_f32(mVec128, q.mVec128);
				+#else
				+		m_floats[0] -= q.getX();
				+		m_floats[1] -= q.getY();
				+		m_floats[2] -= q.getZ();
				+		m_floats[3] -= q.m_floats[3];
				+#endif
				+		return *this;
				+	}
			
 
				+
			
 
				+	/**@brief Scale this quaternion
				+   * All four components are multiplied by s, in place.
				+   * @param s The scalar to scale by
				+   * @return Reference to this quaternion */
				+	b3Quaternion& operator*=(const b3Scalar& s)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
				+		vs = b3_pshufd_ps(vs, 0);     //	(S S S S)
				+		mVec128 = _mm_mul_ps(mVec128, vs);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vmulq_n_f32(mVec128, s);
				+#else
				+		m_floats[0] *= s;
				+		m_floats[1] *= s;
				+		m_floats[2] *= s;
				+		m_floats[3] *= s;
				+#endif
				+		return *this;
				+	}
			
 
				+
			
 
				+	/**@brief Multiply this quaternion by q on the right
				+   * @param q The other quaternion 
				+   * Equivalent to this = this * q (quaternion product, see the scalar branch for the formula).
				+   * @return Reference to this quaternion */
				+	b3Quaternion& operator*=(const b3Quaternion& q)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		__m128 vQ2 = q.get128();
				+
				+		__m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
				+		__m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
				+
				+		A1 = A1 * B1;
				+
				+		__m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
				+		__m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
				+
				+		A2 = A2 * B2;
				+
				+		B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
				+		B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
				+
				+		B1 = B1 * B2;  //	A3 *= B3
				+
				+		mVec128 = b3_splat_ps(mVec128, 3);  //	A0
				+		mVec128 = mVec128 * vQ2;            //	A0 * B0
				+
				+		A1 = A1 + A2;                  //	AB12
				+		mVec128 = mVec128 - B1;        //	AB03 = AB0 - AB3
				+		A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
				+		mVec128 = mVec128 + A1;        //	AB03 + AB12
				+
				+#elif defined(B3_USE_NEON)
				+
				+		float32x4_t vQ1 = mVec128;
				+		float32x4_t vQ2 = q.get128();
				+		float32x4_t A0, A1, B1, A2, B2, A3, B3;
				+		float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
				+
				+		{
				+			float32x2x2_t tmp;
				+			tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
				+			vQ1zx = tmp.val[0];
				+
				+			tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
				+			vQ2zx = tmp.val[0];
				+		}
				+		vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
				+
				+		vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+		vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+		vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+		A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
				+		B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
				+
				+		A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
				+		B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
				+
				+		A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
				+		B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
				+
				+		A1 = vmulq_f32(A1, B1);
				+		A2 = vmulq_f32(A2, B2);
				+		A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
				+		A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
				+
				+		A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
				+		A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
				+
				+		//	change the sign of the last element
				+		A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
				+		A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
				+
				+		mVec128 = A0;
				+#else
				+		setValue(
				+			m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
				+			m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
				+			m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
				+			m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
				+#endif
				+		return *this;
				+	}
			
 
				+	/**@brief Return the dot product between this quaternion and another
				+   * The sum covers all four components (x, y, z and w).
				+   * @param q The other quaternion */
				+	b3Scalar dot(const b3Quaternion& q) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		__m128 vd;
				+
				+		vd = _mm_mul_ps(mVec128, q.mVec128);
				+
				+		__m128 t = _mm_movehl_ps(vd, vd);
				+		vd = _mm_add_ps(vd, t);
				+		t = _mm_shuffle_ps(vd, vd, 0x55);
				+		vd = _mm_add_ss(vd, t);
				+
				+		return _mm_cvtss_f32(vd);
				+#elif defined(B3_USE_NEON)
				+		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
				+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
				+		x = vpadd_f32(x, x);
				+		return vget_lane_f32(x, 0);
				+#else
				+		return m_floats[0] * q.getX() +
				+			   m_floats[1] * q.getY() +
				+			   m_floats[2] * q.getZ() +
				+			   m_floats[3] * q.m_floats[3];
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return the squared length, computed as the dot product of the quaternion with itself */
				+	b3Scalar length2() const
				+	{
				+		const b3Quaternion& self = *this;
				+		return dot(self);
				+	}
			
 
				+
			
 
				+	/**@brief Return the length of the quaternion (square root of length2()) */
				+	b3Scalar length() const
				+	{
				+		const b3Scalar lengthSquared = length2();
				+		return b3Sqrt(lengthSquared);
				+	}
			
 
				+
			
 
				+	/**@brief Normalize the quaternion 
				+   * Such that x^2 + y^2 + z^2 +w^2 = 1
				+   * The SSE branch multiplies by 1 / sqrt(dot(this, this)) without any zero check; the other
				+   * branch divides by length() through operator/=, which asserts that the divisor is non-zero.
				+   * @return Reference to this quaternion */
				+	b3Quaternion& normalize()
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		__m128 vd;
				+
				+		vd = _mm_mul_ps(mVec128, mVec128);
				+
				+		__m128 t = _mm_movehl_ps(vd, vd);
				+		vd = _mm_add_ps(vd, t);
				+		t = _mm_shuffle_ps(vd, vd, 0x55);
				+		vd = _mm_add_ss(vd, t);
				+
				+		vd = _mm_sqrt_ss(vd);
				+		vd = _mm_div_ss(b3vOnes, vd);
				+		vd = b3_pshufd_ps(vd, 0);  // splat
				+		mVec128 = _mm_mul_ps(mVec128, vd);
				+
				+		return *this;
				+#else
				+		return *this /= length();
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return a scaled version of this quaternion
				+   * All four components are multiplied by s; this quaternion is not modified.
				+   * @param s The scale factor */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator*(const b3Scalar& s) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
				+		vs = b3_pshufd_ps(vs, 0x00);  //	(S S S S)
				+
				+		return b3Quaternion(_mm_mul_ps(mVec128, vs));
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vmulq_n_f32(mVec128, s));
				+#else
				+		return b3Quaternion(getX() * s, getY() * s, getZ() * s, m_floats[3] * s);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return an inversely scaled version of this quaternion
				+   * Implemented as a multiplication by 1 / s; s is asserted to be non-zero.
				+   * @param s The inverse scale factor */
				+	b3Quaternion operator/(const b3Scalar& s) const
				+	{
				+		b3Assert(s != b3Scalar(0.0));
				+		const b3Scalar reciprocal = b3Scalar(1.0) / s;
				+		return *this * reciprocal;
				+	}
			
 
				+
			
 
				+	/**@brief Inversely scale this quaternion
				+   * Implemented as an in-place multiplication by 1 / s; s is asserted to be non-zero.
				+   * @param s The inverse scale factor
				+   * @return Reference to this quaternion */
				+	b3Quaternion& operator/=(const b3Scalar& s)
				+	{
				+		b3Assert(s != b3Scalar(0.0));
				+		const b3Scalar reciprocal = b3Scalar(1.0) / s;
				+		return *this *= reciprocal;
				+	}
			
 
				+
			
 
				+	/**@brief Return a copy of this quaternion divided by its length; this quaternion is not modified */
				+	b3Quaternion normalized() const
				+	{
				+		const b3Scalar len = length();
				+		return *this / len;
				+	}
			
 
				+	/**@brief Return the angle between this quaternion and the other 
				+   * Computed as acos(dot(q) / sqrt(length2() * q.length2())); the denominator is asserted non-zero.
				+   * @param q The other quaternion */
				+	b3Scalar angle(const b3Quaternion& q) const
				+	{
				+		const b3Scalar normProduct = b3Sqrt(length2() * q.length2());
				+		b3Assert(normProduct != b3Scalar(0.0));
				+		const b3Scalar cosine = dot(q) / normProduct;
				+		return b3Acos(cosine);
				+	}
			
 
				+	/**@brief Return the angle of rotation represented by this quaternion
				+   * @return Twice the arc cosine of the w component (m_floats[3]) */
				+	b3Scalar getAngle() const
				+	{
				+		return b3Scalar(2.) * b3Acos(m_floats[3]);
				+	}
			
 
				+
			
 
				+	/**@brief Return the axis of the rotation represented by this quaternion
				+   * The x, y, z components are divided by sqrt(1 - w*w).
				+   * When 1 - w*w is below 10 * B3_EPSILON the division is skipped and the arbitrary axis (1, 0, 0) is returned.
				+   * @return The rotation axis, or (1, 0, 0) in the degenerate case */
				+	b3Vector3 getAxis() const
				+	{
				+		const b3Scalar w = m_floats[3];
				+		const b3Scalar oneMinusWSquared = 1.f - w * w;
				+
				+		// Guard against dividing by (nearly) zero; any axis is acceptable then.
				+		if (oneMinusWSquared < b3Scalar(10.) * B3_EPSILON)
				+		{
				+			return b3MakeVector3(1.0, 0.0, 0.0);
				+		}
				+
				+		const b3Scalar invNorm = 1.f / b3Sqrt(oneMinusWSquared);
				+		return b3MakeVector3(m_floats[0] * invNorm, m_floats[1] * invNorm, m_floats[2] * invNorm);
				+	}
			
 
				+
			
 
				+	/**@brief Return the inverse of this quaternion
				+   * The scalar fallback negates x, y and z and keeps w, i.e. it returns the conjugate.
				+   * NOTE(review): the conjugate equals the inverse only for unit-length quaternions; nothing is normalized here.
				+   * The SIMD paths XOR the register with b3vQInv (defined elsewhere; presumably a sign mask for the x, y, z lanes - confirm). */
				+	b3Quaternion inverse() const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
				+#else
				+		return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return the sum of this quaternion and the other
				+   * The addition is component-wise; neither operand is modified.
				+   * @param q2 The other quaternion
				+   * @return A new quaternion holding the four component sums */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator+(const b3Quaternion& q2) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));  // lane-wise add
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));  // lane-wise add
				+#else
				+		const b3Quaternion& q1 = *this;  // alias so both operands read symmetrically below
				+		return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return the difference between this quaternion and the other
				+   * The subtraction is component-wise (this minus q2); neither operand is modified.
				+   * @param q2 The other quaternion
				+   * @return A new quaternion holding the four component differences */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator-(const b3Quaternion& q2) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));  // lane-wise subtract
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));  // lane-wise subtract
				+#else
				+		const b3Quaternion& q1 = *this;  // alias so both operands read symmetrically below
				+		return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return the negative of this quaternion
				+   * This simply negates each element, w included (see the scalar fallback).
				+   * The SIMD paths XOR the register with b3vMzeroMask (defined elsewhere; presumably a sign-bit mask for all four lanes - confirm). */
				+	B3_FORCE_INLINE b3Quaternion operator-() const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+		return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
				+#else
				+		const b3Quaternion& q2 = *this;  // local alias for this quaternion
				+		return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
				+#endif
				+	}
			
 
				+	/**@brief Return whichever of qd and -qd lies farther from this quaternion
				+   * The distance is compared through the squared norms of (this - qd) and (this + qd);
				+   * (this + qd) is the difference between this quaternion and -qd.
				+   * @param qd The candidate quaternion
				+   * @return qd when (this - qd) has the strictly larger squared norm, otherwise -qd */
				+	B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
				+	{
				+		const b3Quaternion delta = *this - qd;
				+		const b3Quaternion total = *this + qd;
				+		const b3Scalar deltaNorm2 = delta.dot(delta);
				+		const b3Scalar totalNorm2 = total.dot(total);
				+		if (deltaNorm2 > totalNorm2)
				+		{
				+			return qd;
				+		}
				+		return (-qd);
				+	}
			
 
				+
			
 
				+	/**@brief Return whichever of qd and -qd lies nearer to this quaternion
				+   * The distance is compared through the squared norms of (this - qd) and (this + qd);
				+   * (this + qd) is the difference between this quaternion and -qd.
				+   * @param qd The candidate quaternion
				+   * @return qd when (this - qd) has the strictly smaller squared norm, otherwise -qd */
				+	B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
				+	{
				+		const b3Quaternion delta = *this - qd;
				+		const b3Quaternion total = *this + qd;
				+		const b3Scalar deltaNorm2 = delta.dot(delta);
				+		const b3Scalar totalNorm2 = total.dot(total);
				+		if (deltaNorm2 < totalNorm2)
				+		{
				+			return qd;
				+		}
				+		return (-qd);
				+	}
			
 
				+
			
 
				+	/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
				+   * @param q The other quaternion to interpolate with; the product of both squared lengths must be positive (checked with b3Assert)
				+   * @param t The ratio between this and q to interpolate; t = 0 weights only this quaternion, t = 1 weights only q.
				+   * When the normalized dot product is negative, q's weight is negated so the interpolation takes the shorter arc.
				+   * When the absolute normalized dot product is not below 1 (same or opposite orientation), this quaternion is returned unchanged.
				+   * Slerp interpolates assuming constant velocity.  */
				+	b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
				+	{
				+		const b3Scalar magnitude = b3Sqrt(length2() * q.length2());
				+		b3Assert(magnitude > b3Scalar(0));
				+
				+		const b3Scalar product = dot(q) / magnitude;
				+
				+		// No usable arc between the two orientations: nothing to interpolate.
				+		if (!(b3Fabs(product) < b3Scalar(1)))
				+		{
				+			return *this;
				+		}
				+
				+		// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
				+		const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
				+		const b3Scalar theta = b3Acos(sign * product);
				+
				+		const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);                 // common 1/sin(theta) factor
				+		const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);          // weight of this quaternion
				+		const b3Scalar s1 = b3Sin(sign * t * theta);                     // weight of q, sign-corrected
				+
				+		return b3Quaternion(
				+			(m_floats[0] * s0 + q.getX() * s1) * d,
				+			(m_floats[1] * s0 + q.getY() * s1) * d,
				+			(m_floats[2] * s0 + q.getZ() * s1) * d,
				+			(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
				+	}
			
 
				+
			
 
				+	/**@brief Return a reference to a shared quaternion with components (0, 0, 0, 1)
				+   * The object is a function-local static, constructed on the first call. */
				+	static const b3Quaternion& getIdentity()
				+	{
				+		static const b3Quaternion kIdentity(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
				+		return kIdentity;
				+	}
			
 
				+
			
 
				+	/**@brief Return a read-only reference to the w component (m_floats[3]) */
				+	B3_FORCE_INLINE const b3Scalar& getW() const
				+	{
				+		return m_floats[3];
				+	}
			
 
				+};
			
 
				+
			
 
				+/**@brief Return the product of two quaternions
				+ * The scalar fallback at the bottom spells out the formula:
				+ *   x = w1*x2 + x1*w2 + y1*z2 - z1*y2
				+ *   y = w1*y2 + y1*w2 + z1*x2 - x1*z2
				+ *   z = w1*z2 + z1*w2 + x1*y2 - y1*x2
				+ *   w = w1*w2 - x1*x2 - y1*y2 - z1*z2
				+ * The SSE and NEON paths build the same sums from shuffled copies of both operands;
				+ * b3vPPPM is defined elsewhere and, per the comments below, flips the sign of the last lane only.
				+ * @param q1 The left operand
				+ * @param q2 The right operand */
				+B3_FORCE_INLINE b3Quaternion
				+operator*(const b3Quaternion& q1, const b3Quaternion& q2)
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+	__m128 vQ1 = q1.get128();
				+	__m128 vQ2 = q2.get128();
				+	__m128 A0, A1, B1, A2, B2;
				+
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x     //      vtrn
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X     // vdup vext
				+
				+	A1 = A1 * B1;
				+
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));  // Y Z  X Y     // vext
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));  // z x  Y Y     // vtrn vdup
				+
				+	A2 = A2 * B2;
				+
				+	B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));  // z x Y Z      // vtrn vext
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));  // Y Z x z      // vext vtrn
				+
				+	B1 = B1 * B2;  //	A3 *= B3 (B1/B2 are reused here as the third product pair)
				+
				+	A0 = b3_splat_ps(vQ1, 3);  //	A0: lane 3 (w) of q1 in every lane
				+	A0 = A0 * vQ2;             //	A0 * B0
				+
				+	A1 = A1 + A2;  //	AB12
				+	A0 = A0 - B1;  //	AB03 = AB0 - AB3
				+
				+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
				+	A0 = A0 + A1;                  //	AB03 + AB12
				+
				+	return b3Quaternion(A0);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	float32x4_t vQ1 = q1.get128();
				+	float32x4_t vQ2 = q2.get128();
				+	float32x4_t A0, A1, B1, A2, B2, A3, B3;
				+	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
				+
				+	{
				+		float32x2x2_t tmp;
				+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
				+		vQ1zx = tmp.val[0];
				+
				+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
				+		vQ2zx = tmp.val[0];
				+	}
				+	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
				+
				+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
				+	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                    // Y Z  X Y
				+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));  // z x  Y Y
				+
				+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
				+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
				+	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
				+
				+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
				+	A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
				+
				+	//	change the sign of the last element
				+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
				+	A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
				+
				+	return b3Quaternion(A0);
				+
				+#else
				+	return b3Quaternion(
				+		q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),   // x
				+		q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),   // y
				+		q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),   // z
				+		q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());  // w
				+#endif
				+}
			
 
				+/**@brief Return the product of a quaternion and a vector
				+ * The scalar fallback at the bottom spells out the formula, which is the quaternion
				+ * product of q with (w.x, w.y, w.z, 0):
				+ *   x = qw*wx + qy*wz - qz*wy
				+ *   y = qw*wy + qz*wx - qx*wz
				+ *   z = qw*wz + qx*wy - qy*wx
				+ *   w = -(qx*wx + qy*wy + qz*wz)
				+ * NOTE(review): the SIMD paths shuffle only lanes 0-2 of the vector register, so they look
				+ * equivalent to the fallback; confirm before relying on it.
				+ * @param q The left operand (quaternion)
				+ * @param w The right operand (vector) */
				+B3_FORCE_INLINE b3Quaternion
				+operator*(const b3Quaternion& q, const b3Vector3& w)
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+	__m128 vQ1 = q.get128();
				+	__m128 vQ2 = w.get128();
				+	__m128 A1, B1, A2, B2, A3, B3;
				+
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));  // q: W W W X
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));  // w: X Y Z X
				+
				+	A1 = A1 * B1;
				+
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));  // q: Y Z X Y
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));  // w: Z X Y Y
				+
				+	A2 = A2 * B2;
				+
				+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));  // q: Z X Y Z
				+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));  // w: Y Z X Z
				+
				+	A3 = A3 * B3;  //	A3 *= B3
				+
				+	A1 = A1 + A2;                  //	AB12
				+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
				+	A1 = A1 - A3;                  //	AB123 = AB12 - AB3
				+
				+	return b3Quaternion(A1);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	float32x4_t vQ1 = q.get128();
				+	float32x4_t vQ2 = w.get128();
				+	float32x4_t A1, B1, A2, B2, A3, B3;
				+	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
				+
				+	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
				+	{
				+		float32x2x2_t tmp;
				+
				+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
				+		vQ2zx = tmp.val[0];
				+
				+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
				+		vQ1zx = tmp.val[0];
				+	}
				+
				+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // W W  W X
				+	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // X Y  z x
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                    // Y Z  X Y
				+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));  // z x  Y Y
				+
				+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
				+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
				+
				+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
				+
				+	//	change the sign of the last element
				+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
				+
				+	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
				+
				+	return b3Quaternion(A1);
				+
				+#else
				+	return b3Quaternion(
				+		q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),    // x
				+		q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),    // y
				+		q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),    // z
				+		-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());  // w
				+#endif
				+}
			
 
				+/**@brief Return the product of a vector and a quaternion
				+ * The scalar fallback at the bottom spells out the formula, which is the quaternion
				+ * product of (w.x, w.y, w.z, 0) with q:
				+ *   x = wx*qw + wy*qz - wz*qy
				+ *   y = wy*qw + wz*qx - wx*qz
				+ *   z = wz*qw + wx*qy - wy*qx
				+ *   w = -(wx*qx + wy*qy + wz*qz)
				+ * NOTE(review): the SIMD paths shuffle only lanes 0-2 of the vector register, so they look
				+ * equivalent to the fallback; confirm before relying on it.
				+ * @param w The left operand (vector)
				+ * @param q The right operand (quaternion) */
				+B3_FORCE_INLINE b3Quaternion
				+operator*(const b3Vector3& w, const b3Quaternion& q)
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+	__m128 vQ1 = w.get128();
				+	__m128 vQ2 = q.get128();
				+	__m128 A1, B1, A2, B2, A3, B3;
				+
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X
				+
				+	A1 = A1 * B1;
				+
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));  // w: Y Z X Y
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));  // q: Z X Y Y
				+
				+	A2 = A2 * B2;
				+
				+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));  // w: Z X Y Z
				+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));  // q: Y Z X Z
				+
				+	A3 = A3 * B3;  //	A3 *= B3
				+
				+	A1 = A1 + A2;                  //	AB12
				+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
				+	A1 = A1 - A3;                  //	AB123 = AB12 - AB3
				+
				+	return b3Quaternion(A1);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	float32x4_t vQ1 = w.get128();
				+	float32x4_t vQ2 = q.get128();
				+	float32x4_t A1, B1, A2, B2, A3, B3;
				+	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
				+
				+	{
				+		float32x2x2_t tmp;
				+
				+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
				+		vQ1zx = tmp.val[0];
				+
				+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
				+		vQ2zx = tmp.val[0];
				+	}
				+	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
				+
				+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
				+	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));                    // Y Z  X Y
				+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));  // z x  Y Y
				+
				+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
				+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
				+
				+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
				+
				+	//	change the sign of the last element
				+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
				+
				+	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
				+
				+	return b3Quaternion(A1);
				+
				+#else
				+	return b3Quaternion(
				+		+w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),   // x
				+		+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),   // y
				+		+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),   // z
				+		-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());  // w
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Calculate the dot product between two quaternions
				+ * Free-function form of b3Quaternion::dot.
				+ * @param q1 The first quaternion
				+ * @param q2 The second quaternion
				+ * @return q1.dot(q2) */
				+B3_FORCE_INLINE b3Scalar
				+b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
				+{
				+	const b3Scalar product = q1.dot(q2);
				+	return product;
				+}
			
 
				+
			
 
				+/**@brief Return the length of a quaternion
				+ * Free-function form of b3Quaternion::length.
				+ * @param q The quaternion to measure
				+ * @return q.length() */
				+B3_FORCE_INLINE b3Scalar
				+b3Length(const b3Quaternion& q)
				+{
				+	const b3Scalar len = q.length();
				+	return len;
				+}
			
 
				+
			
 
				+/**@brief Return the angle between two quaternions
				+ * Free-function form of b3Quaternion::angle.
				+ * @param q1 The first quaternion
				+ * @param q2 The second quaternion
				+ * @return q1.angle(q2) */
				+B3_FORCE_INLINE b3Scalar
				+b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
				+{
				+	const b3Scalar result = q1.angle(q2);
				+	return result;
				+}
			
 
				+
			
 
				+/**@brief Return the inverse of a quaternion
				+ * Free-function form of b3Quaternion::inverse.
				+ * @param q The quaternion to invert
				+ * @return q.inverse() */
				+B3_FORCE_INLINE b3Quaternion
				+b3Inverse(const b3Quaternion& q)
				+{
				+	const b3Quaternion inverted = q.inverse();
				+	return inverted;
				+}
			
 
				+
			
 
				+/**@brief Return the result of spherical linear interpolation between two quaternions
				+ * Free-function form of b3Quaternion::slerp; see that method for the exact behaviour.
				+ * @param q1 The first quaternion
				+ * @param q2 The second quaternion
				+ * @param t The ratio between q1 and q2, passed through unchanged (t = 0 weights only q1, t = 1 weights only q2)
				+ * @return q1.slerp(q2, t) */
				+B3_FORCE_INLINE b3Quaternion
				+b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
				+{
				+	const b3Quaternion interpolated = q1.slerp(q2, t);
				+	return interpolated;
				+}
			
 
				+
			
 
				+/**@brief Return the product of two quaternions
				+ * Named-function form of operator*(b3Quaternion, b3Quaternion).
				+ * @param rot0 The left operand
				+ * @param rot1 The right operand
				+ * @return rot0 * rot1 */
				+B3_FORCE_INLINE b3Quaternion
				+b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
				+{
				+	const b3Quaternion product = rot0 * rot1;
				+	return product;
				+}
			
 
				+
			
 
				+/**@brief Return a normalized copy of a quaternion
				+ * Free-function form of b3Quaternion::normalized; the argument is not modified.
				+ * @param orn The quaternion to normalize
				+ * @return orn.normalized() */
				+B3_FORCE_INLINE b3Quaternion
				+b3QuatNormalized(const b3Quaternion& orn)
				+{
				+	const b3Quaternion unit = orn.normalized();
				+	return unit;
				+}
			
 
				+/**@brief Rotate a vector by a quaternion
				+ * Computes rotation * v * rotation.inverse() and returns the x, y, z part of the result.
				+ * NOTE(review): inverse() only negates x, y, z, so this presumably expects a unit-length rotation - confirm with callers.
				+ * @param rotation The rotation quaternion
				+ * @param v The vector to rotate
				+ * @return The rotated vector */
				+B3_FORCE_INLINE b3Vector3
				+b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
				+{
				+	b3Quaternion q = rotation * v;  // quaternion * vector product
				+	q *= rotation.inverse();
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
				+	return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));  // mask defined elsewhere; presumably clears the 4th lane
				+#elif defined(B3_USE_NEON)
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));  // mask defined elsewhere; presumably clears the 4th lane
				+#else
				+	return b3MakeVector3(q.getX(), q.getY(), q.getZ());  // drop the w component
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the quaternion built from the cross and dot products of v0 and v1 (shortest arc)
				+ * Based on Game Programming Gems 2.10. Make sure v0 and v1 are normalized.
				+ * When the dot product is below -1 + B3_EPSILON (the vectors point in opposite directions),
				+ * an axis obtained from b3PlaneSpace1(v0, ...) is returned with w = 0.
				+ * @param v0 The start direction
				+ * @param v1 The target direction
				+ * @return The shortest-arc quaternion for the pair */
				+B3_FORCE_INLINE b3Quaternion
				+b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1)
				+{
				+	const b3Scalar cosAngle = v0.dot(v1);
				+
				+	if (cosAngle < -1.0 + B3_EPSILON)
				+	{
				+		// just pick any vector that is orthogonal to v0
				+		b3Vector3 orthogonal, unused;
				+		b3PlaneSpace1(v0, orthogonal, unused);
				+		return b3Quaternion(orthogonal.getX(), orthogonal.getY(), orthogonal.getZ(), 0.0f);
				+	}
				+
				+	const b3Vector3 axis = v0.cross(v1);
				+	const b3Scalar s = b3Sqrt((1.0f + cosAngle) * 2.0f);
				+	const b3Scalar invS = 1.0f / s;
				+
				+	return b3Quaternion(axis.getX() * invS, axis.getY() * invS, axis.getZ() * invS, s * 0.5f);
				+}
			
 
				+
			
 
				+/**@brief Normalize both vectors, then return their shortest-arc quaternion
				+ * Both arguments are normalized IN PLACE, so the caller's vectors are modified.
				+ * @param v0 The start direction (normalized by this call)
				+ * @param v1 The target direction (normalized by this call)
				+ * @return b3ShortestArcQuat(v0, v1) evaluated on the normalized vectors */
				+B3_FORCE_INLINE b3Quaternion
				+b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
				+{
				+	v0.normalize();
				+	v1.normalize();
				+	const b3Quaternion arc = b3ShortestArcQuat(v0, v1);
				+	return arc;
				+}
			
 
				+
			
 
				+#endif  //B3_SIMD__QUATERNION_H_
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Random.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Random.h
@@ -0,0 +1,46 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_GEN_RANDOM_H
			
 
				+#define B3_GEN_RANDOM_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+
			
 
				+#ifdef MT19937  // backend built on init_genrand()/genrand_int32() from <mt19937.h>
				+
				+#include <limits.h>
				+#include <mt19937.h>
				+
				+#define B3_RAND_MAX UINT_MAX  // b3RandRange divides by B3_RAND_MAX + 1
				+
				+B3_FORCE_INLINE void b3Srand(unsigned int seed) { init_genrand(seed); }  // seed the generator
				+B3_FORCE_INLINE unsigned int b3rand() { return genrand_int32(); }        // next raw value
				+
				+#else  // default backend: the C standard library srand()/rand()
				+
				+#include <stdlib.h>
				+
				+#define B3_RAND_MAX RAND_MAX  // b3RandRange divides by B3_RAND_MAX + 1
				+
				+B3_FORCE_INLINE void b3Srand(unsigned int seed) { srand(seed); }  // seed the generator
				+B3_FORCE_INLINE unsigned int b3rand() { return rand(); }          // next raw value; rand()'s int result is converted to unsigned
				+
				+#endif
			
 
				+
			
 
				+/**@brief Return a pseudo-random scalar mapped linearly from b3rand() into a range
				+ * The raw value is divided by (B3_RAND_MAX + 1), scaled by (maxRange - minRange) and offset by minRange.
				+ * @param minRange The value returned when b3rand() yields 0
				+ * @param maxRange The upper end of the range
				+ * @return minRange + fraction * (maxRange - minRange) */
				+inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange)
				+{
				+	const b3Scalar divisor = b3Scalar(B3_RAND_MAX) + b3Scalar(1.0);
				+	const b3Scalar fraction = b3rand() / divisor;
				+	return fraction * (maxRange - minRange) + minRange;
				+}
			
 
				+
			
 
				+#endif  //B3_GEN_RANDOM_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3ResizablePool.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3ResizablePool.h
@@ -0,0 +1,171 @@
 
				+
			
 
				+#ifndef B3_RESIZABLE_POOL_H
			
 
				+#define B3_RESIZABLE_POOL_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+enum  // sentinel values stored in a pool slot's next-free link
				+{
				+	B3_POOL_HANDLE_TERMINAL_FREE = -1,  // presumably the end-of-free-list marker; b3ResizablePool writes a literal -1 for that
				+	B3_POOL_HANDLE_TERMINAL_USED = -2   // b3ResizablePool stores this in a slot while the slot is allocated
				+};
			
 
				+
			
 
				+/**@brief Wrapper that adds a next-free link to a payload type U
				+ * Provides the setNextFree/getNextFree accessors that b3ResizablePool calls on its elements.
				+ * The link is not initialized by this struct. */
				+template <typename U>
				+struct b3PoolBodyHandle : public U
				+{
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	int m_nextFreeHandle;  // next-free link, or one of the B3_POOL_HANDLE_TERMINAL_* values
				+
				+	/// Store the next-free link.
				+	void setNextFree(int next) { m_nextFreeHandle = next; }
				+
				+	/// Read back the next-free link.
				+	int getNextFree() const { return m_nextFreeHandle; }
				+};
			
 
				+
			
 
				+/**@brief Growable pool of T objects addressed by integer handles
				+ * A handle is an index into m_bodyHandles. Free slots form a singly linked list threaded
				+ * through T::setNextFree/getNextFree, starting at m_firstFreeHandle; allocated slots carry
				+ * B3_POOL_HANDLE_TERMINAL_USED in that link instead.
				+ * T must provide setNextFree(int), getNextFree() const and clear(). */
				+template <typename T>
				+class b3ResizablePool
				+{
				+protected:
				+	b3AlignedObjectArray<T> m_bodyHandles;  // backing storage; a handle is an index into this array
				+	int m_numUsedHandles;                   // number of active handles
				+	int m_firstFreeHandle;                  // head of the free list, negative when the list is empty
				+
				+	/// Unchecked access to a slot, whether free or used.
				+	T* getHandleInternal(int handle)
				+	{
				+		return &m_bodyHandles[handle];
				+	}
				+	const T* getHandleInternal(int handle) const
				+	{
				+		return &m_bodyHandles[handle];
				+	}
				+
				+public:
				+	b3ResizablePool()
				+	{
				+		initHandles();
				+	}
				+
				+	virtual ~b3ResizablePool()
				+	{
				+		exitHandles();
				+	}
				+	///handle management
				+
				+	/// Total number of slots (free and used).
				+	int getNumHandles() const
				+	{
				+		return m_bodyHandles.size();
				+	}
				+
				+	/// Append the index of every allocated slot to usedHandles (the array is not cleared first).
				+	void getUsedHandles(b3AlignedObjectArray<int>& usedHandles) const
				+	{
				+		for (int i = 0; i < m_bodyHandles.size(); i++)
				+		{
				+			if (m_bodyHandles[i].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
				+			{
				+				usedHandles.push_back(i);
				+			}
				+		}
				+	}
				+
				+	/// Return the object for an allocated handle, or 0 when the handle is out of range or not allocated.
				+	T* getHandle(int handle)
				+	{
				+		b3Assert(handle >= 0);
				+		b3Assert(handle < m_bodyHandles.size());
				+		if ((handle < 0) || (handle >= m_bodyHandles.size()))
				+		{
				+			return 0;
				+		}
				+
				+		if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
				+		{
				+			return &m_bodyHandles[handle];
				+		}
				+		return 0;
				+	}
				+	const T* getHandle(int handle) const
				+	{
				+		b3Assert(handle >= 0);
				+		b3Assert(handle < m_bodyHandles.size());
				+		if ((handle < 0) || (handle >= m_bodyHandles.size()))
				+		{
				+			return 0;
				+		}
				+
				+		if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
				+		{
				+			return &m_bodyHandles[handle];
				+		}
				+		return 0;
				+	}
				+
				+	/// Add extraCapacity free slots and put them at the front of the free list.
				+	/// Slots that were already free stay reachable behind the new ones.
				+	void increaseHandleCapacity(int extraCapacity)
				+	{
				+		if (extraCapacity <= 0)
				+		{
				+			// Nothing to add. Continuing would rewrite the link of slot curCapacity - 1
				+			// and point the free list at an index past the end of the array.
				+			return;
				+		}
				+
				+		const int curCapacity = m_bodyHandles.size();
				+		//b3Assert(curCapacity == m_numUsedHandles);
				+		const int newCapacity = curCapacity + extraCapacity;
				+		m_bodyHandles.resize(newCapacity);
				+
				+		// Link each new slot to its successor.
				+		for (int i = curCapacity; i < newCapacity - 1; i++)
				+		{
				+			m_bodyHandles[i].setNextFree(i + 1);
				+		}
				+
				+		// The last new slot continues into the previous free list, or terminates the list
				+		// when there was none, so existing free slots are not dropped.
				+		const int previousHead = (m_firstFreeHandle >= 0) ? m_firstFreeHandle : static_cast<int>(B3_POOL_HANDLE_TERMINAL_FREE);
				+		m_bodyHandles[newCapacity - 1].setNextFree(previousHead);
				+
				+		m_firstFreeHandle = curCapacity;
				+	}
				+
				+	/// Reset the counters and create the first free slot.
				+	void initHandles()
				+	{
				+		m_numUsedHandles = 0;
				+		m_firstFreeHandle = B3_POOL_HANDLE_TERMINAL_FREE;
				+
				+		increaseHandleCapacity(1);
				+	}
				+
				+	/// Drop all slots and reset the counters.
				+	void exitHandles()
				+	{
				+		m_bodyHandles.resize(0);
				+		m_firstFreeHandle = B3_POOL_HANDLE_TERMINAL_FREE;
				+		m_numUsedHandles = 0;
				+	}
				+
				+	/// Pop a slot off the free list, mark it used, clear() it and return its handle.
				+	/// Requires a non-empty free list (asserted). When this call takes the last free slot,
				+	/// the capacity is doubled so the next call has a free slot again.
				+	int allocHandle()
				+	{
				+		b3Assert(m_firstFreeHandle >= 0);
				+
				+		const int handle = m_firstFreeHandle;
				+		m_firstFreeHandle = getHandleInternal(handle)->getNextFree();
				+		m_numUsedHandles++;
				+
				+		if (m_firstFreeHandle < 0)
				+		{
				+			// Free list exhausted: grow by the current size (the resize may move the storage,
				+			// which is why slots are re-fetched through getHandleInternal below).
				+			const int additionalCapacity = m_bodyHandles.size();
				+			increaseHandleCapacity(additionalCapacity);
				+		}
				+		getHandleInternal(handle)->setNextFree(B3_POOL_HANDLE_TERMINAL_USED);
				+		getHandleInternal(handle)->clear();
				+		return handle;
				+	}
				+
				+	/// Return an allocated slot to the free list. Handles that are out of range or not
				+	/// currently allocated are ignored (and asserted on when out of range).
				+	void freeHandle(int handle)
				+	{
				+		b3Assert(handle >= 0);
				+		b3Assert(handle < m_bodyHandles.size());
				+		if ((handle < 0) || (handle >= m_bodyHandles.size()))
				+		{
				+			return;  // same range policy as getHandle; avoids an out-of-bounds read
				+		}
				+
				+		if (m_bodyHandles[handle].getNextFree() == B3_POOL_HANDLE_TERMINAL_USED)
				+		{
				+			getHandleInternal(handle)->clear();
				+			getHandleInternal(handle)->setNextFree(m_firstFreeHandle);
				+			m_firstFreeHandle = handle;
				+			m_numUsedHandles--;
				+		}
				+	}
				+};
			
 
				+///end handle management
			
 
				+
			
 
				+#endif  //B3_RESIZABLE_POOL_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Scalar.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Scalar.h
@@ -0,0 +1,689 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SCALAR_H
			
 
				+#define B3_SCALAR_H
			
 
				+
			
 
				+#ifdef B3_MANAGED_CODE
			
 
				+//Aligned data types not supported in managed code
			
 
				+#pragma unmanaged
			
 
				+#endif
			
 
				+
			
 
				+#include <math.h>
			
 
				+#include <stdlib.h>  //size_t for MSVC 6.0
			
 
				+#include <float.h>
			
 
				+
			
 
				+//Original repository is at http://github.com/erwincoumans/bullet3
			
 
				+#define B3_BULLET_VERSION 300
			
 
				+
			
 
				+inline int b3GetVersion()
			
 
				+{
			
 
				+	return B3_BULLET_VERSION;
			
 
				+}
			
 
				+
			
 
				+#if defined(DEBUG) || defined(_DEBUG)
			
 
				+#define B3_DEBUG
			
 
				+#endif
			
 
				+
			
 
				+#include "b3Logging.h"  //for b3Error
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+#if  defined(__GNUC__)	// it should handle both MINGW and CYGWIN
			
 
				+#define B3_FORCE_INLINE             __inline__ __attribute__((always_inline))
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a)   a __attribute__((aligned(16)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a)   a __attribute__((aligned(64)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a)  a __attribute__((aligned(128)))
			
 
				+#elif ( defined(_MSC_VER) && _MSC_VER < 1300 )
			
 
				+#define B3_FORCE_INLINE inline
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) a
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) a
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) a
			
 
				+#else
			
 
				+//#define B3_HAS_ALIGNED_ALLOCATOR
			
 
				+#pragma warning(disable : 4324)  // disable padding warning
			
 
				+//			#pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning.
			
 
				+#pragma warning(disable : 4996)  //Turn off warnings about deprecated C routines
			
 
				+//			#pragma warning(disable:4786) // Disable the "debug name too long" warning
			
 
				+
			
 
				+#define B3_FORCE_INLINE __forceinline
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) __declspec(align(128)) a
			
 
				+#ifdef _XBOX
			
 
				+#define B3_USE_VMX128
			
 
				+
			
 
				+#include <ppcintrinsics.h>
			
 
				+#define B3_HAVE_NATIVE_FSEL
			
 
				+#define b3Fsel(a, b, c) __fsel((a), (b), (c))
			
 
				+#else
			
 
				+
			
 
				+#if (defined(_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined(B3_USE_DOUBLE_PRECISION))
			
 
				+#if (defined(_M_IX86) || defined(_M_X64))
			
 
				+
			
 
				+
			
 
				+#ifdef __clang__
			
 
				+//#define B3_NO_SIMD_OPERATOR_OVERLOADS
			
 
				+#define B3_DISABLE_SSE
			
 
				+#endif //__clang__
			
 
				+
			
 
				+#ifndef B3_DISABLE_SSE
			
 
				+#define B3_USE_SSE
			
 
				+#endif //B3_DISABLE_SSE
			
 
				+
			
 
				+#ifdef B3_USE_SSE
			
 
				+//B3_USE_SSE_IN_API is disabled under Windows by default, because
			
 
				+//it makes it harder to integrate Bullet into your application under Windows
			
 
				+//(structures embedding Bullet structs/classes need to be 16-byte aligned)
			
 
				+//with relatively little performance gain
			
 
				+//If you are not embedding Bullet data in your classes, or you make sure that those classes are aligned on 16-byte boundaries,
			
 
				+//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
			
 
				+//#define B3_USE_SSE_IN_API
			
 
				+#endif  //B3_USE_SSE
			
 
				+#include <emmintrin.h>
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#endif  //_XBOX
			
 
				+
			
 
				+#endif  //__MINGW32__
			
 
				+
			
 
				+#ifdef B3_DEBUG
			
 
				+#ifdef _MSC_VER
			
 
				+#include <stdio.h>
			
 
				+#define b3Assert(x) { if(!(x)){b3Error("Assert " __FILE__ ":%u (%s)\n", __LINE__, #x);__debugbreak();	}}
			
 
				+#else  //_MSC_VER
			
 
				+#include <assert.h>
			
 
				+#define b3Assert assert
			
 
				+#endif  //_MSC_VER
			
 
				+#else
			
 
				+#define b3Assert(x)
			
 
				+#endif
			
 
				+//b3FullAssert is optional, slows down a lot
			
 
				+#define b3FullAssert(x)
			
 
				+
			
 
				+#define b3Likely(_c) _c
			
 
				+#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#if defined(__CELLOS_LV2__)
			
 
				+#define B3_FORCE_INLINE inline __attribute__((always_inline))
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
			
 
				+#ifndef assert
			
 
				+#include <assert.h>
			
 
				+#endif
			
 
				+#ifdef B3_DEBUG
			
 
				+#ifdef __SPU__
			
 
				+#include <spu_printf.h>
			
 
				+#define printf spu_printf
			
 
				+#define b3Assert(x)               \
			
 
				+	{                             \
			
 
				+		if (!(x))                 \
			
 
				+		{                         \
			
 
				+			b3Error(              \
			
 
				+				"Assert "__FILE__ \
			
 
				+				":%u (" #x ")\n", \
			
 
				+				__LINE__);        \
			
 
				+			spu_hcmpeq(0, 0);     \
			
 
				+		}                         \
			
 
				+	}
			
 
				+#else
			
 
				+#define b3Assert assert
			
 
				+#endif
			
 
				+
			
 
				+#else
			
 
				+#define b3Assert(x)
			
 
				+#endif
			
 
				+//b3FullAssert is optional, slows down a lot
			
 
				+#define b3FullAssert(x)
			
 
				+
			
 
				+#define b3Likely(_c) _c
			
 
				+#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#ifdef USE_LIBSPE2
			
 
				+
			
 
				+#define B3_FORCE_INLINE __inline
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
			
 
				+#ifndef assert
			
 
				+#include <assert.h>
			
 
				+#endif
			
 
				+#ifdef B3_DEBUG
			
 
				+#define b3Assert assert
			
 
				+#else
			
 
				+#define b3Assert(x)
			
 
				+#endif
			
 
				+//b3FullAssert is optional, slows down a lot
			
 
				+#define b3FullAssert(x)
			
 
				+
			
 
				+#define b3Likely(_c) __builtin_expect((_c), 1)
			
 
				+#define b3Unlikely(_c) __builtin_expect((_c), 0)
			
 
				+
			
 
				+#else
			
 
				+//non-windows systems
			
 
				+
			
 
				+#if (defined(__APPLE__) && (!defined(B3_USE_DOUBLE_PRECISION)))
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+#define B3_USE_SSE
			
 
				+//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
			
 
				+//if apps run into issues, we will disable the next line
			
 
				+#define B3_USE_SSE_IN_API
			
 
				+#ifdef B3_USE_SSE
			
 
				+// include appropriate SSE level
			
 
				+#if defined(__SSE4_1__)
			
 
				+#include <smmintrin.h>
			
 
				+#elif defined(__SSSE3__)
			
 
				+#include <tmmintrin.h>
			
 
				+#elif defined(__SSE3__)
			
 
				+#include <pmmintrin.h>
			
 
				+#else
			
 
				+#include <emmintrin.h>
			
 
				+#endif
			
 
				+#endif  //B3_USE_SSE
			
 
				+#elif defined(__armv7__)
			
 
				+#ifdef __clang__
			
 
				+#define B3_USE_NEON 1
			
 
				+
			
 
				+#if defined B3_USE_NEON && defined(__clang__)
			
 
				+#include <arm_neon.h>
			
 
				+#endif  //B3_USE_NEON
			
 
				+#endif  //__clang__
			
 
				+#endif  //__arm__
			
 
				+
			
 
				+#define B3_FORCE_INLINE inline __attribute__((always_inline))
			
 
				+///@todo: check out alignment methods for other platforms/compilers
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
			
 
				+#ifndef assert
			
 
				+#include <assert.h>
			
 
				+#endif
			
 
				+
			
 
				+#if defined(DEBUG) || defined(_DEBUG)
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+#include <stdio.h>
			
 
				+#define b3Assert(x)                                                             \
			
 
				+	{                                                                           \
			
 
				+		if (!(x))                                                               \
			
 
				+		{                                                                       \
			
 
				+			b3Error("Assert %s in line %d, file %s\n", #x, __LINE__, __FILE__); \
			
 
				+			asm volatile("int3");                                               \
			
 
				+		}                                                                       \
			
 
				+	}
			
 
				+#else  //defined (__i386__) || defined (__x86_64__)
			
 
				+#define b3Assert assert
			
 
				+#endif  //defined (__i386__) || defined (__x86_64__)
			
 
				+#else   //defined(DEBUG) || defined (_DEBUG)
			
 
				+#define b3Assert(x)
			
 
				+#endif  //defined(DEBUG) || defined (_DEBUG)
			
 
				+
			
 
				+//b3FullAssert is optional, slows down a lot
			
 
				+#define b3FullAssert(x)
			
 
				+#define b3Likely(_c) _c
			
 
				+#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#define B3_FORCE_INLINE inline
			
 
				+///@todo: check out alignment methods for other platforms/compilers
			
 
				+#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__((aligned(16)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__((aligned(64)))
			
 
				+#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__((aligned(128)))
			
 
				+///#define B3_ATTRIBUTE_ALIGNED16(a) a
			
 
				+///#define B3_ATTRIBUTE_ALIGNED64(a) a
			
 
				+///#define B3_ATTRIBUTE_ALIGNED128(a) a
			
 
				+#ifndef assert
			
 
				+#include <assert.h>
			
 
				+#endif
			
 
				+
			
 
				+#if defined(DEBUG) || defined(_DEBUG)
			
 
				+#define b3Assert assert
			
 
				+#else
			
 
				+#define b3Assert(x)
			
 
				+#endif
			
 
				+
			
 
				+//b3FullAssert is optional, slows down a lot
			
 
				+#define b3FullAssert(x)
			
 
				+#define b3Likely(_c) _c
			
 
				+#define b3Unlikely(_c) _c
			
 
				+#endif  //__APPLE__
			
 
				+
			
 
				+#endif  // LIBSPE2
			
 
				+
			
 
				+#endif  //__CELLOS_LV2__
			
 
				+#endif
			
 
				+
			
 
				+///The b3Scalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
			
 
				+#if defined(B3_USE_DOUBLE_PRECISION)
			
 
				+typedef double b3Scalar;
			
 
				+//this number could be bigger in double precision
			
 
				+#define B3_LARGE_FLOAT 1e30
			
 
				+#else
			
 
				+typedef float b3Scalar;
			
 
				+//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
			
 
				+#define B3_LARGE_FLOAT 1e18f
			
 
				+#endif
			
 
				+
			
 
				+#ifdef B3_USE_SSE
			
 
				+typedef __m128 b3SimdFloat4;
			
 
				+#endif  //B3_USE_SSE
			
 
				+
			
 
				+#if defined B3_USE_SSE_IN_API && defined(B3_USE_SSE)
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+#ifndef B3_NAN
			
 
				+static int b3NanMask = 0x7F800001;
			
 
				+#define B3_NAN (*(float *)&b3NanMask)
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_INFINITY_MASK
			
 
				+static int b3InfinityMask = 0x7F800000;
			
 
				+#define B3_INFINITY_MASK (*(float *)&b3InfinityMask)
			
 
				+#endif
			
 
				+#ifndef B3_NO_SIMD_OPERATOR_OVERLOADS
			
 
// Arithmetic operators for raw __m128 values; each one forwards to the
// corresponding packed single-precision SSE intrinsic.

/// Lane-wise sum of A and B (_mm_add_ps).
inline __m128 operator+(const __m128 A, const __m128 B)
{
	return _mm_add_ps(A, B);
}

/// Lane-wise difference A - B (_mm_sub_ps).
inline __m128 operator-(const __m128 A, const __m128 B)
{
	return _mm_sub_ps(A, B);
}

/// Lane-wise product of A and B (_mm_mul_ps).
inline __m128 operator*(const __m128 A, const __m128 B)
{
	return _mm_mul_ps(A, B);
}
			
 
				+#endif //B3_NO_SIMD_OPERATOR_OVERLOADS
			
 
				+#define b3CastfTo128i(a) (_mm_castps_si128(a))
			
 
				+#define b3CastfTo128d(a) (_mm_castps_pd(a))
			
 
				+#define b3CastiTo128f(a) (_mm_castsi128_ps(a))
			
 
				+#define b3CastdTo128f(a) (_mm_castpd_ps(a))
			
 
				+#define b3CastdTo128i(a) (_mm_castpd_si128(a))
			
 
				+#define b3Assign128(r0, r1, r2, r3) _mm_setr_ps(r0, r1, r2, r3)
			
 
				+
			
 
				+#else  //_WIN32
			
 
				+
			
 
				+#define b3CastfTo128i(a) ((__m128i)(a))
			
 
				+#define b3CastfTo128d(a) ((__m128d)(a))
			
 
				+#define b3CastiTo128f(a) ((__m128)(a))
			
 
				+#define b3CastdTo128f(a) ((__m128)(a))
			
 
				+#define b3CastdTo128i(a) ((__m128i)(a))
			
 
				+#define b3Assign128(r0, r1, r2, r3) \
			
 
				+	(__m128) { r0, r1, r2, r3 }
			
 
				+#endif  //_WIN32
			
 
				+#endif  //B3_USE_SSE_IN_API
			
 
				+
			
 
				+#ifdef B3_USE_NEON
			
 
				+#include <arm_neon.h>
			
 
				+
			
 
				+typedef float32x4_t b3SimdFloat4;
			
 
				+#define B3_INFINITY INFINITY
			
 
				+#define B3_NAN NAN
			
 
				+#define b3Assign128(r0, r1, r2, r3) \
			
 
				+	(float32x4_t) { r0, r1, r2, r3 }
			
 
				+#endif
			
 
				+
			
 
				+#define B3_DECLARE_ALIGNED_ALLOCATOR()                                                                   \
			
 
				+	B3_FORCE_INLINE void *operator new(size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); }   \
			
 
				+	B3_FORCE_INLINE void operator delete(void *ptr) { b3AlignedFree(ptr); }                              \
			
 
				+	B3_FORCE_INLINE void *operator new(size_t, void *ptr) { return ptr; }                                \
			
 
				+	B3_FORCE_INLINE void operator delete(void *, void *) {}                                              \
			
 
				+	B3_FORCE_INLINE void *operator new[](size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes, 16); } \
			
 
				+	B3_FORCE_INLINE void operator delete[](void *ptr) { b3AlignedFree(ptr); }                            \
			
 
				+	B3_FORCE_INLINE void *operator new[](size_t, void *ptr) { return ptr; }                              \
			
 
				+	B3_FORCE_INLINE void operator delete[](void *, void *) {}
			
 
				+
			
 
				+#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS)
			
 
				+
			
 
				+B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x)
			
 
				+{
			
 
				+	return sqrt(x);
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x)
			
 
				+{
			
 
				+	if (x < b3Scalar(-1)) x = b3Scalar(-1);
			
 
				+	if (x > b3Scalar(1)) x = b3Scalar(1);
			
 
				+	return acos(x);
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x)
			
 
				+{
			
 
				+	if (x < b3Scalar(-1)) x = b3Scalar(-1);
			
 
				+	if (x > b3Scalar(1)) x = b3Scalar(1);
			
 
				+	return asin(x);
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return pow(x, y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmod(x, y); }
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y)
			
 
				+{
			
 
				+#ifdef USE_APPROXIMATION
			
 
				+	double x, z, tempf;
			
 
				+	unsigned long *tfptr = ((unsigned long *)&tempf) + 1;
			
 
				+
			
 
				+	tempf = y;
			
 
				+	*tfptr = (0xbfcdd90a - *tfptr) >> 1; /* estimate of 1/sqrt(y) */
			
 
				+	x = tempf;
			
 
				+	z = y * b3Scalar(0.5);
			
 
				+	x = (b3Scalar(1.5) * x) - (x * x) * (x * z); /* iteration formula     */
			
 
				+	x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
			
 
				+	x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
			
 
				+	x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
			
 
				+	x = (b3Scalar(1.5) * x) - (x * x) * (x * z);
			
 
				+	return x * y;
			
 
				+#else
			
 
				+	return sqrtf(y);
			
 
				+#endif
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x)
			
 
				+{
			
 
				+	if (x < b3Scalar(-1))
			
 
				+		x = b3Scalar(-1);
			
 
				+	if (x > b3Scalar(1))
			
 
				+		x = b3Scalar(1);
			
 
				+	return acosf(x);
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x)
			
 
				+{
			
 
				+	if (x < b3Scalar(-1))
			
 
				+		x = b3Scalar(-1);
			
 
				+	if (x > b3Scalar(1))
			
 
				+		x = b3Scalar(1);
			
 
				+	return asinf(x);
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x, b3Scalar y) { return powf(x, y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x, b3Scalar y) { return fmodf(x, y); }
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#define B3_2_PI b3Scalar(6.283185307179586232)
			
 
				+#define B3_PI (B3_2_PI * b3Scalar(0.5))
			
 
				+#define B3_HALF_PI (B3_2_PI * b3Scalar(0.25))
			
 
				+#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0))
			
 
				+#define B3_DEGS_PER_RAD (b3Scalar(360.0) / B3_2_PI)
			
 
				+#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490)
			
 
				+
			
 
				+#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0) / b3Sqrt(b3Scalar(x)))) /* reciprocal square root */
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define B3_EPSILON DBL_EPSILON
			
 
				+#define B3_INFINITY DBL_MAX
			
 
				+#else
			
 
				+#define B3_EPSILON FLT_EPSILON
			
 
				+#define B3_INFINITY FLT_MAX
			
 
				+#endif
			
 
				+
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x)
			
 
				+{
			
 
				+	b3Scalar coeff_1 = B3_PI / 4.0f;
			
 
				+	b3Scalar coeff_2 = 3.0f * coeff_1;
			
 
				+	b3Scalar abs_y = b3Fabs(y);
			
 
				+	b3Scalar angle;
			
 
				+	if (x >= 0.0f)
			
 
				+	{
			
 
				+		b3Scalar r = (x - abs_y) / (x + abs_y);
			
 
				+		angle = coeff_1 - coeff_1 * r;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		b3Scalar r = (x + abs_y) / (abs_y - x);
			
 
				+		angle = coeff_2 - coeff_1 * r;
			
 
				+	}
			
 
				+	return (y < 0.0f) ? -angle : angle;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE bool b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; }
			
 
				+
			
 
				+B3_FORCE_INLINE bool b3Equal(b3Scalar a, b3Scalar eps)
			
 
				+{
			
 
				+	return (((a) <= eps) && !((a) < -eps));
			
 
				+}
			
 
				+B3_FORCE_INLINE bool b3GreaterEqual(b3Scalar a, b3Scalar eps)
			
 
				+{
			
 
				+	return (!((a) <= eps));
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE int b3IsNegative(b3Scalar x)
			
 
				+{
			
 
				+	return x < b3Scalar(0.0) ? 1 : 0;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; }
			
 
				+B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; }
			
 
				+
			
 
				+#define B3_DECLARE_HANDLE(name) \
			
 
				+	typedef struct name##__     \
			
 
				+	{                           \
			
 
				+		int unused;             \
			
 
				+	} * name
			
 
				+
			
 
				+#ifndef b3Fsel
			
 
				+B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c)
			
 
				+{
			
 
				+	return a >= 0 ? b : c;
			
 
				+}
			
 
				+#endif
			
 
				+#define b3Fsels(a, b, c) (b3Scalar) b3Fsel(a, b, c)
			
 
				+
			
 
				+B3_FORCE_INLINE bool b3MachineIsLittleEndian()
			
 
				+{
			
 
				+	long int i = 1;
			
 
				+	const char *p = (const char *)&i;
			
 
				+	if (p[0] == 1)  // Lowest address contains the least significant byte
			
 
				+		return true;
			
 
				+	else
			
 
				+		return false;
			
 
				+}
			
 
				+
			
 
				+///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360
			
 
				+///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html
			
 
				+B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero)
			
 
				+{
			
 
				+	// Set testNz to 0xFFFFFFFF if condition is nonzero, 0x00000000 if condition is zero
			
 
				+	// Rely on positive value or'ed with its negative having sign bit on
			
 
				+	// and zero value or'ed with its negative (which is still zero) having sign bit off
			
 
				+	// Use arithmetic shift right, shifting the sign bit through all 32 bits
			
 
				+	unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31);
			
 
				+	unsigned testEqz = ~testNz;
			
 
				+	return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
			
 
				+}
			
 
				+B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero)
			
 
				+{
			
 
				+	unsigned testNz = (unsigned)(((int)condition | -(int)condition) >> 31);
			
 
				+	unsigned testEqz = ~testNz;
			
 
				+	return static_cast<int>((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
			
 
				+}
			
 
				+B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero)
			
 
				+{
			
 
				+#ifdef B3_HAVE_NATIVE_FSEL
			
 
				+	return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero);
			
 
				+#else
			
 
				+	return (condition != 0) ? valueIfConditionNonZero : valueIfConditionZero;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+B3_FORCE_INLINE void b3Swap(T &a, T &b)
			
 
				+{
			
 
				+	T tmp = a;
			
 
				+	a = b;
			
 
				+	b = tmp;
			
 
				+}
			
 
				+
			
 
				+//PCK: endian swapping functions
			
 
				+B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val)
			
 
				+{
			
 
				+	return (((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val)
			
 
				+{
			
 
				+	return static_cast<unsigned short>(((val & 0xff00) >> 8) | ((val & 0x00ff) << 8));
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE unsigned b3SwapEndian(int val)
			
 
				+{
			
 
				+	return b3SwapEndian((unsigned)val);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE unsigned short b3SwapEndian(short val)
			
 
				+{
			
 
				+	return b3SwapEndian((unsigned short)val);
			
 
				+}
			
 
				+
			
 
				+///b3SwapEndianFloat uses char pointers to swap the endianness
			
 
				+////b3SwapFloat/b3SwapDouble will NOT return a float, because the machine might 'correct' invalid floating point values
			
 
				+///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754.
			
 
				+///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception.
			
 
				+///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you.
			
 
				+///so instead of returning a float/double, we return integer/long long integer
			
 
				+B3_FORCE_INLINE unsigned int b3SwapEndianFloat(float d)
			
 
				+{
			
 
				+	unsigned int a = 0;
			
 
				+	unsigned char *dst = (unsigned char *)&a;
			
 
				+	unsigned char *src = (unsigned char *)&d;
			
 
				+
			
 
				+	dst[0] = src[3];
			
 
				+	dst[1] = src[2];
			
 
				+	dst[2] = src[1];
			
 
				+	dst[3] = src[0];
			
 
				+	return a;
			
 
				+}
			
 
				+
			
 
				+// unswap using char pointers
			
 
				+B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a)
			
 
				+{
			
 
				+	float d = 0.0f;
			
 
				+	unsigned char *src = (unsigned char *)&a;
			
 
				+	unsigned char *dst = (unsigned char *)&d;
			
 
				+
			
 
				+	dst[0] = src[3];
			
 
				+	dst[1] = src[2];
			
 
				+	dst[2] = src[1];
			
 
				+	dst[3] = src[0];
			
 
				+
			
 
				+	return d;
			
 
				+}
			
 
				+
			
 
				+// swap using char pointers
			
 
				+B3_FORCE_INLINE void b3SwapEndianDouble(double d, unsigned char *dst)
			
 
				+{
			
 
				+	unsigned char *src = (unsigned char *)&d;
			
 
				+
			
 
				+	dst[0] = src[7];
			
 
				+	dst[1] = src[6];
			
 
				+	dst[2] = src[5];
			
 
				+	dst[3] = src[4];
			
 
				+	dst[4] = src[3];
			
 
				+	dst[5] = src[2];
			
 
				+	dst[6] = src[1];
			
 
				+	dst[7] = src[0];
			
 
				+}
			
 
				+
			
 
				+// unswap using char pointers
			
 
				+B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src)
			
 
				+{
			
 
				+	double d = 0.0;
			
 
				+	unsigned char *dst = (unsigned char *)&d;
			
 
				+
			
 
				+	dst[0] = src[7];
			
 
				+	dst[1] = src[6];
			
 
				+	dst[2] = src[5];
			
 
				+	dst[3] = src[4];
			
 
				+	dst[4] = src[3];
			
 
				+	dst[5] = src[2];
			
 
				+	dst[6] = src[1];
			
 
				+	dst[7] = src[0];
			
 
				+
			
 
				+	return d;
			
 
				+}
			
 
				+
			
 
				+// returns normalized value in range [-B3_PI, B3_PI]
			
 
				+B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians)
			
 
				+{
			
 
				+	angleInRadians = b3Fmod(angleInRadians, B3_2_PI);
			
 
				+	if (angleInRadians < -B3_PI)
			
 
				+	{
			
 
				+		return angleInRadians + B3_2_PI;
			
 
				+	}
			
 
				+	else if (angleInRadians > B3_PI)
			
 
				+	{
			
 
				+		return angleInRadians - B3_2_PI;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		return angleInRadians;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+///rudimentary class to provide type info
			
 
				+struct b3TypedObject
			
 
				+{
			
 
				+	b3TypedObject(int objectType)
			
 
				+		: m_objectType(objectType)
			
 
				+	{
			
 
				+	}
			
 
				+	int m_objectType;
			
 
				+	inline int getObjectType() const
			
 
				+	{
			
 
				+		return m_objectType;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+///align a pointer to the provided alignment, upwards
			
 
				+template <typename T>
			
 
				+T *b3AlignPointer(T *unalignedPtr, size_t alignment)
			
 
				+{
			
 
				+	struct b3ConvertPointerSizeT
			
 
				+	{
			
 
				+		union {
			
 
				+			T *ptr;
			
 
				+			size_t integer;
			
 
				+		};
			
 
				+	};
			
 
				+	b3ConvertPointerSizeT converter;
			
 
				+
			
 
				+	const size_t bit_mask = ~(alignment - 1);
			
 
				+	converter.ptr = unalignedPtr;
			
 
				+	converter.integer += alignment - 1;
			
 
				+	converter.integer &= bit_mask;
			
 
				+	return converter.ptr;
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_SCALAR_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3StackAlloc.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3StackAlloc.h
@@ -0,0 +1,118 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+/*
			
 
				+StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson
			
 
				+Nov.2006
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_STACK_ALLOC
			
 
				+#define B3_STACK_ALLOC
			
 
				+
			
 
				+#include "b3Scalar.h"  //for b3Assert
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+///The b3Block class is an internal structure for the b3StackAlloc memory allocator.
			
 
				+struct b3Block
			
 
				+{
			
 
				+	b3Block* previous;
			
 
				+	unsigned char* address;
			
 
				+};
			
 
				+
			
 
				+///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out)
			
 
				+class b3StackAlloc
			
 
				+{
			
 
				+public:
			
 
				+	b3StackAlloc(unsigned int size)
			
 
				+	{
			
 
				+		ctor();
			
 
				+		create(size);
			
 
				+	}
			
 
				+	~b3StackAlloc() { destroy(); }
			
 
				+
			
 
				+	inline void create(unsigned int size)
			
 
				+	{
			
 
				+		destroy();
			
 
				+		data = (unsigned char*)b3AlignedAlloc(size, 16);
			
 
				+		totalsize = size;
			
 
				+	}
			
 
				+	inline void destroy()
			
 
				+	{
			
 
				+		b3Assert(usedsize == 0);
			
 
				+		//Raise(L"StackAlloc is still in use");
			
 
				+
			
 
				+		if (usedsize == 0)
			
 
				+		{
			
 
				+			if (!ischild && data)
			
 
				+				b3AlignedFree(data);
			
 
				+
			
 
				+			data = 0;
			
 
				+			usedsize = 0;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	int getAvailableMemory() const
			
 
				+	{
			
 
				+		return static_cast<int>(totalsize - usedsize);
			
 
				+	}
			
 
				+
			
 
				+	unsigned char* allocate(unsigned int size)
			
 
				+	{
			
 
				+		const unsigned int nus(usedsize + size);
			
 
				+		if (nus < totalsize)
			
 
				+		{
			
 
				+			usedsize = nus;
			
 
				+			return (data + (usedsize - size));
			
 
				+		}
			
 
				+		b3Assert(0);
			
 
				+		//&& (L"Not enough memory"));
			
 
				+
			
 
				+		return (0);
			
 
				+	}
			
 
				+	B3_FORCE_INLINE b3Block* beginBlock()
			
 
				+	{
			
 
				+		b3Block* pb = (b3Block*)allocate(sizeof(b3Block));
			
 
				+		pb->previous = current;
			
 
				+		pb->address = data + usedsize;
			
 
				+		current = pb;
			
 
				+		return (pb);
			
 
				+	}
			
 
				+	B3_FORCE_INLINE void endBlock(b3Block* block)
			
 
				+	{
			
 
				+		b3Assert(block == current);
			
 
				+		//Raise(L"Unmatched blocks");
			
 
				+		if (block == current)
			
 
				+		{
			
 
				+			current = block->previous;
			
 
				+			usedsize = (unsigned int)((block->address - data) - sizeof(b3Block));
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+private:
			
 
				+	void ctor()
			
 
				+	{
			
 
				+		data = 0;
			
 
				+		totalsize = 0;
			
 
				+		usedsize = 0;
			
 
				+		current = 0;
			
 
				+		ischild = false;
			
 
				+	}
			
 
				+	unsigned char* data;
			
 
				+	unsigned int totalsize;
			
 
				+	unsigned int usedsize;
			
 
				+	b3Block* current;
			
 
				+	bool ischild;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_STACK_ALLOC
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Transform.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Transform.h
@@ -0,0 +1,286 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_TRANSFORM_H
			
 
				+#define B3_TRANSFORM_H
			
 
				+
			
 
				+#include "b3Matrix3x3.h"
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3TransformData b3TransformDoubleData
			
 
				+#else
			
 
				+#define b3TransformData b3TransformFloatData
			
 
				+#endif
			
 
				+
			
 
				+/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear.
			
 
				+ *It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes. */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3Transform
			
 
				+{
			
 
				+	///Storage for the rotation
			
 
				+	b3Matrix3x3 m_basis;
			
 
				+	///Storage for the translation
			
 
				+	b3Vector3 m_origin;
			
 
				+
			
 
				+public:
			
 
				+	/**@brief No initialization constructor */
			
 
				+	b3Transform() {}
			
 
				+	/**@brief Constructor from b3Quaternion (optional b3Vector3 )
			
 
				+   * @param q Rotation from quaternion 
			
 
				+   * @param c Translation from Vector (default 0,0,0) */
			
 
				+	explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q,
			
 
				+										 const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
			
 
				+		: m_basis(q),
			
 
				+		  m_origin(c)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Constructor from b3Matrix3x3 (optional b3Vector3)
			
 
				+   * @param b Rotation from Matrix 
			
 
				+   * @param c Translation from Vector default (0,0,0)*/
			
 
				+	explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b,
			
 
				+										 const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
			
 
				+		: m_basis(b),
			
 
				+		  m_origin(c)
			
 
				+	{
			
 
				+	}
			
 
				+	/**@brief Copy constructor */
			
 
				+	B3_FORCE_INLINE b3Transform(const b3Transform& other)
			
 
				+		: m_basis(other.m_basis),
			
 
				+		  m_origin(other.m_origin)
			
 
				+	{
			
 
				+	}
			
 
				+	/**@brief Assignment Operator */
			
 
				+	B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other)
			
 
				+	{
			
 
				+		m_basis = other.m_basis;
			
 
				+		m_origin = other.m_origin;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set the current transform as the value of the product of two transforms
			
 
				+   * @param t1 Transform 1
			
 
				+   * @param t2 Transform 2
			
 
				+   * This = Transform1 * Transform2 */
			
 
				+	B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2)
			
 
				+	{
			
 
				+		m_basis = t1.m_basis * t2.m_basis;
			
 
				+		m_origin = t1(t2.m_origin);
			
 
				+	}
			
 
				+
			
 
				+	/*		void multInverseLeft(const b3Transform& t1, const b3Transform& t2) {
			
 
				+			b3Vector3 v = t2.m_origin - t1.m_origin;
			
 
				+			m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis);
			
 
				+			m_origin = v * t1.m_basis;
			
 
				+		}
			
 
				+		*/
			
 
				+
			
 
				+	/**@brief Return the transform of the vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const
			
 
				+	{
			
 
				+		return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the transform of the vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const
			
 
				+	{
			
 
				+		return (*this)(x);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the transform of the b3Quaternion */
			
 
				+	B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const
			
 
				+	{
			
 
				+		return getRotation() * q;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the basis matrix for the rotation */
			
 
				+	B3_FORCE_INLINE b3Matrix3x3& getBasis() { return m_basis; }
			
 
				+	/**@brief Return the basis matrix for the rotation */
			
 
				+	B3_FORCE_INLINE const b3Matrix3x3& getBasis() const { return m_basis; }
			
 
				+
			
 
				+	/**@brief Return the origin vector translation */
			
 
				+	B3_FORCE_INLINE b3Vector3& getOrigin() { return m_origin; }
			
 
				+	/**@brief Return the origin vector translation */
			
 
				+	B3_FORCE_INLINE const b3Vector3& getOrigin() const { return m_origin; }
			
 
				+
			
 
				+	/**@brief Return a quaternion representing the rotation */
			
 
				+	b3Quaternion getRotation() const
			
 
				+	{
			
 
				+		b3Quaternion q;
			
 
				+		m_basis.getRotation(q);
			
 
				+		return q;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set from an array 
			
 
				+   * @param m A pointer to a 15 element array (12 rotation (row major, padded on the right by 1) and 3 translation) */
			
 
				+	void setFromOpenGLMatrix(const b3Scalar* m)
			
 
				+	{
			
 
				+		m_basis.setFromOpenGLSubMatrix(m);
			
 
				+		m_origin.setValue(m[12], m[13], m[14]);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Fill an array representation
			
 
				+   * @param m A pointer to a 16 element array (12 rotation (row major, padded on the right by 1), 3 translation, and a trailing 1 written to m[15]) */
			
 
				+	void getOpenGLMatrix(b3Scalar * m) const
			
 
				+	{
			
 
				+		m_basis.getOpenGLSubMatrix(m);
			
 
				+		m[12] = m_origin.getX();
			
 
				+		m[13] = m_origin.getY();
			
 
				+		m[14] = m_origin.getZ();
			
 
				+		m[15] = b3Scalar(1.0);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set the translational element
			
 
				+   * @param origin The vector to set the translation to */
			
 
				+	B3_FORCE_INLINE void setOrigin(const b3Vector3& origin)
			
 
				+	{
			
 
				+		m_origin = origin;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const;
			
 
				+
			
 
				+	/**@brief Set the rotational element by b3Matrix3x3 */
			
 
				+	B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis)
			
 
				+	{
			
 
				+		m_basis = basis;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set the rotational element by b3Quaternion */
			
 
				+	B3_FORCE_INLINE void setRotation(const b3Quaternion& q)
			
 
				+	{
			
 
				+		m_basis.setRotation(q);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set this transformation to the identity */
			
 
				+	void setIdentity()
			
 
				+	{
			
 
				+		m_basis.setIdentity();
			
 
				+		m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Multiply this Transform by another(this = this * another) 
			
 
				+   * @param t The other transform */
			
 
				+	b3Transform& operator*=(const b3Transform& t)
			
 
				+	{
			
 
				+		m_origin += m_basis * t.m_origin;
			
 
				+		m_basis *= t.m_basis;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the inverse of this transform */
			
 
				+	b3Transform inverse() const
			
 
				+	{
			
 
				+		b3Matrix3x3 inv = m_basis.transpose();
			
 
				+		return b3Transform(inv, inv * -m_origin);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the inverse of this transform times the other transform
			
 
				+   * @param t The other transform 
			
 
				+   * return this.inverse() * the other */
			
 
				+	b3Transform inverseTimes(const b3Transform& t) const;
			
 
				+
			
 
				+	/**@brief Return the product of this transform and the other */
			
 
				+	b3Transform operator*(const b3Transform& t) const;
			
 
				+
			
 
				+	/**@brief Return an identity transform */
			
 
				+	static const b3Transform& getIdentity()
			
 
				+	{
			
 
				+		static const b3Transform identityTransform(b3Matrix3x3::getIdentity());
			
 
				+		return identityTransform;
			
 
				+	}
			
 
				+
			
 
				+	void serialize(struct b3TransformData & dataOut) const;
			
 
				+
			
 
				+	void serializeFloat(struct b3TransformFloatData & dataOut) const;
			
 
				+
			
 
				+	void deSerialize(const struct b3TransformData& dataIn);
			
 
				+
			
 
				+	void deSerializeDouble(const struct b3TransformDoubleData& dataIn);
			
 
				+
			
 
				+	void deSerializeFloat(const struct b3TransformFloatData& dataIn);
			
 
				+};
			
 
				+
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+b3Transform::invXform(const b3Vector3& inVec) const
			
 
				+{
			
 
				+	b3Vector3 v = inVec - m_origin;
			
 
				+	return (m_basis.transpose() * v);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Transform
			
 
				+b3Transform::inverseTimes(const b3Transform& t) const
			
 
				+{
			
 
				+	b3Vector3 v = t.getOrigin() - m_origin;
			
 
				+	return b3Transform(m_basis.transposeTimes(t.m_basis),
			
 
				+					   v * m_basis);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Transform
			
 
				+	b3Transform::operator*(const b3Transform& t) const
			
 
				+{
			
 
				+	return b3Transform(m_basis * t.m_basis,
			
 
				+					   (*this)(t.m_origin));
			
 
				+}
			
 
				+
			
 
				+/**@brief Test if two transforms have all elements equal */
			
 
				+B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2)
			
 
				+{
			
 
				+	return (t1.getBasis() == t2.getBasis() &&
			
 
				+			t1.getOrigin() == t2.getOrigin());
			
 
				+}
			
 
				+
			
 
				+///for serialization
			
 
				+struct b3TransformFloatData
			
 
				+{
			
 
				+	b3Matrix3x3FloatData m_basis;
			
 
				+	b3Vector3FloatData m_origin;
			
 
				+};
			
 
				+
			
 
				+struct b3TransformDoubleData
			
 
				+{
			
 
				+	b3Matrix3x3DoubleData m_basis;
			
 
				+	b3Vector3DoubleData m_origin;
			
 
				+};
			
 
				+
			
 
				+B3_FORCE_INLINE void b3Transform::serialize(b3TransformData& dataOut) const
			
 
				+{
			
 
				+	m_basis.serialize(dataOut.m_basis);
			
 
				+	m_origin.serialize(dataOut.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3Transform::serializeFloat(b3TransformFloatData& dataOut) const
			
 
				+{
			
 
				+	m_basis.serializeFloat(dataOut.m_basis);
			
 
				+	m_origin.serializeFloat(dataOut.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3Transform::deSerialize(const b3TransformData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerialize(dataIn.m_basis);
			
 
				+	m_origin.deSerialize(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerializeFloat(dataIn.m_basis);
			
 
				+	m_origin.deSerializeFloat(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerializeDouble(dataIn.m_basis);
			
 
				+	m_origin.deSerializeDouble(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_TRANSFORM_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3TransformUtil.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3TransformUtil.h
@@ -0,0 +1,210 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_TRANSFORM_UTIL_H
			
 
				+#define B3_TRANSFORM_UTIL_H
			
 
				+
			
 
				+#include "b3Transform.h"
			
 
				+#define B3_ANGULAR_MOTION_THRESHOLD (b3Scalar(0.5) * B3_HALF_PI)  // parenthesized so the macro stays a single operand inside larger expressions
			
 
				+
			
 
				+B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents, const b3Vector3& supportDir)
			
 
				+{
			
 
				+	return b3MakeVector3(supportDir.getX() < b3Scalar(0.0) ? -halfExtents.getX() : halfExtents.getX(),
			
 
				+						 supportDir.getY() < b3Scalar(0.0) ? -halfExtents.getY() : halfExtents.getY(),
			
 
				+						 supportDir.getZ() < b3Scalar(0.0) ? -halfExtents.getZ() : halfExtents.getZ());
			
 
				+}
			
 
				+
			
 
				+/// Utils related to temporal transforms
			
 
				+class b3TransformUtil
			
 
				+{
			
 
				+public:
			
 
				+	static void integrateTransform(const b3Transform& curTrans, const b3Vector3& linvel, const b3Vector3& angvel, b3Scalar timeStep, b3Transform& predictedTransform)
			
 
				+	{
			
 
				+		predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep);
			
 
				+		//	#define QUATERNION_DERIVATIVE
			
 
				+#ifdef QUATERNION_DERIVATIVE
			
 
				+		b3Quaternion predictedOrn = curTrans.getRotation();
			
 
				+		predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5));
			
 
				+		predictedOrn.normalize();
			
 
				+#else
			
 
				+		//Exponential map
			
 
				+		//google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia
			
 
				+
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar fAngle = angvel.length();
			
 
				+		//limit the angular motion
			
 
				+		if (fAngle * timeStep > B3_ANGULAR_MOTION_THRESHOLD)
			
 
				+		{
			
 
				+			fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep;
			
 
				+		}
			
 
				+
			
 
				+		if (fAngle < b3Scalar(0.001))
			
 
				+		{
			
 
				+			// use Taylor's expansion of the sinc function
			
 
				+			axis = angvel * (b3Scalar(0.5) * timeStep - (timeStep * timeStep * timeStep) * (b3Scalar(0.020833333333)) * fAngle * fAngle);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// sinc-style term: sin(0.5 * fAngle * timeStep) / fAngle
			
 
				+			axis = angvel * (b3Sin(b3Scalar(0.5) * fAngle * timeStep) / fAngle);
			
 
				+		}
			
 
				+		b3Quaternion dorn(axis.getX(), axis.getY(), axis.getZ(), b3Cos(fAngle * timeStep * b3Scalar(0.5)));
			
 
				+		b3Quaternion orn0 = curTrans.getRotation();
			
 
				+
			
 
				+		b3Quaternion predictedOrn = dorn * orn0;
			
 
				+		predictedOrn.normalize();
			
 
				+#endif
			
 
				+		predictedTransform.setRotation(predictedOrn);
			
 
				+	}
			
 
				+
			
 
				+	static void calculateVelocityQuaternion(const b3Vector3& pos0, const b3Vector3& pos1, const b3Quaternion& orn0, const b3Quaternion& orn1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel)
			
 
				+	{
			
 
				+		linVel = (pos1 - pos0) / timeStep;
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar angle;
			
 
				+		if (orn0 != orn1)
			
 
				+		{
			
 
				+			calculateDiffAxisAngleQuaternion(orn0, orn1, axis, angle);
			
 
				+			angVel = axis * angle / timeStep;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			angVel.setValue(0, 0, 0);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0, const b3Quaternion& orn1a, b3Vector3& axis, b3Scalar& angle)
			
 
				+	{
			
 
				+		b3Quaternion orn1 = orn0.nearest(orn1a);
			
 
				+		b3Quaternion dorn = orn1 * orn0.inverse();
			
 
				+		angle = dorn.getAngle();
			
 
				+		axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ());
			
 
				+		axis[3] = b3Scalar(0.);
			
 
				+		//check for axis length
			
 
				+		b3Scalar len = axis.length2();
			
 
				+		if (len < B3_EPSILON * B3_EPSILON)
			
 
				+			axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.));
			
 
				+		else
			
 
				+			axis /= b3Sqrt(len);
			
 
				+	}
			
 
				+
			
 
				+	static void calculateVelocity(const b3Transform& transform0, const b3Transform& transform1, b3Scalar timeStep, b3Vector3& linVel, b3Vector3& angVel)
			
 
				+	{
			
 
				+		linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep;
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar angle;
			
 
				+		calculateDiffAxisAngle(transform0, transform1, axis, angle);
			
 
				+		angVel = axis * angle / timeStep;
			
 
				+	}
			
 
				+
			
 
				+	static void calculateDiffAxisAngle(const b3Transform& transform0, const b3Transform& transform1, b3Vector3& axis, b3Scalar& angle)
			
 
				+	{
			
 
				+		b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse();
			
 
				+		b3Quaternion dorn;
			
 
				+		dmat.getRotation(dorn);
			
 
				+
			
 
				+		///floating point inaccuracy can lead to a w component > 1, which presumably breaks the getAngle() call below, so normalize first
			
 
				+		dorn.normalize();
			
 
				+
			
 
				+		angle = dorn.getAngle();
			
 
				+		axis = b3MakeVector3(dorn.getX(), dorn.getY(), dorn.getZ());
			
 
				+		axis[3] = b3Scalar(0.);
			
 
				+		//check for axis length
			
 
				+		b3Scalar len = axis.length2();
			
 
				+		if (len < B3_EPSILON * B3_EPSILON)
			
 
				+			axis = b3MakeVector3(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.));
			
 
				+		else
			
 
				+			axis /= b3Sqrt(len);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection
			
 
				+///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance
			
 
				+class b3ConvexSeparatingDistanceUtil
			
 
				+{
			
 
				+	b3Quaternion m_ornA;
			
 
				+	b3Quaternion m_ornB;
			
 
				+	b3Vector3 m_posA;
			
 
				+	b3Vector3 m_posB;
			
 
				+
			
 
				+	b3Vector3 m_separatingNormal;
			
 
				+
			
 
				+	b3Scalar m_boundingRadiusA;
			
 
				+	b3Scalar m_boundingRadiusB;
			
 
				+	b3Scalar m_separatingDistance;
			
 
				+
			
 
				+public:
			
 
				+	b3ConvexSeparatingDistanceUtil(b3Scalar boundingRadiusA, b3Scalar boundingRadiusB)
			
 
				+		: m_boundingRadiusA(boundingRadiusA),
			
 
				+		  m_boundingRadiusB(boundingRadiusB),
			
 
				+		  m_separatingDistance(0.f)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar getConservativeSeparatingDistance()
			
 
				+	{
			
 
				+		return m_separatingDistance;
			
 
				+	}
			
 
				+
			
 
				+	void updateSeparatingDistance(const b3Transform& transA, const b3Transform& transB)
			
 
				+	{
			
 
				+		const b3Vector3& toPosA = transA.getOrigin();
			
 
				+		const b3Vector3& toPosB = transB.getOrigin();
			
 
				+		b3Quaternion toOrnA = transA.getRotation();
			
 
				+		b3Quaternion toOrnB = transB.getRotation();
			
 
				+
			
 
				+		if (m_separatingDistance > 0.f)
			
 
				+		{
			
 
				+			b3Vector3 linVelA, angVelA, linVelB, angVelB;
			
 
				+			b3TransformUtil::calculateVelocityQuaternion(m_posA, toPosA, m_ornA, toOrnA, b3Scalar(1.), linVelA, angVelA);
			
 
				+			b3TransformUtil::calculateVelocityQuaternion(m_posB, toPosB, m_ornB, toOrnB, b3Scalar(1.), linVelB, angVelB);
			
 
				+			b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
			
 
				+			b3Vector3 relLinVel = (linVelB - linVelA);
			
 
				+			b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal);
			
 
				+			if (relLinVelocLength < 0.f)
			
 
				+			{
			
 
				+				relLinVelocLength = 0.f;
			
 
				+			}
			
 
				+
			
 
				+			b3Scalar projectedMotion = maxAngularProjectedVelocity + relLinVelocLength;
			
 
				+			m_separatingDistance -= projectedMotion;
			
 
				+		}
			
 
				+
			
 
				+		m_posA = toPosA;
			
 
				+		m_posB = toPosB;
			
 
				+		m_ornA = toOrnA;
			
 
				+		m_ornB = toOrnB;
			
 
				+	}
			
 
				+
			
 
				+	void initSeparatingDistance(const b3Vector3& separatingVector, b3Scalar separatingDistance, const b3Transform& transA, const b3Transform& transB)
			
 
				+	{
			
 
				+		m_separatingDistance = separatingDistance;
			
 
				+
			
 
				+		if (m_separatingDistance > 0.f)
			
 
				+		{
			
 
				+			m_separatingNormal = separatingVector;
			
 
				+
			
 
				+			const b3Vector3& toPosA = transA.getOrigin();
			
 
				+			const b3Vector3& toPosB = transB.getOrigin();
			
 
				+			b3Quaternion toOrnA = transA.getRotation();
			
 
				+			b3Quaternion toOrnB = transB.getRotation();
			
 
				+			m_posA = toPosA;
			
 
				+			m_posB = toPosB;
			
 
				+			m_ornA = toOrnA;
			
 
				+			m_ornB = toOrnB;
			
 
				+		}
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_TRANSFORM_UTIL_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Vector3.cpp
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Vector3.cpp
@@ -0,0 +1,1637 @@
 
				+/*
			
 
				+ Copyright (c) 2011-213 Apple Inc. http://bulletphysics.org
			
 
				+
			
 
				+ This software is provided 'as-is', without any express or implied warranty.
			
 
				+ In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+ Permission is granted to anyone to use this software for any purpose,
			
 
				+ including commercial applications, and to alter it and redistribute it freely,
			
 
				+ subject to the following restrictions:
			
 
				+
			
 
				+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+ 3. This notice may not be removed or altered from any source distribution.
			
 
				+
			
 
				+ This source version has been altered.
			
 
				+ */
			
 
				+
			
 
				+#if defined(_WIN32) || defined(__i386__)
			
 
				+#define B3_USE_SSE_IN_API
			
 
				+#endif
			
 
				+
			
 
				+#include "b3Vector3.h"
			
 
				+
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+
			
 
				+#ifdef __APPLE__
			
 
				+#include <stdint.h>
			
 
				+typedef float float4 __attribute__((vector_size(16)));
			
 
				+#else
			
 
				+#define float4 __m128
			
 
				+#endif
			
 
				+//typedef  uint32_t uint4 __attribute__ ((vector_size(16)));
			
 
				+
			
 
				+#if defined B3_USE_SSE || defined _WIN32
			
 
				+
			
 
				+#define LOG2_ARRAY_SIZE 6
			
 
				+#define STACK_ARRAY_COUNT (1UL << LOG2_ARRAY_SIZE)
			
 
				+
			
 
				+#include <emmintrin.h>
			
 
				+
			
 
				+long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+long b3_maxdot_large(const float *vv, const float *vec, unsigned long count, float *dotResult)
			
 
				+{
			
 
				+	const float4 *vertices = (const float4 *)vv;
			
 
				+	static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
			
 
				+	float4 dotMax = b3Assign128(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY);
			
 
				+	float4 vvec = _mm_loadu_ps(vec);
			
 
				+	float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa));  /// zzzz
			
 
				+	float4 vLo = _mm_movelh_ps(vvec, vvec);                                    /// xyxy
			
 
				+
			
 
				+	long maxIndex = -1L;
			
 
				+
			
 
				+	size_t segment = 0;
			
 
				+	float4 stack_array[STACK_ARRAY_COUNT];
			
 
				+
			
 
				+#if DEBUG
			
 
				+	// memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) );
			
 
				+#endif
			
 
				+
			
 
				+	size_t index;
			
 
				+	float4 max;
			
 
				+	// Faster loop without cleanup code for full tiles
			
 
				+	for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4)
			
 
				+	{
			
 
				+		max = dotMax;
			
 
				+
			
 
				+		for (index = 0; index < STACK_ARRAY_COUNT; index += 4)
			
 
				+		{  // do four dot products at a time. Carefully avoid touching the w element.
			
 
				+			float4 v0 = vertices[0];
			
 
				+			float4 v1 = vertices[1];
			
 
				+			float4 v2 = vertices[2];
			
 
				+			float4 v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			float4 lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			float4 hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			float4 lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			float4 hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 1] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 2] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 3] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			// It is too costly to keep the index of the max here. We will look for it again later.  We save a lot of work this way.
			
 
				+		}
			
 
				+
			
 
				+		// If we found a new max
			
 
				+		if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax)))
			
 
				+		{
			
 
				+			// copy the new max across all lanes of our max accumulator
			
 
				+			max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e));
			
 
				+			max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1));
			
 
				+
			
 
				+			dotMax = max;
			
 
				+
			
 
				+			// find first occurrence of that max
			
 
				+			size_t test;
			
 
				+			for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++)  // local_count must be a multiple of 4
			
 
				+			{
			
 
				+			}
			
 
				+			// record where it is.
			
 
				+			maxIndex = 4 * index + segment + indexTable[test];
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	// account for work we've already done
			
 
				+	count -= segment;
			
 
				+
			
 
				+	// Deal with the last < STACK_ARRAY_COUNT vectors
			
 
				+	max = dotMax;
			
 
				+	index = 0;
			
 
				+
			
 
				+	if (b3Unlikely(count > 16))
			
 
				+	{
			
 
				+		for (; index + 4 <= count / 4; index += 4)
			
 
				+		{  // do four dot products at a time. Carefully avoid touching the w element.
			
 
				+			float4 v0 = vertices[0];
			
 
				+			float4 v1 = vertices[1];
			
 
				+			float4 v2 = vertices[2];
			
 
				+			float4 v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			float4 lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			float4 hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			float4 lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			float4 hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 1] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 2] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			v0 = vertices[0];
			
 
				+			v1 = vertices[1];
			
 
				+			v2 = vertices[2];
			
 
				+			v3 = vertices[3];
			
 
				+			vertices += 4;
			
 
				+
			
 
				+			lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+			hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+			lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+			hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+			lo0 = lo0 * vLo;
			
 
				+			lo1 = lo1 * vLo;
			
 
				+			z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+			x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+			y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			z = z * vHi;
			
 
				+			x = x + y;
			
 
				+			x = x + z;
			
 
				+			stack_array[index + 3] = x;
			
 
				+			max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+
			
 
				+			// It is too costly to keep the index of the max here. We will look for it again later.  We save a lot of work this way.
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	size_t localCount = (count & -4L) - 4 * index;
			
 
				+	if (localCount)
			
 
				+	{
			
 
				+#ifdef __APPLE__
			
 
				+		float4 t0, t1, t2, t3, t4;
			
 
				+		float4 *sap = &stack_array[index + localCount / 4];
			
 
				+		vertices += localCount;  // counter the offset
			
 
				+		size_t byteIndex = -(localCount) * sizeof(float);
			
 
				+		//AT&T Code style assembly
			
 
				+		asm volatile(
			
 
				+			".align 4                                                                   \n\
			
 
				+             0: movaps  %[max], %[t2]                            // move max out of the way to avoid propagating NaNs in max \n\
			
 
				+          movaps  (%[vertices], %[byteIndex], 4),    %[t0]    // vertices[0]      \n\
			
 
				+          movaps  16(%[vertices], %[byteIndex], 4),  %[t1]    // vertices[1]      \n\
			
 
				+          movaps  %[t0], %[max]                               // vertices[0]      \n\
			
 
				+          movlhps %[t1], %[max]                               // x0y0x1y1         \n\
			
 
				+         movaps  32(%[vertices], %[byteIndex], 4),  %[t3]    // vertices[2]      \n\
			
 
				+         movaps  48(%[vertices], %[byteIndex], 4),  %[t4]    // vertices[3]      \n\
			
 
				+          mulps   %[vLo], %[max]                              // x0y0x1y1 * vLo   \n\
			
 
				+         movhlps %[t0], %[t1]                                // z0w0z1w1         \n\
			
 
				+         movaps  %[t3], %[t0]                                // vertices[2]      \n\
			
 
				+         movlhps %[t4], %[t0]                                // x2y2x3y3         \n\
			
 
				+         mulps   %[vLo], %[t0]                               // x2y2x3y3 * vLo   \n\
			
 
				+          movhlps %[t3], %[t4]                                // z2w2z3w3         \n\
			
 
				+          shufps  $0x88, %[t4], %[t1]                         // z0z1z2z3         \n\
			
 
				+          mulps   %[vHi], %[t1]                               // z0z1z2z3 * vHi   \n\
			
 
				+         movaps  %[max], %[t3]                               // x0y0x1y1 * vLo   \n\
			
 
				+         shufps  $0x88, %[t0], %[max]                        // x0x1x2x3 * vLo.x \n\
			
 
				+         shufps  $0xdd, %[t0], %[t3]                         // y0y1y2y3 * vLo.y \n\
			
 
				+         addps   %[t3], %[max]                               // x + y            \n\
			
 
				+         addps   %[t1], %[max]                               // x + y + z        \n\
			
 
				+         movaps  %[max], (%[sap], %[byteIndex])              // record result for later scrutiny \n\
			
 
				+         maxps   %[t2], %[max]                               // record max, restore max   \n\
			
 
				+         add     $16, %[byteIndex]                           // advance loop counter\n\
			
 
				+         jnz     0b                                          \n\
			
 
				+     "
			
 
				+			: [max] "+x"(max), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex)
			
 
				+			: [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap)
			
 
				+			: "memory", "cc");
			
 
				+		index += localCount / 4;
			
 
				+#else
			
 
				+		{
			
 
				+			for (unsigned int i = 0; i < localCount / 4; i++, index++)
			
 
				+			{  // do four dot products at a time. Carefully avoid touching the w element.
			
 
				+				float4 v0 = vertices[0];
			
 
				+				float4 v1 = vertices[1];
			
 
				+				float4 v2 = vertices[2];
			
 
				+				float4 v3 = vertices[3];
			
 
				+				vertices += 4;
			
 
				+
			
 
				+				float4 lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
			
 
				+				float4 hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
			
 
				+				float4 lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
			
 
				+				float4 hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
			
 
				+
			
 
				+				lo0 = lo0 * vLo;
			
 
				+				lo1 = lo1 * vLo;
			
 
				+				float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
			
 
				+				float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+				float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+				z = z * vHi;
			
 
				+				x = x + y;
			
 
				+				x = x + z;
			
 
				+				stack_array[index] = x;
			
 
				+				max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+			}
			
 
				+		}
			
 
				+#endif  //__APPLE__
			
 
				+	}
			
 
				+
			
 
				+	// process the last few points
			
 
				+	if (count & 3)
			
 
				+	{
			
 
				+		float4 v0, v1, v2, x, y, z;
			
 
				+		switch (count & 3)
			
 
				+		{
			
 
				+			case 3:
			
 
				+			{
			
 
				+				v0 = vertices[0];
			
 
				+				v1 = vertices[1];
			
 
				+				v2 = vertices[2];
			
 
				+
			
 
				+				// Calculate 3 dot products, transpose, duplicate v2
			
 
				+				float4 lo0 = _mm_movelh_ps(v0, v1);  // xyxy.lo
			
 
				+				float4 hi0 = _mm_movehl_ps(v1, v0);  // z?z?.lo
			
 
				+				lo0 = lo0 * vLo;
			
 
				+				z = _mm_shuffle_ps(hi0, v2, 0xa8);  // z0z1z2z2
			
 
				+				z = z * vHi;
			
 
				+				float4 lo1 = _mm_movelh_ps(v2, v2);  // xyxy
			
 
				+				lo1 = lo1 * vLo;
			
 
				+				x = _mm_shuffle_ps(lo0, lo1, 0x88);
			
 
				+				y = _mm_shuffle_ps(lo0, lo1, 0xdd);
			
 
				+			}
			
 
				+			break;
			
 
				+			case 2:
			
 
				+			{
			
 
				+				v0 = vertices[0];
			
 
				+				v1 = vertices[1];
			
 
				+				float4 xy = _mm_movelh_ps(v0, v1);
			
 
				+				z = _mm_movehl_ps(v1, v0);
			
 
				+				xy = xy * vLo;
			
 
				+				z = _mm_shuffle_ps(z, z, 0xa8);
			
 
				+				x = _mm_shuffle_ps(xy, xy, 0xa8);
			
 
				+				y = _mm_shuffle_ps(xy, xy, 0xfd);
			
 
				+				z = z * vHi;
			
 
				+			}
			
 
				+			break;
			
 
				+			case 1:
			
 
				+			{
			
 
				+				float4 xy = vertices[0];
			
 
				+				z = _mm_shuffle_ps(xy, xy, 0xaa);
			
 
				+				xy = xy * vLo;
			
 
				+				z = z * vHi;
			
 
				+				x = _mm_shuffle_ps(xy, xy, 0);
			
 
				+				y = _mm_shuffle_ps(xy, xy, 0x55);
			
 
				+			}
			
 
				+			break;
			
 
				+		}
			
 
				+		x = x + y;
			
 
				+		x = x + z;
			
 
				+		stack_array[index] = x;
			
 
				+		max = _mm_max_ps(x, max);  // control the order here so that max is never NaN even if x is nan
			
 
				+		index++;
			
 
				+	}
			
 
				+
			
 
				+	// if we found a new max.
			
 
				+	if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(max, dotMax)))
			
 
				+	{  // we found a new max. Search for it
			
 
				+		// find max across the max vector, place in all elements of max -- big latency hit here
			
 
				+		max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0x4e));
			
 
				+		max = _mm_max_ps(max, (float4)_mm_shuffle_ps(max, max, 0xb1));
			
 
				+
			
 
				+		// It is slightly faster to do this part in scalar code when count < 8. However, the common case for
			
 
				+		// this where it actually makes a difference is handled in the early out at the top of the function,
			
 
				+		// so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced
			
 
				+		// complexity, and removed it.
			
 
				+
			
 
				+		dotMax = max;
			
 
				+
			
 
				+		// scan for the first occurence of max in the array
			
 
				+		size_t test;
			
 
				+		for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], max))); index++)  // local_count must be a multiple of 4
			
 
				+		{
			
 
				+		}
			
 
				+		maxIndex = 4 * index + segment + indexTable[test];
			
 
				+	}
			
 
				+
			
 
				+	_mm_store_ss(dotResult, dotMax);
			
 
				+	return maxIndex;
			
 
				+}
			
 
				+
			
 
				+long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult);
				+
				+// Returns, one per lane, the dot products of the four vertices at
				+// vertices[0..3] with the query vector. vLo holds the query as (x, y, x, y)
				+// and vHi holds it as (z, z, z, z). The w element of each vertex is carried
				+// through the shuffles but never enters the arithmetic.
				+static inline float4 b3_mindot_dot4(const float4 *vertices, const float4 &vLo, const float4 &vHi)
				+{
				+	float4 v0 = vertices[0];
				+	float4 v1 = vertices[1];
				+	float4 v2 = vertices[2];
				+	float4 v3 = vertices[3];
				+
				+	float4 lo0 = _mm_movelh_ps(v0, v1);  // x0y0x1y1
				+	float4 hi0 = _mm_movehl_ps(v1, v0);  // z0?0z1?1
				+	float4 lo1 = _mm_movelh_ps(v2, v3);  // x2y2x3y3
				+	float4 hi1 = _mm_movehl_ps(v3, v2);  // z2?2z3?3
				+
				+	lo0 = lo0 * vLo;
				+	lo1 = lo1 * vLo;
				+	float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);  // z0z1z2z3
				+	float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);  // x0x1x2x3 * vLo.x
				+	float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);  // y0y1y2y3 * vLo.y
				+	z = z * vHi;
				+	x = x + y;
				+	x = x + z;
				+	return x;
				+}
				+
				+/**
				+ * Finds the vertex whose dot product with a direction is smallest (SSE path).
				+ *
				+ * Vertices are processed in tiles of STACK_ARRAY_COUNT * 4. Each tile's dot
				+ * products are stored in a stack buffer while a running per-lane minimum is
				+ * kept; the buffer is rescanned for the index only when a tile actually
				+ * lowered the minimum.
				+ *
				+ * @param vv        Vertices, read as `count` consecutive float4 values; only
				+ *                  x, y and z take part in the dot product.
				+ *                  NOTE(review): accessed through a float4 pointer, so this
				+ *                  presumably must be 16-byte aligned -- confirm with callers.
				+ * @param vec       Direction; four floats are loaded unaligned, x, y, z are used.
				+ * @param count     Number of vertices in vv.
				+ * @param dotResult Receives the smallest dot product found, or B3_INFINITY
				+ *                  when count is 0.
				+ * @return Index of a vertex attaining the minimum (the first one inside the
				+ *         tile where that minimum was found), or -1 when count is 0.
				+ */
				+long b3_mindot_large(const float *vv, const float *vec, unsigned long count, float *dotResult)
				+{
				+	// Nothing to search. Without this guard the final scan below would read
				+	// stack_array before anything had been stored in it.
				+	if (0 == count)
				+	{
				+		*dotResult = B3_INFINITY;
				+		return -1L;
				+	}
				+
				+	const float4 *vertices = (const float4 *)vv;
				+	// Maps a 4-bit compare mask to the position of its lowest set bit.
				+	static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
				+
				+	float4 dotmin = b3Assign128(B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY);
				+	float4 vvec = _mm_loadu_ps(vec);
				+	float4 vHi = b3CastiTo128f(_mm_shuffle_epi32(b3CastfTo128i(vvec), 0xaa));  /// zzzz
				+	float4 vLo = _mm_movelh_ps(vvec, vvec);                                    /// xyxy
				+
				+	long minIndex = -1L;
				+
				+	size_t segment = 0;
				+	float4 stack_array[STACK_ARRAY_COUNT];
				+
				+	size_t index;
				+	float4 min;
				+	// Faster loop without cleanup code for full tiles
				+	for (segment = 0; segment + STACK_ARRAY_COUNT * 4 <= count; segment += STACK_ARRAY_COUNT * 4)
				+	{
				+		min = dotmin;
				+
				+		for (index = 0; index < STACK_ARRAY_COUNT; index += 4)
				+		{
				+			// Sixteen vertices per pass, four dot products at a time.
				+			// It is too costly to keep the index of the min here. We look for it again later.
				+			for (size_t k = 0; k < 4; k++, vertices += 4)
				+			{
				+				float4 x = b3_mindot_dot4(vertices, vLo, vHi);
				+				stack_array[index + k] = x;
				+				min = _mm_min_ps(x, min);  // control the order here so that min is never NaN even if x is nan
				+			}
				+		}
				+
				+		// If we found a new min
				+		if (0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin)))
				+		{
				+			// copy the new min across all lanes of our min accumulator
				+			min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e));
				+			min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1));
				+
				+			dotmin = min;
				+
				+			// find first occurrence of that min
				+			size_t test;
				+			for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++)
				+			{
				+			}
				+			// record where it is.
				+			minIndex = 4 * index + segment + indexTable[test];
				+		}
				+	}
				+
				+	// account for work we've already done
				+	count -= segment;
				+
				+	// Deal with the last < STACK_ARRAY_COUNT vectors
				+	min = dotmin;
				+	index = 0;
				+
				+	if (b3Unlikely(count > 16))
				+	{
				+		for (; index + 4 <= count / 4; index += 4)
				+		{
				+			for (size_t k = 0; k < 4; k++, vertices += 4)
				+			{
				+				float4 x = b3_mindot_dot4(vertices, vLo, vHi);
				+				stack_array[index + k] = x;
				+				min = _mm_min_ps(x, min);  // control the order here so that min is never NaN even if x is nan
				+			}
				+		}
				+	}
				+
				+	// Remaining whole groups of four vertices.
				+	size_t localCount = (count & -4L) - 4 * index;
				+	if (localCount)
				+	{
				+#ifdef __APPLE__
				+		vertices += localCount;  // counter the offset
				+		float4 t0, t1, t2, t3, t4;
				+		size_t byteIndex = -(localCount) * sizeof(float);
				+		float4 *sap = &stack_array[index + localCount / 4];
				+
				+		asm volatile(
				+			".align 4                                                                   \n\
				+             0: movaps  %[min], %[t2]                            // move min out of the way to avoid propagating NaNs in min \n\
				+             movaps  (%[vertices], %[byteIndex], 4),    %[t0]    // vertices[0]      \n\
				+             movaps  16(%[vertices], %[byteIndex], 4),  %[t1]    // vertices[1]      \n\
				+             movaps  %[t0], %[min]                               // vertices[0]      \n\
				+             movlhps %[t1], %[min]                               // x0y0x1y1         \n\
				+             movaps  32(%[vertices], %[byteIndex], 4),  %[t3]    // vertices[2]      \n\
				+             movaps  48(%[vertices], %[byteIndex], 4),  %[t4]    // vertices[3]      \n\
				+             mulps   %[vLo], %[min]                              // x0y0x1y1 * vLo   \n\
				+             movhlps %[t0], %[t1]                                // z0w0z1w1         \n\
				+             movaps  %[t3], %[t0]                                // vertices[2]      \n\
				+             movlhps %[t4], %[t0]                                // x2y2x3y3         \n\
				+             movhlps %[t3], %[t4]                                // z2w2z3w3         \n\
				+             mulps   %[vLo], %[t0]                               // x2y2x3y3 * vLo   \n\
				+             shufps  $0x88, %[t4], %[t1]                         // z0z1z2z3         \n\
				+             mulps   %[vHi], %[t1]                               // z0z1z2z3 * vHi   \n\
				+             movaps  %[min], %[t3]                               // x0y0x1y1 * vLo   \n\
				+             shufps  $0x88, %[t0], %[min]                        // x0x1x2x3 * vLo.x \n\
				+             shufps  $0xdd, %[t0], %[t3]                         // y0y1y2y3 * vLo.y \n\
				+             addps   %[t3], %[min]                               // x + y            \n\
				+             addps   %[t1], %[min]                               // x + y + z        \n\
				+             movaps  %[min], (%[sap], %[byteIndex])              // record result for later scrutiny \n\
				+             minps   %[t2], %[min]                               // record min, restore min   \n\
				+             add     $16, %[byteIndex]                           // advance loop counter\n\
				+             jnz     0b                                          \n\
				+             "
				+			: [min] "+x"(min), [t0] "=&x"(t0), [t1] "=&x"(t1), [t2] "=&x"(t2), [t3] "=&x"(t3), [t4] "=&x"(t4), [byteIndex] "+r"(byteIndex)
				+			: [vLo] "x"(vLo), [vHi] "x"(vHi), [vertices] "r"(vertices), [sap] "r"(sap)
				+			: "memory", "cc");
				+		index += localCount / 4;
				+#else
				+		{
				+			for (unsigned int i = 0; i < localCount / 4; i++, index++)
				+			{  // do four dot products at a time. Carefully avoid touching the w element.
				+				float4 x = b3_mindot_dot4(vertices, vLo, vHi);
				+				vertices += 4;
				+				stack_array[index] = x;
				+				min = _mm_min_ps(x, min);  // control the order here so that min is never NaN even if x is nan
				+			}
				+		}
				+
				+#endif
				+	}
				+
				+	// process the last few points
				+	if (count & 3)
				+	{
				+		float4 v0, v1, v2, x, y, z;
				+		switch (count & 3)
				+		{
				+			case 3:
				+			{
				+				v0 = vertices[0];
				+				v1 = vertices[1];
				+				v2 = vertices[2];
				+
				+				// Calculate 3 dot products, transpose, duplicate v2
				+				float4 lo0 = _mm_movelh_ps(v0, v1);  // xyxy.lo
				+				float4 hi0 = _mm_movehl_ps(v1, v0);  // z?z?.lo
				+				lo0 = lo0 * vLo;
				+				z = _mm_shuffle_ps(hi0, v2, 0xa8);  // z0z1z2z2
				+				z = z * vHi;
				+				float4 lo1 = _mm_movelh_ps(v2, v2);  // xyxy
				+				lo1 = lo1 * vLo;
				+				x = _mm_shuffle_ps(lo0, lo1, 0x88);
				+				y = _mm_shuffle_ps(lo0, lo1, 0xdd);
				+			}
				+			break;
				+			case 2:
				+			{
				+				// Two vertices; the second one is duplicated into the unused lanes.
				+				v0 = vertices[0];
				+				v1 = vertices[1];
				+				float4 xy = _mm_movelh_ps(v0, v1);
				+				z = _mm_movehl_ps(v1, v0);
				+				xy = xy * vLo;
				+				z = _mm_shuffle_ps(z, z, 0xa8);
				+				x = _mm_shuffle_ps(xy, xy, 0xa8);
				+				y = _mm_shuffle_ps(xy, xy, 0xfd);
				+				z = z * vHi;
				+			}
				+			break;
				+			case 1:
				+			{
				+				// One vertex, broadcast to every lane.
				+				float4 xy = vertices[0];
				+				z = _mm_shuffle_ps(xy, xy, 0xaa);
				+				xy = xy * vLo;
				+				z = z * vHi;
				+				x = _mm_shuffle_ps(xy, xy, 0);
				+				y = _mm_shuffle_ps(xy, xy, 0x55);
				+			}
				+			break;
				+		}
				+		x = x + y;
				+		x = x + z;
				+		stack_array[index] = x;
				+		min = _mm_min_ps(x, min);  // control the order here so that min is never NaN even if x is nan
				+		index++;
				+	}
				+
				+	// if we found a new min.
				+	if (0 == segment || 0xf != _mm_movemask_ps((float4)_mm_cmpeq_ps(min, dotmin)))
				+	{  // we found a new min. Search for it
				+		// find min across the min vector, place in all elements of min -- big latency hit here
				+		min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0x4e));
				+		min = _mm_min_ps(min, (float4)_mm_shuffle_ps(min, min, 0xb1));
				+
				+		dotmin = min;
				+
				+		// scan for the first occurrence of min in the array
				+		// NOTE(review): this scan has no upper bound; it assumes at least one
				+		// stored lane equals min, which does not hold if every dot product is NaN.
				+		size_t test;
				+		for (index = 0; 0 == (test = _mm_movemask_ps(_mm_cmpeq_ps(stack_array[index], min))); index++)
				+		{
				+		}
				+		minIndex = 4 * index + segment + indexTable[test];
				+	}
				+
				+	_mm_store_ss(dotResult, dotmin);
				+	return minIndex;
				+}
			
 
				+
			
 
				+#elif defined B3_USE_NEON
			
 
				+#define ARM_NEON_GCC_COMPATIBILITY 1
			
 
				+#include <arm_neon.h>
			
 
				+
			
 
				+// Forward declarations for the NEON build: two kernel variants (_v0, _v1) and a
				+// selector (_sel) for each of the max-dot and min-dot searches.
				+static long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+static long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+static long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+static long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult);
			
 
				+
			
 
				+// Public entry points are function pointers in this build. Both start out
				+// pointing at their selector, which overwrites the pointer when it is called.
				+long (*b3_maxdot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_maxdot_large_sel;
			
 
				+long (*b3_mindot_large)(const float *vv, const float *vec, unsigned long count, float *dotResult) = b3_mindot_large_sel;
			
 
				+
			
 
				+// Declared here with C linkage; no definition is visible in this file.
				+// NOTE(review): presumably the platform's CPU-capability query -- confirm
				+// which header/library provides it. The selectors test bit 0x2000 of its result.
				+extern "C"
			
 
				+{
			
 
				+	int _get_cpu_capabilities(void);
			
 
				+}
			
 
				+
			
 
				+// Selector installed as the initial value of b3_maxdot_large. It picks a
				+// kernel from the CPU capability bits, stores it in the b3_maxdot_large
				+// pointer so later calls go straight to the kernel, then forwards this call.
				+static long b3_maxdot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult)
				+{
				+	// The kernels are declared as b3_maxdot_large_v0 / _v1; the unprefixed
				+	// names used previously are not declared anywhere in this file.
				+	// NOTE(review): 0x2000 is an undocumented capability bit -- confirm its meaning.
				+	if (_get_cpu_capabilities() & 0x2000)
				+		b3_maxdot_large = b3_maxdot_large_v1;
				+	else
				+		b3_maxdot_large = b3_maxdot_large_v0;
				+
				+	return b3_maxdot_large(vv, vec, count, dotResult);
				+}
			
 
				+
			
 
				+// Selector installed as the initial value of b3_mindot_large. It picks a
				+// kernel from the CPU capability bits, stores it in the b3_mindot_large
				+// pointer so later calls go straight to the kernel, then forwards this call.
				+static long b3_mindot_large_sel(const float *vv, const float *vec, unsigned long count, float *dotResult)
				+{
				+	// The kernels are declared as b3_mindot_large_v0 / _v1; the unprefixed
				+	// names used previously are not declared anywhere in this file.
				+	// NOTE(review): 0x2000 is an undocumented capability bit -- confirm its meaning.
				+	if (_get_cpu_capabilities() & 0x2000)
				+		b3_mindot_large = b3_mindot_large_v1;
				+	else
				+		b3_mindot_large = b3_mindot_large_v0;
				+
				+	return b3_mindot_large(vv, vec, count, dotResult);
				+}
			
 
				+
			
 
				+// Loads one float32x4_t from _ptr using a vld1.f32 with the :128 alignment
				+// qualifier and the writeback (`!`) form, so _ptr itself is modified ("+r").
				+// Written as a GNU statement expression whose value is the loaded vector;
				+// _ptr must therefore be a modifiable lvalue.
				+#define vld1q_f32_aligned_postincrement(_ptr) ({ float32x4_t _r; asm( "vld1.f32  {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; })
			
 
				+
			
 
				+long b3_maxdot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult)
			
 
				+{
			
 
				+	unsigned long i = 0;
			
 
				+	float32x4_t vvec = vld1q_f32_aligned_postincrement(vec);
			
 
				+	float32x2_t vLo = vget_low_f32(vvec);
			
 
				+	float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0);
			
 
				+	float32x2_t dotMaxLo = (float32x2_t){-B3_INFINITY, -B3_INFINITY};
			
 
				+	float32x2_t dotMaxHi = (float32x2_t){-B3_INFINITY, -B3_INFINITY};
			
 
				+	uint32x2_t indexLo = (uint32x2_t){0, 1};
			
 
				+	uint32x2_t indexHi = (uint32x2_t){2, 3};
			
 
				+	uint32x2_t iLo = (uint32x2_t){-1, -1};
			
 
				+	uint32x2_t iHi = (uint32x2_t){-1, -1};
			
 
				+	const uint32x2_t four = (uint32x2_t){4, 4};
			
 
				+
			
 
				+	for (; i + 8 <= count; i += 8)
			
 
				+	{
			
 
				+		float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
			
 
				+
			
 
				+		float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+		float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
			
 
				+		float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
			
 
				+		float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
			
 
				+
			
 
				+		float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
			
 
				+		float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
			
 
				+		float32x2_t zLo = vmul_f32(z0.val[0], vHi);
			
 
				+		float32x2_t zHi = vmul_f32(z1.val[0], vHi);
			
 
				+
			
 
				+		float32x2_t rLo = vpadd_f32(xy0, xy1);
			
 
				+		float32x2_t rHi = vpadd_f32(xy2, xy3);
			
 
				+		rLo = vadd_f32(rLo, zLo);
			
 
				+		rHi = vadd_f32(rHi, zHi);
			
 
				+
			
 
				+		uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+		uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
			
 
				+		dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+		dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
			
 
				+		iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+		iHi = vbsl_u32(maskHi, indexHi, iHi);
			
 
				+		indexLo = vadd_u32(indexLo, four);
			
 
				+		indexHi = vadd_u32(indexHi, four);
			
 
				+
			
 
				+		v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		v1 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		v2 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		v3 = vld1q_f32_aligned_postincrement(vv);
			
 
				+
			
 
				+		xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+		xy1 = vmul_f32(vget_low_f32(v1), vLo);
			
 
				+		xy2 = vmul_f32(vget_low_f32(v2), vLo);
			
 
				+		xy3 = vmul_f32(vget_low_f32(v3), vLo);
			
 
				+
			
 
				+		z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
			
 
				+		z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
			
 
				+		zLo = vmul_f32(z0.val[0], vHi);
			
 
				+		zHi = vmul_f32(z1.val[0], vHi);
			
 
				+
			
 
				+		rLo = vpadd_f32(xy0, xy1);
			
 
				+		rHi = vpadd_f32(xy2, xy3);
			
 
				+		rLo = vadd_f32(rLo, zLo);
			
 
				+		rHi = vadd_f32(rHi, zHi);
			
 
				+
			
 
				+		maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+		maskHi = vcgt_f32(rHi, dotMaxHi);
			
 
				+		dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+		dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
			
 
				+		iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+		iHi = vbsl_u32(maskHi, indexHi, iHi);
			
 
				+		indexLo = vadd_u32(indexLo, four);
			
 
				+		indexHi = vadd_u32(indexHi, four);
			
 
				+	}
			
 
				+
			
 
				+	for (; i + 4 <= count; i += 4)
			
 
				+	{
			
 
				+		float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
			
 
				+		float32x4_t v3 = vld1q_f32_aligned_postincrement(vv);
			
 
				+
			
 
				+		float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+		float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
			
 
				+		float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
			
 
				+		float32x2_t xy3 = vmul_f32(vget_low_f32(v3), vLo);
			
 
				+
			
 
				+		float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
			
 
				+		float32x2x2_t z1 = vtrn_f32(vget_high_f32(v2), vget_high_f32(v3));
			
 
				+		float32x2_t zLo = vmul_f32(z0.val[0], vHi);
			
 
				+		float32x2_t zHi = vmul_f32(z1.val[0], vHi);
			
 
				+
			
 
				+		float32x2_t rLo = vpadd_f32(xy0, xy1);
			
 
				+		float32x2_t rHi = vpadd_f32(xy2, xy3);
			
 
				+		rLo = vadd_f32(rLo, zLo);
			
 
				+		rHi = vadd_f32(rHi, zHi);
			
 
				+
			
 
				+		uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+		uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
			
 
				+		dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+		dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
			
 
				+		iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+		iHi = vbsl_u32(maskHi, indexHi, iHi);
			
 
				+		indexLo = vadd_u32(indexLo, four);
			
 
				+		indexHi = vadd_u32(indexHi, four);
			
 
				+	}
			
 
				+
			
 
				+	switch (count & 3)
			
 
				+	{
			
 
				+		case 3:
			
 
				+		{
			
 
				+			float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+			float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
			
 
				+			float32x4_t v2 = vld1q_f32_aligned_postincrement(vv);
			
 
				+
			
 
				+			float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+			float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
			
 
				+			float32x2_t xy2 = vmul_f32(vget_low_f32(v2), vLo);
			
 
				+
			
 
				+			float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
			
 
				+			float32x2_t zLo = vmul_f32(z0.val[0], vHi);
			
 
				+			float32x2_t zHi = vmul_f32(vdup_lane_f32(vget_high_f32(v2), 0), vHi);
			
 
				+
			
 
				+			float32x2_t rLo = vpadd_f32(xy0, xy1);
			
 
				+			float32x2_t rHi = vpadd_f32(xy2, xy2);
			
 
				+			rLo = vadd_f32(rLo, zLo);
			
 
				+			rHi = vadd_f32(rHi, zHi);
			
 
				+
			
 
				+			uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+			uint32x2_t maskHi = vcgt_f32(rHi, dotMaxHi);
			
 
				+			dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+			dotMaxHi = vbsl_f32(maskHi, rHi, dotMaxHi);
			
 
				+			iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+			iHi = vbsl_u32(maskHi, indexHi, iHi);
			
 
				+		}
			
 
				+		break;
			
 
				+		case 2:
			
 
				+		{
			
 
				+			float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+			float32x4_t v1 = vld1q_f32_aligned_postincrement(vv);
			
 
				+
			
 
				+			float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+			float32x2_t xy1 = vmul_f32(vget_low_f32(v1), vLo);
			
 
				+
			
 
				+			float32x2x2_t z0 = vtrn_f32(vget_high_f32(v0), vget_high_f32(v1));
			
 
				+			float32x2_t zLo = vmul_f32(z0.val[0], vHi);
			
 
				+
			
 
				+			float32x2_t rLo = vpadd_f32(xy0, xy1);
			
 
				+			rLo = vadd_f32(rLo, zLo);
			
 
				+
			
 
				+			uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+			dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+			iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+		}
			
 
				+		break;
			
 
				+		case 1:
			
 
				+		{
			
 
				+			float32x4_t v0 = vld1q_f32_aligned_postincrement(vv);
			
 
				+			float32x2_t xy0 = vmul_f32(vget_low_f32(v0), vLo);
			
 
				+			float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0);
			
 
				+			float32x2_t zLo = vmul_f32(z0, vHi);
			
 
				+			float32x2_t rLo = vpadd_f32(xy0, xy0);
			
 
				+			rLo = vadd_f32(rLo, zLo);
			
 
				+			uint32x2_t maskLo = vcgt_f32(rLo, dotMaxLo);
			
 
				+			dotMaxLo = vbsl_f32(maskLo, rLo, dotMaxLo);
			
 
				+			iLo = vbsl_u32(maskLo, indexLo, iLo);
			
 
				+		}
			
 
				+		break;
			
 
				+
			
 
				+		default:
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	// select best answer between hi and lo results
			
 
				+	uint32x2_t mask = vcgt_f32(dotMaxHi, dotMaxLo);
			
 
				+	dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
			
 
				+	iLo = vbsl_u32(mask, iHi, iLo);
			
 
				+
			
 
				+	// select best answer between even and odd results
			
 
				+	dotMaxHi = vdup_lane_f32(dotMaxLo, 1);
			
 
				+	iHi = vdup_lane_u32(iLo, 1);
			
 
				+	mask = vcgt_f32(dotMaxHi, dotMaxLo);
			
 
				+	dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
			
 
				+	iLo = vbsl_u32(mask, iHi, iLo);
			
 
				+
			
 
				+	*dotResult = vget_lane_f32(dotMaxLo, 0);
			
 
				+	return vget_lane_u32(iLo, 0);
			
 
				+}
			
 
				+
			
 
				// Dot products of four packed vertices with one direction vector.
				//
				// a, b, c, d : one vertex per register; lanes 0..2 are read as x, y, z and
				//              lane 3 does not contribute to the result.
				// dirXY      : direction laid out as (x, y, x, y).
				// dirZ       : direction z replicated in every lane.
				//
				// Lane k of the result is the dot product of the k-th argument with the
				// direction, accumulated as (x*dx + y*dy) + z*dz.
				static inline float32x4_t b3_maxdot_v1_dot4(float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
															float32x4_t dirXY, float32x4_t dirZ)
				{
					// Gather the (x, y) halves and the (z, w) halves of neighbouring vertices.
					float32x4_t xyAB = vcombine_f32(vget_low_f32(a), vget_low_f32(b));
					float32x4_t xyCD = vcombine_f32(vget_low_f32(c), vget_low_f32(d));
					float32x4_t zwAB = vcombine_f32(vget_high_f32(a), vget_high_f32(b));
					float32x4_t zwCD = vcombine_f32(vget_high_f32(c), vget_high_f32(d));

					xyAB = vmulq_f32(xyAB, dirXY);
					xyCD = vmulq_f32(xyCD, dirXY);

					// De-interleave: val[0] receives the even lanes, val[1] the odd lanes.
					float32x4x2_t zw = vuzpq_f32(zwAB, zwCD);
					float32x4_t zTerm = vmulq_f32(zw.val[0], dirZ);
					float32x4x2_t xy = vuzpq_f32(xyAB, xyCD);

					float32x4_t sum = vaddq_f32(xy.val[0], xy.val[1]);
					return vaddq_f32(sum, zTerm);
				}

				// Finds the vertex whose dot product with `vec` is largest, working on
				// 128-bit registers with four running maxima (one per lane).
				//
				// vv        : vertex data, one float32x4_t per vertex, read through
				//             vld1q_f32_aligned_postincrement (defined elsewhere in this file;
				//             presumably an aligned load that advances the pointer - confirm there).
				// vec       : direction vector; lanes 0..2 are used.
				// count     : number of vertices to examine.
				// dotResult : receives the largest dot product.
				//
				// Returns the index stored alongside the winning dot product. When count is 0
				// nothing is compared: *dotResult is -B3_INFINITY and the return value is lane 0
				// of the all-ones initial index vector.
				long b3_maxdot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult)
				{
					float32x4_t dir = vld1q_f32_aligned_postincrement(vec);
					float32x4_t dirXY = vcombine_f32(vget_low_f32(dir), vget_low_f32(dir));
					float32x4_t dirZ = vdupq_lane_f32(vget_high_f32(dir), 0);
					const uint32x4_t stride = (uint32x4_t){4, 4, 4, 4};
					uint32x4_t candidate = (uint32x4_t){0, 1, 2, 3};
					uint32x4_t bestIndex = (uint32x4_t){-1, -1, -1, -1};
					float32x4_t bestDot = (float32x4_t){-B3_INFINITY, -B3_INFINITY, -B3_INFINITY, -B3_INFINITY};

					unsigned long done = 0;

					// Main loop, manually unrolled: two groups of four vertices per pass.
					while (done + 8 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x4_t dots = b3_maxdot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t better = vcgtq_f32(dots, bestDot);
						bestDot = vbslq_f32(better, dots, bestDot);
						bestIndex = vbslq_u32(better, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						a = vld1q_f32_aligned_postincrement(vv);
						b = vld1q_f32_aligned_postincrement(vv);
						c = vld1q_f32_aligned_postincrement(vv);
						d = vld1q_f32_aligned_postincrement(vv);

						dots = b3_maxdot_v1_dot4(a, b, c, d, dirXY, dirZ);
						better = vcgtq_f32(dots, bestDot);
						bestDot = vbslq_f32(better, dots, bestDot);
						bestIndex = vbslq_u32(better, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						done += 8;
					}

					// At most one complete group of four is left after the unrolled loop.
					while (done + 4 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x4_t dots = b3_maxdot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t better = vcgtq_f32(dots, bestDot);
						bestDot = vbslq_f32(better, dots, bestDot);
						bestIndex = vbslq_u32(better, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						done += 4;
					}

					// Trailing 1..3 vertices: fill the unused lanes by repeating vertices that
					// were actually loaded, giving (v0,v0,v0,v0), (v0,v1,v0,v1) or (v0,v1,v2,v2).
					// A padded lane only repeats a value that a lower lane also sees, and every
					// comparison below is strict, so the out-of-range index attached to a padded
					// lane cannot win the final reduction.
					const unsigned long rest = count & 3;
					if (rest != 0)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = a;
						float32x4_t c = a;
						float32x4_t d = a;
						if (rest >= 2)
						{
							b = vld1q_f32_aligned_postincrement(vv);
							d = b;
						}
						if (rest == 3)
						{
							c = vld1q_f32_aligned_postincrement(vv);
							d = c;
						}

						float32x4_t dots = b3_maxdot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t better = vcgtq_f32(dots, bestDot);
						bestDot = vbslq_f32(better, dots, bestDot);
						bestIndex = vbslq_u32(better, candidate, bestIndex);
					}

					// Reduce four lanes to two: lanes 2 and 3 against lanes 0 and 1.
					uint32x2_t pick = vcgt_f32(vget_high_f32(bestDot), vget_low_f32(bestDot));
					float32x2_t dotPair = vbsl_f32(pick, vget_high_f32(bestDot), vget_low_f32(bestDot));
					uint32x2_t indexPair = vbsl_u32(pick, vget_high_u32(bestIndex), vget_low_u32(bestIndex));

					// Reduce two lanes to one: lane 1 against lane 0.
					float32x2_t dotOdd = vdup_lane_f32(dotPair, 1);
					uint32x2_t indexOdd = vdup_lane_u32(indexPair, 1);
					pick = vcgt_f32(dotOdd, dotPair);
					dotPair = vbsl_f32(pick, dotOdd, dotPair);
					indexPair = vbsl_u32(pick, indexOdd, indexPair);

					*dotResult = vget_lane_f32(dotPair, 0);
					return vget_lane_u32(indexPair, 0);
				}
			
 
				+
			
 
				// Dot products of two packed vertices with one direction vector, computed on
				// 64-bit registers.
				//
				// a, b  : one vertex per register; lanes 0..2 are read as x, y, z and lane 3
				//         does not contribute to the result.
				// dirXY : direction (x, y).
				// dirZ  : direction z replicated in both lanes.
				//
				// Lane 0 is a . direction and lane 1 is b . direction, each accumulated as
				// (x*dx + y*dy) + z*dz.
				static inline float32x2_t b3_mindot_v0_dot2(float32x4_t a, float32x4_t b, float32x2_t dirXY, float32x2_t dirZ)
				{
					float32x2_t xyA = vmul_f32(vget_low_f32(a), dirXY);
					float32x2_t xyB = vmul_f32(vget_low_f32(b), dirXY);

					// Transposing the (z, w) halves puts (a.z, b.z) into val[0].
					float32x2x2_t zw = vtrn_f32(vget_high_f32(a), vget_high_f32(b));
					float32x2_t zTerm = vmul_f32(zw.val[0], dirZ);

					// Pairwise add yields (a.x*dx + a.y*dy, b.x*dx + b.y*dy).
					float32x2_t sum = vpadd_f32(xyA, xyB);
					return vadd_f32(sum, zTerm);
				}

				// Finds the vertex whose dot product with `vec` is smallest, working on
				// 64-bit registers with two pairs of running minima ("Lo" covers vertices
				// 0 and 1 of each group of four, "Hi" covers vertices 2 and 3).
				//
				// vv        : vertex data, one float32x4_t per vertex, read through
				//             vld1q_f32_aligned_postincrement (defined elsewhere in this file;
				//             presumably an aligned load that advances the pointer - confirm there).
				// vec       : direction vector; lanes 0..2 are used.
				// count     : number of vertices to examine.
				// dotResult : receives the smallest dot product.
				//
				// Returns the index stored alongside the winning dot product. When count is 0
				// nothing is compared: *dotResult is B3_INFINITY and the return value is lane 0
				// of the all-ones initial index vector.
				long b3_mindot_large_v0(const float *vv, const float *vec, unsigned long count, float *dotResult)
				{
					unsigned long done = 0;
					float32x4_t dir = vld1q_f32_aligned_postincrement(vec);
					float32x2_t dirXY = vget_low_f32(dir);
					float32x2_t dirZ = vdup_lane_f32(vget_high_f32(dir), 0);
					float32x2_t bestLo = (float32x2_t){B3_INFINITY, B3_INFINITY};
					float32x2_t bestHi = (float32x2_t){B3_INFINITY, B3_INFINITY};
					uint32x2_t candLo = (uint32x2_t){0, 1};
					uint32x2_t candHi = (uint32x2_t){2, 3};
					uint32x2_t bestIdxLo = (uint32x2_t){-1, -1};
					uint32x2_t bestIdxHi = (uint32x2_t){-1, -1};
					const uint32x2_t stride = (uint32x2_t){4, 4};

					// Main loop, manually unrolled: two groups of four vertices per pass.
					while (done + 8 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x2_t dotsLo = b3_mindot_v0_dot2(a, b, dirXY, dirZ);
						float32x2_t dotsHi = b3_mindot_v0_dot2(c, d, dirXY, dirZ);

						uint32x2_t lessLo = vclt_f32(dotsLo, bestLo);
						uint32x2_t lessHi = vclt_f32(dotsHi, bestHi);
						bestLo = vbsl_f32(lessLo, dotsLo, bestLo);
						bestHi = vbsl_f32(lessHi, dotsHi, bestHi);
						bestIdxLo = vbsl_u32(lessLo, candLo, bestIdxLo);
						bestIdxHi = vbsl_u32(lessHi, candHi, bestIdxHi);
						candLo = vadd_u32(candLo, stride);
						candHi = vadd_u32(candHi, stride);

						a = vld1q_f32_aligned_postincrement(vv);
						b = vld1q_f32_aligned_postincrement(vv);
						c = vld1q_f32_aligned_postincrement(vv);
						d = vld1q_f32_aligned_postincrement(vv);

						dotsLo = b3_mindot_v0_dot2(a, b, dirXY, dirZ);
						dotsHi = b3_mindot_v0_dot2(c, d, dirXY, dirZ);

						lessLo = vclt_f32(dotsLo, bestLo);
						lessHi = vclt_f32(dotsHi, bestHi);
						bestLo = vbsl_f32(lessLo, dotsLo, bestLo);
						bestHi = vbsl_f32(lessHi, dotsHi, bestHi);
						bestIdxLo = vbsl_u32(lessLo, candLo, bestIdxLo);
						bestIdxHi = vbsl_u32(lessHi, candHi, bestIdxHi);
						candLo = vadd_u32(candLo, stride);
						candHi = vadd_u32(candHi, stride);

						done += 8;
					}

					// At most one complete group of four is left after the unrolled loop.
					while (done + 4 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x2_t dotsLo = b3_mindot_v0_dot2(a, b, dirXY, dirZ);
						float32x2_t dotsHi = b3_mindot_v0_dot2(c, d, dirXY, dirZ);

						uint32x2_t lessLo = vclt_f32(dotsLo, bestLo);
						uint32x2_t lessHi = vclt_f32(dotsHi, bestHi);
						bestLo = vbsl_f32(lessLo, dotsLo, bestLo);
						bestHi = vbsl_f32(lessHi, dotsHi, bestHi);
						bestIdxLo = vbsl_u32(lessLo, candLo, bestIdxLo);
						bestIdxHi = vbsl_u32(lessHi, candHi, bestIdxHi);
						candLo = vadd_u32(candLo, stride);
						candHi = vadd_u32(candHi, stride);

						done += 4;
					}

					// Trailing 1..3 vertices. The "Lo" pair always takes the first one or two
					// (a lone vertex is repeated in both lanes); the "Hi" pair is only touched
					// when a third vertex exists, which is then repeated in both of its lanes.
					// A repeated lane duplicates a value its lower neighbour also sees, and every
					// comparison is strict, so the out-of-range index attached to a repeated
					// lane cannot win the final reduction.
					const unsigned long rest = count & 3;
					if (rest != 0)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = a;
						if (rest >= 2)
						{
							b = vld1q_f32_aligned_postincrement(vv);
						}

						float32x2_t dotsLo = b3_mindot_v0_dot2(a, b, dirXY, dirZ);
						uint32x2_t lessLo = vclt_f32(dotsLo, bestLo);
						bestLo = vbsl_f32(lessLo, dotsLo, bestLo);
						bestIdxLo = vbsl_u32(lessLo, candLo, bestIdxLo);

						if (rest == 3)
						{
							float32x4_t c = vld1q_f32_aligned_postincrement(vv);

							float32x2_t dotsHi = b3_mindot_v0_dot2(c, c, dirXY, dirZ);
							uint32x2_t lessHi = vclt_f32(dotsHi, bestHi);
							bestHi = vbsl_f32(lessHi, dotsHi, bestHi);
							bestIdxHi = vbsl_u32(lessHi, candHi, bestIdxHi);
						}
					}

					// Merge the "Hi" pair into the "Lo" pair, lane by lane.
					uint32x2_t pick = vclt_f32(bestHi, bestLo);
					bestLo = vbsl_f32(pick, bestHi, bestLo);
					bestIdxLo = vbsl_u32(pick, bestIdxHi, bestIdxLo);

					// Merge lane 1 into lane 0.
					bestHi = vdup_lane_f32(bestLo, 1);
					bestIdxHi = vdup_lane_u32(bestIdxLo, 1);
					pick = vclt_f32(bestHi, bestLo);
					bestLo = vbsl_f32(pick, bestHi, bestLo);
					bestIdxLo = vbsl_u32(pick, bestIdxHi, bestIdxLo);

					*dotResult = vget_lane_f32(bestLo, 0);
					return vget_lane_u32(bestIdxLo, 0);
				}
			
 
				+
			
 
				// Dot products of four packed vertices with one direction vector.
				//
				// a, b, c, d : one vertex per register; lanes 0..2 are read as x, y, z and
				//              lane 3 does not contribute to the result.
				// dirXY      : direction laid out as (x, y, x, y).
				// dirZ       : direction z replicated in every lane.
				//
				// Lane k of the result is the dot product of the k-th argument with the
				// direction, accumulated as (x*dx + y*dy) + z*dz.
				static inline float32x4_t b3_mindot_v1_dot4(float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
															float32x4_t dirXY, float32x4_t dirZ)
				{
					// Gather the (x, y) halves and the (z, w) halves of neighbouring vertices.
					float32x4_t xyAB = vcombine_f32(vget_low_f32(a), vget_low_f32(b));
					float32x4_t xyCD = vcombine_f32(vget_low_f32(c), vget_low_f32(d));
					float32x4_t zwAB = vcombine_f32(vget_high_f32(a), vget_high_f32(b));
					float32x4_t zwCD = vcombine_f32(vget_high_f32(c), vget_high_f32(d));

					xyAB = vmulq_f32(xyAB, dirXY);
					xyCD = vmulq_f32(xyCD, dirXY);

					// De-interleave: val[0] receives the even lanes, val[1] the odd lanes.
					float32x4x2_t zw = vuzpq_f32(zwAB, zwCD);
					float32x4_t zTerm = vmulq_f32(zw.val[0], dirZ);
					float32x4x2_t xy = vuzpq_f32(xyAB, xyCD);

					float32x4_t sum = vaddq_f32(xy.val[0], xy.val[1]);
					return vaddq_f32(sum, zTerm);
				}

				// Finds the vertex whose dot product with `vec` is smallest, working on
				// 128-bit registers with four running minima (one per lane).
				//
				// vv        : vertex data, one float32x4_t per vertex, read through
				//             vld1q_f32_aligned_postincrement (defined elsewhere in this file;
				//             presumably an aligned load that advances the pointer - confirm there).
				// vec       : direction vector; lanes 0..2 are used.
				// count     : number of vertices to examine.
				// dotResult : receives the smallest dot product.
				//
				// Returns the index stored alongside the winning dot product. When count is 0
				// nothing is compared: *dotResult is B3_INFINITY and the return value is lane 0
				// of the all-ones initial index vector.
				long b3_mindot_large_v1(const float *vv, const float *vec, unsigned long count, float *dotResult)
				{
					float32x4_t dir = vld1q_f32_aligned_postincrement(vec);
					float32x4_t dirXY = vcombine_f32(vget_low_f32(dir), vget_low_f32(dir));
					float32x4_t dirZ = vdupq_lane_f32(vget_high_f32(dir), 0);
					const uint32x4_t stride = (uint32x4_t){4, 4, 4, 4};
					uint32x4_t candidate = (uint32x4_t){0, 1, 2, 3};
					uint32x4_t bestIndex = (uint32x4_t){-1, -1, -1, -1};
					float32x4_t bestDot = (float32x4_t){B3_INFINITY, B3_INFINITY, B3_INFINITY, B3_INFINITY};

					unsigned long done = 0;

					// Main loop, manually unrolled: two groups of four vertices per pass.
					while (done + 8 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x4_t dots = b3_mindot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t smaller = vcltq_f32(dots, bestDot);
						bestDot = vbslq_f32(smaller, dots, bestDot);
						bestIndex = vbslq_u32(smaller, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						a = vld1q_f32_aligned_postincrement(vv);
						b = vld1q_f32_aligned_postincrement(vv);
						c = vld1q_f32_aligned_postincrement(vv);
						d = vld1q_f32_aligned_postincrement(vv);

						dots = b3_mindot_v1_dot4(a, b, c, d, dirXY, dirZ);
						smaller = vcltq_f32(dots, bestDot);
						bestDot = vbslq_f32(smaller, dots, bestDot);
						bestIndex = vbslq_u32(smaller, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						done += 8;
					}

					// At most one complete group of four is left after the unrolled loop.
					while (done + 4 <= count)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = vld1q_f32_aligned_postincrement(vv);
						float32x4_t c = vld1q_f32_aligned_postincrement(vv);
						float32x4_t d = vld1q_f32_aligned_postincrement(vv);

						float32x4_t dots = b3_mindot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t smaller = vcltq_f32(dots, bestDot);
						bestDot = vbslq_f32(smaller, dots, bestDot);
						bestIndex = vbslq_u32(smaller, candidate, bestIndex);
						candidate = vaddq_u32(candidate, stride);

						done += 4;
					}

					// Trailing 1..3 vertices: fill the unused lanes by repeating vertices that
					// were actually loaded, giving (v0,v0,v0,v0), (v0,v1,v0,v1) or (v0,v1,v2,v2).
					// A padded lane only repeats a value that a lower lane also sees, and every
					// comparison below is strict, so the out-of-range index attached to a padded
					// lane cannot win the final reduction.
					const unsigned long rest = count & 3;
					if (rest != 0)
					{
						float32x4_t a = vld1q_f32_aligned_postincrement(vv);
						float32x4_t b = a;
						float32x4_t c = a;
						float32x4_t d = a;
						if (rest >= 2)
						{
							b = vld1q_f32_aligned_postincrement(vv);
							d = b;
						}
						if (rest == 3)
						{
							c = vld1q_f32_aligned_postincrement(vv);
							d = c;
						}

						float32x4_t dots = b3_mindot_v1_dot4(a, b, c, d, dirXY, dirZ);
						uint32x4_t smaller = vcltq_f32(dots, bestDot);
						bestDot = vbslq_f32(smaller, dots, bestDot);
						bestIndex = vbslq_u32(smaller, candidate, bestIndex);
					}

					// Reduce four lanes to two: lanes 2 and 3 against lanes 0 and 1.
					uint32x2_t pick = vclt_f32(vget_high_f32(bestDot), vget_low_f32(bestDot));
					float32x2_t dotPair = vbsl_f32(pick, vget_high_f32(bestDot), vget_low_f32(bestDot));
					uint32x2_t indexPair = vbsl_u32(pick, vget_high_u32(bestIndex), vget_low_u32(bestIndex));

					// Reduce two lanes to one: lane 1 against lane 0.
					float32x2_t dotOdd = vdup_lane_f32(dotPair, 1);
					uint32x2_t indexOdd = vdup_lane_u32(indexPair, 1);
					pick = vclt_f32(dotOdd, dotPair);
					dotPair = vbsl_f32(pick, dotOdd, dotPair);
					indexPair = vbsl_u32(pick, indexOdd, indexPair);

					*dotResult = vget_lane_f32(dotPair, 0);
					return vget_lane_u32(indexPair, 0);
				}
			
 
				+
			
 
				+#else
			
 
				+#error Unhandled __APPLE__ arch
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __APPLE__ */
			
--- a/Dependencies/include/bullet3/Bullet3Common/b3Vector3.h
+++ b/Dependencies/include/bullet3/Bullet3Common/b3Vector3.h
@@ -0,0 +1,1303 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_VECTOR3_H
			
 
				+#define B3_VECTOR3_H
			
 
				+
			
 
				+//#include <stdint.h>
			
 
				+#include "b3Scalar.h"
			
 
				+#include "b3MinMax.h"
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3Vector3Data b3Vector3DoubleData
			
 
				+#define b3Vector3DataName "b3Vector3DoubleData"
			
 
				+#else
			
 
				+#define b3Vector3Data b3Vector3FloatData
			
 
				+#define b3Vector3DataName "b3Vector3FloatData"
			
 
				+#endif  //B3_USE_DOUBLE_PRECISION
			
 
				+
			
 
				+#if defined B3_USE_SSE
			
 
				+
			
 
				+//typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
			
 
				+
			
 
				+#ifdef _MSC_VER
			
 
				+#pragma warning(disable : 4556)  // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
			
 
				+#endif
			
 
				+
			
 
				+#define B3_SHUFFLE(x, y, z, w) (((w) << 6 | (z) << 4 | (y) << 2 | (x)) & 0xff)
			
 
				+//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
			
 
				+#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
			
 
				+#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3))
			
 
				+#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i))
			
 
				+
			
 
				+#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
			
 
				+#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
			
 
				+#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
			
 
				+#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
			
 
				+#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
			
 
				+#define b3vxyzMaskf b3vFFF0fMask
			
 
				+#define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
			
 
				+
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifdef B3_USE_NEON
			
 
				+
			
 
				+const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+class b3Vector3;
			
 
				+class b3Vector4;
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+//#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
			
 
				+inline b3Vector3 b3MakeVector3(b3SimdFloat4 v);
			
 
				+inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec);
			
 
				+#endif
			
 
				+
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z);
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
			
 
				+inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w);
			
 
				+
			
 
				+/**@brief b3Vector3 can be used to represent 3D points and vectors.
			
 
				+ * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
			
 
				+ * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
			
 
				+ */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3Vector3
			
 
				+{
			
 
				+public:
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)  // _WIN32 || ARM
			
 
				+	union {
			
 
				+		b3SimdFloat4 mVec128;
			
 
				+		float m_floats[4];
			
 
				+		struct
			
 
				+		{
			
 
				+			float x, y, z, w;
			
 
				+		};
			
 
				+	};
			
 
				+#else
			
 
				+	union {
			
 
				+		float m_floats[4];
			
 
				+		struct
			
 
				+		{
			
 
				+			float x, y, z, w;
			
 
				+		};
			
 
				+	};
			
 
				+#endif
			
 
				+
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)  // _WIN32 || ARM
			
 
				+
			
 
				+	/*B3_FORCE_INLINE		b3Vector3()
			
 
				+	{
			
 
				+	}
			
 
				+	*/
			
 
				+
			
 
				+	B3_FORCE_INLINE b3SimdFloat4 get128() const
			
 
				+	{
			
 
				+		return mVec128;
			
 
				+	}
			
 
				+	B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
			
 
				+	{
			
 
				+		mVec128 = v128;
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+public:
			
 
				+	/**@brief Add a vector to this one
			
 
				+ * @param v The vector to add to this one */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = _mm_add_ps(mVec128, v.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vaddq_f32(mVec128, v.mVec128);
			
 
				+#else
			
 
				+		m_floats[0] += v.m_floats[0];
			
 
				+		m_floats[1] += v.m_floats[1];
			
 
				+		m_floats[2] += v.m_floats[2];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Subtract a vector from this one
			
 
				+   * @param v The vector to subtract */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vsubq_f32(mVec128, v.mVec128);
			
 
				+#else
			
 
				+		m_floats[0] -= v.m_floats[0];
			
 
				+		m_floats[1] -= v.m_floats[1];
			
 
				+		m_floats[2] -= v.m_floats[2];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Scale the vector
			
 
				+   * @param s Scale factor */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
			
 
				+		vs = b3_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vs);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vmulq_n_f32(mVec128, s);
			
 
				+#else
			
 
				+		m_floats[0] *= s;
			
 
				+		m_floats[1] *= s;
			
 
				+		m_floats[2] *= s;
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Inversely scale the vector
			
 
				+   * @param s Scale factor to divide by */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s)
			
 
				+	{
			
 
				+		b3FullAssert(s != b3Scalar(0.0));
			
 
				+
			
 
				+#if 0  //defined(B3_USE_SSE_IN_API)
			
 
				+// this code is not faster !
			
 
				+		__m128 vs = _mm_load_ss(&s);
			
 
				+		vs = _mm_div_ss(b3v1110, vs);
			
 
				+		vs = b3_pshufd_ps(vs, 0x00);	//	(S S S S)
			
 
				+
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vs);
			
 
				+
			
 
				+		return *this;
			
 
				+#else
			
 
				+		return *this *= b3Scalar(1.0) / s;
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the dot product
			
 
				+   * @param v The other vector in the dot product */
			
 
				+	B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
			
 
				+		__m128 z = _mm_movehl_ps(vd, vd);
			
 
				+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
			
 
				+		vd = _mm_add_ss(vd, y);
			
 
				+		vd = _mm_add_ss(vd, z);
			
 
				+		return _mm_cvtss_f32(vd);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
			
 
				+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
			
 
				+		x = vadd_f32(x, vget_high_f32(vd));
			
 
				+		return vget_lane_f32(x, 0);
			
 
				+#else
			
 
				+		return m_floats[0] * v.m_floats[0] +
			
 
				+			   m_floats[1] * v.m_floats[1] +
			
 
				+			   m_floats[2] * v.m_floats[2];
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the length of the vector squared */
			
 
				+	B3_FORCE_INLINE b3Scalar length2() const
			
 
				+	{
			
 
				+		return dot(*this);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the length of the vector */
			
 
				+	B3_FORCE_INLINE b3Scalar length() const
			
 
				+	{
			
 
				+		return b3Sqrt(length2());
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the distance squared between the ends of this and another vector
			
 
				+   * This is semantically treating the vector like a point */
			
 
				+	B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
			
 
				+
			
 
				+	/**@brief Return the distance between the ends of this and another vector
			
 
				+   * This is semantically treating the vector like a point */
			
 
				+	B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Vector3& safeNormalize()
			
 
				+	{
			
 
				+		b3Scalar l2 = length2();
			
 
				+		//triNormal.normalize();
			
 
				+		if (l2 >= B3_EPSILON * B3_EPSILON)
			
 
				+		{
			
 
				+			(*this) /= b3Sqrt(l2);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			setValue(1, 0, 0);
			
 
				+		}
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Normalize this vector
			
 
				+   * x^2 + y^2 + z^2 = 1 */
			
 
				+	B3_FORCE_INLINE b3Vector3& normalize()
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		// dot product first
			
 
				+		__m128 vd = _mm_mul_ps(mVec128, mVec128);
			
 
				+		__m128 z = _mm_movehl_ps(vd, vd);
			
 
				+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
			
 
				+		vd = _mm_add_ss(vd, y);
			
 
				+		vd = _mm_add_ss(vd, z);
			
 
				+
			
 
				+#if 0
			
 
				+        vd = _mm_sqrt_ss(vd);
			
 
				+		vd = _mm_div_ss(b3v1110, vd);
			
 
				+		vd = b3_splat_ps(vd, 0x80);
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vd);
			
 
				+#else
			
 
				+
			
 
				+		// NR step 1/sqrt(x) - vd is x, y is output
			
 
				+		y = _mm_rsqrt_ss(vd);  // estimate
			
 
				+
			
 
				+		//  one step NR
			
 
				+		z = b3v1_5;
			
 
				+		vd = _mm_mul_ss(vd, b3vHalf);  // vd * 0.5
			
 
				+		//x2 = vd;
			
 
				+		vd = _mm_mul_ss(vd, y);  // vd * 0.5 * y0
			
 
				+		vd = _mm_mul_ss(vd, y);  // vd * 0.5 * y0 * y0
			
 
				+		z = _mm_sub_ss(z, vd);   // 1.5 - vd * 0.5 * y0 * y0
			
 
				+
			
 
				+		y = _mm_mul_ss(y, z);  // y0 * (1.5 - vd * 0.5 * y0 * y0)
			
 
				+
			
 
				+		y = b3_splat_ps(y, 0x80);
			
 
				+		mVec128 = _mm_mul_ps(mVec128, y);
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+		return *this;
			
 
				+#else
			
 
				+		return *this /= length();
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return a normalized version of this vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 normalized() const;
			
 
				+
			
 
				+	/**@brief Return a rotated version of this vector
			
 
				+   * @param wAxis The axis to rotate about
			
 
				+   * @param angle The angle to rotate by */
			
 
				+	B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const;
			
 
				+
			
 
				+	/**@brief Return the angle between this and another vector
			
 
				+   * @param v The other vector */
			
 
				+	B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const
			
 
				+	{
			
 
				+		b3Scalar s = b3Sqrt(length2() * v.length2());
			
 
				+		b3FullAssert(s != b3Scalar(0.0));
			
 
				+		return b3Acos(dot(v) / s);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return a vector with the absolute values of each element */
			
 
				+	B3_FORCE_INLINE b3Vector3 absolute() const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		return b3Vector3(vabsq_f32(mVec128));
			
 
				+#else
			
 
				+		return b3MakeVector3(
			
 
				+			b3Fabs(m_floats[0]),
			
 
				+			b3Fabs(m_floats[1]),
			
 
				+			b3Fabs(m_floats[2]));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the cross product between this and another vector
			
 
				+   * @param v The other vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 T, V;
			
 
				+
			
 
				+		T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3));    //	(Y Z X 0)
			
 
				+		V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)
			
 
				+
			
 
				+		V = _mm_mul_ps(V, mVec128);
			
 
				+		T = _mm_mul_ps(T, v.mVec128);
			
 
				+		V = _mm_sub_ps(V, T);
			
 
				+
			
 
				+		V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3));
			
 
				+		return b3MakeVector3(V);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		float32x4_t T, V;
			
 
				+		// form (Y, Z, X, _) of mVec128 and v.mVec128
			
 
				+		float32x2_t Tlow = vget_low_f32(mVec128);
			
 
				+		float32x2_t Vlow = vget_low_f32(v.mVec128);
			
 
				+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
			
 
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
			
 
				+
			
 
				+		V = vmulq_f32(V, mVec128);
			
 
				+		T = vmulq_f32(T, v.mVec128);
			
 
				+		V = vsubq_f32(V, T);
			
 
				+		Vlow = vget_low_f32(V);
			
 
				+		// form (Y, Z, X, _);
			
 
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
			
 
				+		V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask);
			
 
				+
			
 
				+		return b3Vector3(V);
			
 
				+#else
			
 
				+		return b3MakeVector3(
			
 
				+			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
			
 
				+			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
			
 
				+			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		// cross:
			
 
				+		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)
			
 
				+		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)
			
 
				+
			
 
				+		V = _mm_mul_ps(V, v1.mVec128);
			
 
				+		T = _mm_mul_ps(T, v2.mVec128);
			
 
				+		V = _mm_sub_ps(V, T);
			
 
				+
			
 
				+		V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
			
 
				+
			
 
				+		// dot:
			
 
				+		V = _mm_mul_ps(V, mVec128);
			
 
				+		__m128 z = _mm_movehl_ps(V, V);
			
 
				+		__m128 y = _mm_shuffle_ps(V, V, 0x55);
			
 
				+		V = _mm_add_ss(V, y);
			
 
				+		V = _mm_add_ss(V, z);
			
 
				+		return _mm_cvtss_f32(V);
			
 
				+
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		// cross:
			
 
				+		float32x4_t T, V;
			
 
				+		// form (Y, Z, X, _) of v1.mVec128 and v2.mVec128
			
 
				+		float32x2_t Tlow = vget_low_f32(v1.mVec128);
			
 
				+		float32x2_t Vlow = vget_low_f32(v2.mVec128);
			
 
				+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
			
 
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
			
 
				+
			
 
				+		V = vmulq_f32(V, v1.mVec128);
			
 
				+		T = vmulq_f32(T, v2.mVec128);
			
 
				+		V = vsubq_f32(V, T);
			
 
				+		Vlow = vget_low_f32(V);
			
 
				+		// form (Y, Z, X, _);
			
 
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
			
 
				+
			
 
				+		// dot:
			
 
				+		V = vmulq_f32(mVec128, V);
			
 
				+		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
			
 
				+		x = vadd_f32(x, vget_high_f32(V));
			
 
				+		return vget_lane_f32(x, 0);
			
 
				+#else
			
 
				+		return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
			
 
				+			   m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
			
 
				+			   m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the axis with the smallest value
			
 
				+   * Note return values are 0,1,2 for x, y, or z */
			
 
				+	B3_FORCE_INLINE int minAxis() const
			
 
				+	{
			
 
				+		return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the axis with the largest value
			
 
				+   * Note return values are 0,1,2 for x, y, or z */
			
 
				+	B3_FORCE_INLINE int maxAxis() const
			
 
				+	{
			
 
				+		return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE int furthestAxis() const
			
 
				+	{
			
 
				+		return absolute().minAxis();
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE int closestAxis() const
			
 
				+	{
			
 
				+		return absolute().maxAxis();
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 vrt = _mm_load_ss(&rt);  //	(rt 0 0 0)
			
 
				+		b3Scalar s = b3Scalar(1.0) - rt;
			
 
				+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
			
 
				+		vs = b3_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
			
 
				+		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
			
 
				+		vrt = b3_pshufd_ps(vrt, 0x80);  //	(rt rt rt 0.0)
			
 
				+		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
			
 
				+		__m128 tmp3 = _mm_add_ps(r0, r1);
			
 
				+		mVec128 = tmp3;
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
			
 
				+		vl = vmulq_n_f32(vl, rt);
			
 
				+		mVec128 = vaddq_f32(vl, v0.mVec128);
			
 
				+#else
			
 
				+		b3Scalar s = b3Scalar(1.0) - rt;
			
 
				+		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
			
 
				+		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
			
 
				+		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
			
 
				+		//don't do the unused w component
			
 
				+		//		m_co[3] = s * v0[3] + rt * v1[3];
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the linear interpolation between this and another vector
			
 
				+   * @param v The other vector
			
 
				+   * @param t The ratio of this to v (t = 0 => return this, t=1 => return other) */
			
 
				+	B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		__m128 vt = _mm_load_ss(&t);  //	(t 0 0 0)
			
 
				+		vt = b3_pshufd_ps(vt, 0x80);  //	(rt rt rt 0.0)
			
 
				+		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
			
 
				+		vl = _mm_mul_ps(vl, vt);
			
 
				+		vl = _mm_add_ps(vl, mVec128);
			
 
				+
			
 
				+		return b3MakeVector3(vl);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
			
 
				+		vl = vmulq_n_f32(vl, t);
			
 
				+		vl = vaddq_f32(vl, mVec128);
			
 
				+
			
 
				+		return b3Vector3(vl);
			
 
				+#else
			
 
				+		return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
			
 
				+							 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
			
 
				+							 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Elementwise multiply this vector by the other
			
 
				+   * @param v The other vector */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vmulq_f32(mVec128, v.mVec128);
			
 
				+#else
			
 
				+		m_floats[0] *= v.m_floats[0];
			
 
				+		m_floats[1] *= v.m_floats[1];
			
 
				+		m_floats[2] *= v.m_floats[2];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the x value */
			
 
				+	B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
			
 
				+	/**@brief Return the y value */
			
 
				+	B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
			
 
				+	/**@brief Return the z value */
			
 
				+	B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
			
 
				+	/**@brief Return the w value */
			
 
				+	B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
			
 
				+
			
 
				+	/**@brief Set the x value */
			
 
				+	B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; };
			
 
				+	/**@brief Set the y value */
			
 
				+	B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; };
			
 
				+	/**@brief Set the z value */
			
 
				+	B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; };
			
 
				+	/**@brief Set the w value */
			
 
				+	B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; };
			
 
				+
			
 
				+	//B3_FORCE_INLINE b3Scalar&       operator[](int i)       { return (&m_floats[0])[i];	}
			
 
				+	//B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
			
 
				+	///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
			
 
				+	B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; }
			
 
				+	B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; }
			
 
				+
			
 
				+	B3_FORCE_INLINE bool operator==(const b3Vector3& other) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
			
 
				+#else
			
 
				+		return ((m_floats[3] == other.m_floats[3]) &&
			
 
				+				(m_floats[2] == other.m_floats[2]) &&
			
 
				+				(m_floats[1] == other.m_floats[1]) &&
			
 
				+				(m_floats[0] == other.m_floats[0]));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const
			
 
				+	{
			
 
				+		return !(*this == other);
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set each element to the max of the current values and the values of another b3Vector3
			
 
				+   * @param other The other b3Vector3 to compare with
			
 
				+   */
			
 
				+	B3_FORCE_INLINE void setMax(const b3Vector3& other)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = _mm_max_ps(mVec128, other.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vmaxq_f32(mVec128, other.mVec128);
			
 
				+#else
			
 
				+		b3SetMax(m_floats[0], other.m_floats[0]);
			
 
				+		b3SetMax(m_floats[1], other.m_floats[1]);
			
 
				+		b3SetMax(m_floats[2], other.m_floats[2]);
			
 
				+		b3SetMax(m_floats[3], other.m_floats[3]);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set each element to the min of the current values and the values of another b3Vector3
			
 
				+   * @param other The other b3Vector3 to compare with
			
 
				+   */
			
 
				+	B3_FORCE_INLINE void setMin(const b3Vector3& other)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = _mm_min_ps(mVec128, other.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vminq_f32(mVec128, other.mVec128);
			
 
				+#else
			
 
				+		b3SetMin(m_floats[0], other.m_floats[0]);
			
 
				+		b3SetMin(m_floats[1], other.m_floats[1]);
			
 
				+		b3SetMin(m_floats[2], other.m_floats[2]);
			
 
				+		b3SetMin(m_floats[3], other.m_floats[3]);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
			
 
				+	{
			
 
				+		m_floats[0] = _x;
			
 
				+		m_floats[1] = _y;
			
 
				+		m_floats[2] = _z;
			
 
				+		m_floats[3] = b3Scalar(0.f);
			
 
				+	}
			
 
				+
			
 
				+	void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+
			
 
				+		__m128 V = _mm_and_ps(mVec128, b3vFFF0fMask);
			
 
				+		__m128 V0 = _mm_xor_ps(b3vMzeroMask, V);
			
 
				+		__m128 V2 = _mm_movelh_ps(V0, V);
			
 
				+
			
 
				+		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
			
 
				+
			
 
				+		V0 = _mm_shuffle_ps(V0, V, 0xDB);
			
 
				+		V2 = _mm_shuffle_ps(V2, V, 0xF9);
			
 
				+
			
 
				+		v0->mVec128 = V0;
			
 
				+		v1->mVec128 = V1;
			
 
				+		v2->mVec128 = V2;
			
 
				+#else
			
 
				+		v0->setValue(0., -getZ(), getY());
			
 
				+		v1->setValue(getZ(), 0., -getX());
			
 
				+		v2->setValue(-getY(), getX(), 0.);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	void setZero()
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		int32x4_t vi = vdupq_n_s32(0);
			
 
				+		mVec128 = vreinterpretq_f32_s32(vi);
			
 
				+#else
			
 
				+		setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE bool isZero() const
			
 
				+	{
			
 
				+		return m_floats[0] == b3Scalar(0) && m_floats[1] == b3Scalar(0) && m_floats[2] == b3Scalar(0);
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE bool fuzzyZero() const
			
 
				+	{
			
 
				+		return length2() < B3_EPSILON;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn);
			
 
				+
			
 
				+	B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn);
			
 
				+
			
 
				+	B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn);
			
 
				+
			
 
				+	/**@brief returns index of maximum dot product between this and vectors in array[]
			
 
				+         * @param array The other vectors
			
 
				+         * @param array_count The number of other vectors
			
 
				+         * @param dotOut The maximum dot product */
			
 
				+	B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
			
 
				+
			
 
				+	/**@brief returns index of minimum dot product between this and vectors in array[]
			
 
				+         * @param array The other vectors
			
 
				+         * @param array_count The number of other vectors
			
 
				+         * @param dotOut The minimum dot product */
			
 
				+	B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const;
			
 
				+
			
 
				+	/* create a vector as  b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 ))  */
			
 
				+	B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+
			
 
				+		__m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
			
 
				+		__m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
			
 
				+		__m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
			
 
				+		__m128 b0 = _mm_unpacklo_ps(a0, a1);
			
 
				+		__m128 b1 = _mm_unpackhi_ps(a0, a1);
			
 
				+		__m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
			
 
				+		__m128 r = _mm_movelh_ps(b0, b2);
			
 
				+		r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
			
 
				+		a2 = _mm_and_ps(a2, b3vxyzMaskf);
			
 
				+		r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1))));
			
 
				+		return b3MakeVector3(r);
			
 
				+
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0};
			
 
				+		float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
			
 
				+		float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
			
 
				+		float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
			
 
				+		float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
			
 
				+		a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
			
 
				+		float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
			
 
				+		float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
			
 
				+		return b3Vector3(vcombine_f32(b0, b1));
			
 
				+#else
			
 
				+		return b3MakeVector3(dot(v0), dot(v1), dot(v2));
			
 
				+#endif
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+/**@brief Return the sum of two vectors (point semantics).
			
 
				+ * @param v1 Left-hand operand
			
 
				+ * @param v2 Right-hand operand
			
 
				+ * @return A new vector. The SSE and NEON paths add all four SIMD lanes
			
 
				+ * (w included); the scalar path passes only the x, y, z sums to b3MakeVector3. */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator+(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+	return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
			
 
				+#else
			
 
				+	return b3MakeVector3(
			
 
				+		v1.m_floats[0] + v2.m_floats[0],
			
 
				+		v1.m_floats[1] + v2.m_floats[1],
			
 
				+		v1.m_floats[2] + v2.m_floats[2]);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the elementwise product of two vectors.
			
 
				+ * @param v1 Left-hand operand
			
 
				+ * @param v2 Right-hand operand
			
 
				+ * @return A new vector. The SSE and NEON paths multiply all four SIMD lanes
			
 
				+ * (w included); the scalar path passes only the x, y, z products to b3MakeVector3. */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator*(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+	return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
			
 
				+#else
			
 
				+	return b3MakeVector3(
			
 
				+		v1.m_floats[0] * v2.m_floats[0],
			
 
				+		v1.m_floats[1] * v2.m_floats[1],
			
 
				+		v1.m_floats[2] * v2.m_floats[2]);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the difference between two vectors.
			
 
				+ * @param v1 Vector to subtract from
			
 
				+ * @param v2 Vector to subtract
			
 
				+ * @return A new vector holding v1 - v2. The SSE and NEON paths subtract all four
			
 
				+ * lanes and then AND the result with the FFF0 mask, which zeroes the w lane;
			
 
				+ * the scalar path passes only the x, y, z differences to b3MakeVector3. */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator-(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
			
 
				+
			
 
				+	//	without _mm_and_ps this code causes slowdown in Concave moving
			
 
				+	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
			
 
				+	return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
			
 
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
			
 
				+#else
			
 
				+	return b3MakeVector3(
			
 
				+		v1.m_floats[0] - v2.m_floats[0],
			
 
				+		v1.m_floats[1] - v2.m_floats[1],
			
 
				+		v1.m_floats[2] - v2.m_floats[2]);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the vector with the sign of each component flipped */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator-(const b3Vector3& v)
			
 
				+{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
			
 
				+	// Flip the sign bits with an XOR, then apply the same mask the SSE subtraction uses.
			
 
				+	const __m128 flipped = _mm_xor_ps(v.mVec128, b3vMzeroMask);
			
 
				+	const __m128 masked = _mm_and_ps(flipped, b3vFFF0fMask);
			
 
				+	return b3MakeVector3(masked);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	const int32x4_t flipped = veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask);
			
 
				+	return b3MakeVector3((b3SimdFloat4)flipped);
			
 
				+#else
			
 
				+	const b3Scalar nx = -v.m_floats[0];
			
 
				+	const b3Scalar ny = -v.m_floats[1];
			
 
				+	const b3Scalar nz = -v.m_floats[2];
			
 
				+	return b3MakeVector3(nx, ny, nz);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Return v with its x, y and z components multiplied by the scalar s */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator*(const b3Vector3& v, const b3Scalar& s)
			
 
				+{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+	__m128 scale = _mm_load_ss(&s);       //	(S 0 0 0)
			
 
				+	scale = b3_pshufd_ps(scale, 0x80);    //	(S S S 0.0)
			
 
				+	const __m128 scaled = _mm_mul_ps(v.mVec128, scale);
			
 
				+	return b3MakeVector3(scaled);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	const float32x4_t scaled = vmulq_n_f32(v.mVec128, s);
			
 
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)scaled, b3vFFF0Mask));
			
 
				+#else
			
 
				+	const b3Scalar sx = v.m_floats[0] * s;
			
 
				+	const b3Scalar sy = v.m_floats[1] * s;
			
 
				+	const b3Scalar sz = v.m_floats[2] * s;
			
 
				+	return b3MakeVector3(sx, sy, sz);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Scalar-on-the-left form of vector scaling; forwards to the (vector, scalar) overload */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator*(const b3Scalar& s, const b3Vector3& v)
			
 
				+{
			
 
				+	return operator*(v, s);
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the vector inversely scaled by s
			
 
				+ * @param v Vector to scale
			
 
				+ * @param s Divisor; checked to be non-zero with b3FullAssert */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator/(const b3Vector3& v, const b3Scalar& s)
			
 
				+{
			
 
				+	b3FullAssert(s != b3Scalar(0.0));
			
 
				+	// Multiply by the reciprocal. A dedicated SSE variant previously kept here
			
 
				+	// (compiled out) was noted by its author as not being faster.
			
 
				+	const b3Scalar reciprocal = b3Scalar(1.0) / s;
			
 
				+	return v * reciprocal;
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the component-wise quotient v1 / v2
			
 
				+ * @param v1 Numerator vector
			
 
				+ * @param v2 Denominator vector; no check for zero components is made here */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+operator/(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))
			
 
				+	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
			
 
				+	// Mask the raw quotient with b3vFFF0fMask before returning it.
			
 
				+	vec = _mm_and_ps(vec, b3vFFF0fMask);
			
 
				+	return b3MakeVector3(vec);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+	float32x4_t x, y, v, m;
			
 
				+
			
 
				+	x = v1.mVec128;
			
 
				+	y = v2.mVec128;
			
 
				+
			
 
				+	// Reciprocal estimate followed by two refinement steps, then multiply by x.
			
 
				+	v = vrecpeq_f32(y);     // v ~ 1/y
			
 
				+	m = vrecpsq_f32(y, v);  // m = (2-v*y)
			
 
				+	v = vmulq_f32(v, m);    // vv = v*m ~~ 1/y
			
 
				+	m = vrecpsq_f32(y, v);  // mm = (2-vv*y)
			
 
				+	v = vmulq_f32(v, x);    // x*vv
			
 
				+	v = vmulq_f32(v, m);    // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
			
 
				+
			
 
				+	return b3Vector3(v);
			
 
				+#else
			
 
				+	// Scalar fallback: only x, y and z are divided.
			
 
				+	return b3MakeVector3(
			
 
				+		v1.m_floats[0] / v2.m_floats[0],
			
 
				+		v1.m_floats[1] / v2.m_floats[1],
			
 
				+		v1.m_floats[2] / v2.m_floats[2]);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::dot: returns v1.dot(v2) */
			
 
				+B3_FORCE_INLINE b3Scalar
			
 
				+b3Dot(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+	const b3Scalar result = v1.dot(v2);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::distance2: squared distance between v1 and v2 */
			
 
				+B3_FORCE_INLINE b3Scalar
			
 
				+b3Distance2(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+	const b3Scalar squared = v1.distance2(v2);
			
 
				+	return squared;
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::distance: distance between v1 and v2 */
			
 
				+B3_FORCE_INLINE b3Scalar
			
 
				+b3Distance(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+	const b3Scalar dist = v1.distance(v2);
			
 
				+	return dist;
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::angle: returns v1.angle(v2) */
			
 
				+B3_FORCE_INLINE b3Scalar
			
 
				+b3Angle(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+	const b3Scalar theta = v1.angle(v2);
			
 
				+	return theta;
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::cross: returns v1.cross(v2) */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+b3Cross(const b3Vector3& v1, const b3Vector3& v2)
			
 
				+{
			
 
				+	const b3Vector3 result = v1.cross(v2);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Free-function form of b3Vector3::triple: returns v1.triple(v2, v3) */
			
 
				+B3_FORCE_INLINE b3Scalar
			
 
				+b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3)
			
 
				+{
			
 
				+	const b3Scalar result = v1.triple(v2, v3);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the linear interpolation between two vectors
			
 
				+ * @param v1 Start vector
			
 
				+ * @param v2 End vector
			
 
				+ * @param t Interpolation ratio (t = 0 => return v1, t = 1 => return v2) */
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
			
 
				+{
			
 
				+	const b3Vector3 blended = v1.lerp(v2, t);
			
 
				+	return blended;
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the squared length of the offset from this vector to v */
			
 
				+B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
			
 
				+{
			
 
				+	const b3Vector3 offset = v - *this;
			
 
				+	return offset.length2();
			
 
				+}
			
 
				+
			
 
				+/**@brief Return the length of the offset from this vector to v */
			
 
				+B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
			
 
				+{
			
 
				+	const b3Vector3 offset = v - *this;
			
 
				+	return offset.length();
			
 
				+}
			
 
				+
			
 
				+/**@brief Return a normalized copy of this vector; this vector itself is not modified */
			
 
				+B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
			
 
				+{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+	// Work on a copy so the in-place normalize() can be reused.
			
 
				+	b3Vector3 copy = *this;
			
 
				+	return copy.normalize();
			
 
				+#else
			
 
				+	const b3Scalar len = length();
			
 
				+	return *this / len;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Return this vector rotated about wAxis by _angle
			
 
				+ * @param wAxis Rotation axis; must be a unit length vector
			
 
				+ * @param _angle Rotation angle, passed to b3Sin / b3Cos
			
 
				+ *
			
 
				+ * Both paths compute o + x * cos(_angle) + y * sin(_angle), where o is the part of
			
 
				+ * this vector along wAxis, x = this - o, and y = wAxis cross this. */
			
 
				+B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const
			
 
				+{
			
 
				+	// wAxis must be a unit length vector
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+
			
 
				+	// O starts as the per-component products wAxis * this; summing its three
			
 
				+	// rotations below yields the dot product in each of the x, y, z lanes.
			
 
				+	__m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
			
 
				+	b3Scalar ssin = b3Sin(_angle);
			
 
				+	__m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128;
			
 
				+	O = _mm_and_ps(O, b3vFFF0fMask);
			
 
				+	b3Scalar scos = b3Cos(_angle);
			
 
				+
			
 
				+	__m128 vsin = _mm_load_ss(&ssin);  //	(S 0 0 0)
			
 
				+	__m128 vcos = _mm_load_ss(&scos);  //	(S 0 0 0)
			
 
				+
			
 
				+	__m128 Y = b3_pshufd_ps(O, 0xC9);  //	(Y Z X 0)
			
 
				+	__m128 Z = b3_pshufd_ps(O, 0xD2);  //	(Z X Y 0)
			
 
				+	O = _mm_add_ps(O, Y);
			
 
				+	vsin = b3_pshufd_ps(vsin, 0x80);  //	(S S S 0)
			
 
				+	O = _mm_add_ps(O, Z);
			
 
				+	vcos = b3_pshufd_ps(vcos, 0x80);  //	(S S S 0)
			
 
				+
			
 
				+	// vsin becomes sin * (wAxis x this); O becomes the component along wAxis;
			
 
				+	// X is the remainder of this vector after removing that component.
			
 
				+	vsin = vsin * C;
			
 
				+	O = O * wAxis.mVec128;
			
 
				+	__m128 X = mVec128 - O;
			
 
				+
			
 
				+	O = O + vsin;
			
 
				+	vcos = vcos * X;
			
 
				+	O = O + vcos;
			
 
				+
			
 
				+	return b3MakeVector3(O);
			
 
				+#else
			
 
				+	// o: component of this vector along wAxis.
			
 
				+	b3Vector3 o = wAxis * wAxis.dot(*this);
			
 
				+	// _x: what is left of this vector after removing o.
			
 
				+	b3Vector3 _x = *this - o;
			
 
				+	b3Vector3 _y;
			
 
				+
			
 
				+	_y = wAxis.cross(*this);
			
 
				+
			
 
				+	return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle));
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Find the element of array with the largest dot product against this vector
			
 
				+ * @param array Vectors to test
			
 
				+ * @param array_count Number of elements in array
			
 
				+ * @param dotOut Receives the largest dot product found
			
 
				+ * @return Index of the winning element. On the scalar path, 0 is returned when no
			
 
				+ *         element compares greater than -B3_INFINITY (for example an empty array). */
			
 
				+B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
			
 
				+{
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+#if defined _WIN32 || defined(B3_USE_SSE)
			
 
				+	const long scalar_cutoff = 10;
			
 
				+	long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
			
 
				+#elif defined B3_USE_NEON
			
 
				+	const long scalar_cutoff = 4;
			
 
				+	// Declared under the name that is actually called below, matching how minDot
			
 
				+	// declares b3_mindot_large (was _maxdot_large, which left the call undeclared).
			
 
				+	extern long (*b3_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
			
 
				+#endif
			
 
				+	// Small inputs use the scalar loop; larger ones go to the vectorised routine.
			
 
				+	if (array_count < scalar_cutoff)
			
 
				+#endif  //B3_USE_SSE || B3_USE_NEON
			
 
				+	{
			
 
				+		b3Scalar maxDot = -B3_INFINITY;
			
 
				+		// Indices are long so they cannot wrap before reaching array_count.
			
 
				+		long ptIndex = -1;
			
 
				+		for (long i = 0; i < array_count; i++)
			
 
				+		{
			
 
				+			b3Scalar dot = array[i].dot(*this);
			
 
				+
			
 
				+			if (dot > maxDot)
			
 
				+			{
			
 
				+				maxDot = dot;
			
 
				+				ptIndex = i;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		b3Assert(ptIndex >= 0);
			
 
				+		// Fall back to index 0 when nothing was selected.
			
 
				+		if (ptIndex < 0)
			
 
				+		{
			
 
				+			ptIndex = 0;
			
 
				+		}
			
 
				+		dotOut = maxDot;
			
 
				+		return ptIndex;
			
 
				+	}
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+	return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief Find the element of array with the smallest dot product against this vector
			
 
				+ * @param array Vectors to test
			
 
				+ * @param array_count Number of elements in array
			
 
				+ * @param dotOut Receives the smallest dot product found
			
 
				+ * @return Index of the winning element. On the scalar path, -1 is returned when no
			
 
				+ *         element compares less than B3_INFINITY (for example an empty array). */
			
 
				+B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const
			
 
				+{
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+#if defined B3_USE_SSE
			
 
				+	const long scalar_cutoff = 10;
			
 
				+	long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
			
 
				+#elif defined B3_USE_NEON
			
 
				+	const long scalar_cutoff = 4;
			
 
				+	extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
			
 
				+#else
			
 
				+#error unhandled arch!
			
 
				+#endif
			
 
				+
			
 
				+	// Small inputs use the scalar loop; larger ones go to the vectorised routine.
			
 
				+	if (array_count < scalar_cutoff)
			
 
				+#endif  //B3_USE_SSE || B3_USE_NEON
			
 
				+	{
			
 
				+		b3Scalar minDot = B3_INFINITY;
			
 
				+		// Indices are long so they cannot wrap before reaching array_count.
			
 
				+		long ptIndex = -1;
			
 
				+
			
 
				+		for (long i = 0; i < array_count; i++)
			
 
				+		{
			
 
				+			b3Scalar dot = array[i].dot(*this);
			
 
				+
			
 
				+			if (dot < minDot)
			
 
				+			{
			
 
				+				minDot = dot;
			
 
				+				ptIndex = i;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		dotOut = minDot;
			
 
				+
			
 
				+		return ptIndex;
			
 
				+	}
			
 
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
			
 
				+	return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**@brief b3Vector3 subclass whose helpers also take the fourth (w) component into account */
			
 
				+class b3Vector4 : public b3Vector3
			
 
				+{
			
 
				+public:
			
 
				+	/**@brief Return a copy in which all four components are replaced by their absolute values */
			
 
				+	B3_FORCE_INLINE b3Vector4 absolute4() const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+		return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		return b3Vector4(vabsq_f32(mVec128));
			
 
				+#else
			
 
				+		const b3Scalar ax = b3Fabs(m_floats[0]);
			
 
				+		const b3Scalar ay = b3Fabs(m_floats[1]);
			
 
				+		const b3Scalar az = b3Fabs(m_floats[2]);
			
 
				+		const b3Scalar aw = b3Fabs(m_floats[3]);
			
 
				+		return b3MakeVector4(ax, ay, az, aw);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the w component */
			
 
				+	b3Scalar getW() const { return m_floats[3]; }
			
 
				+
			
 
				+	/**@brief Return the index (0..3) of the largest component; ties keep the lowest index.
			
 
				+   * Returns -1 when no component is greater than -B3_LARGE_FLOAT. */
			
 
				+	B3_FORCE_INLINE int maxAxis4() const
			
 
				+	{
			
 
				+		int bestAxis = -1;
			
 
				+		b3Scalar bestVal = b3Scalar(-B3_LARGE_FLOAT);
			
 
				+		for (int axis = 0; axis < 4; ++axis)
			
 
				+		{
			
 
				+			if (m_floats[axis] > bestVal)
			
 
				+			{
			
 
				+				bestAxis = axis;
			
 
				+				bestVal = m_floats[axis];
			
 
				+			}
			
 
				+		}
			
 
				+		return bestAxis;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the index (0..3) of the smallest component; ties keep the lowest index.
			
 
				+   * Returns -1 when no component is less than B3_LARGE_FLOAT. */
			
 
				+	B3_FORCE_INLINE int minAxis4() const
			
 
				+	{
			
 
				+		int bestAxis = -1;
			
 
				+		b3Scalar bestVal = b3Scalar(B3_LARGE_FLOAT);
			
 
				+		for (int axis = 0; axis < 4; ++axis)
			
 
				+		{
			
 
				+			if (m_floats[axis] < bestVal)
			
 
				+			{
			
 
				+				bestAxis = axis;
			
 
				+				bestVal = m_floats[axis];
			
 
				+			}
			
 
				+		}
			
 
				+		return bestAxis;
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Return the index of the component with the largest absolute value */
			
 
				+	B3_FORCE_INLINE int closestAxis4() const
			
 
				+	{
			
 
				+		const b3Vector4 magnitudes = absolute4();
			
 
				+		return magnitudes.maxAxis4();
			
 
				+	}
			
 
				+
			
 
				+	/**@brief Set the values
			
 
				+   * @param _x Value of x
			
 
				+   * @param _y Value of y
			
 
				+   * @param _z Value of z
			
 
				+   * @param _w Value of w
			
 
				+   */
			
 
				+	B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
			
 
				+	{
			
 
				+		m_floats[0] = _x;
			
 
				+		m_floats[1] = _y;
			
 
				+		m_floats[2] = _z;
			
 
				+		m_floats[3] = _w;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+///b3SwapScalarEndian reverses the byte order of one scalar, useful for network and cross-platform serialization.
			
 
				+///sourceVal and destVal may refer to the same object: the source bytes are copied before destVal is written.
			
 
				+B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
			
 
				+{
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+	const int numBytes = 8;
			
 
				+#else
			
 
				+	const int numBytes = 4;
			
 
				+#endif  //B3_USE_DOUBLE_PRECISION
			
 
				+	const unsigned char* src = (const unsigned char*)&sourceVal;
			
 
				+	unsigned char* dest = (unsigned char*)&destVal;
			
 
				+
			
 
				+	// Snapshot the input first; writing dest directly would clobber bytes that
			
 
				+	// still have to be read when both references alias the same scalar.
			
 
				+	unsigned char bytes[8];
			
 
				+	for (int i = 0; i < numBytes; i++)
			
 
				+	{
			
 
				+		bytes[i] = src[i];
			
 
				+	}
			
 
				+	for (int i = 0; i < numBytes; i++)
			
 
				+	{
			
 
				+		dest[i] = bytes[numBytes - 1 - i];
			
 
				+	}
			
 
				+}
			
 
				+///b3SwapVector3Endian byte-swaps all four stored scalars of sourceVec into destVec, useful for network and cross-platform serialization
			
 
				+B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
			
 
				+{
			
 
				+	int lane = 0;
			
 
				+	while (lane < 4)
			
 
				+	{
			
 
				+		b3SwapScalarEndian(sourceVec[lane], destVec[lane]);
			
 
				+		++lane;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+///b3UnSwapVector3Endian byte-swaps all four stored scalars of vector in place, useful for network and cross-platform serialization
			
 
				+B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector)
			
 
				+{
			
 
				+	// Swap into a scratch vector, then copy the result back over the input.
			
 
				+	b3Vector3 scratch;
			
 
				+	int lane = 0;
			
 
				+	while (lane < 4)
			
 
				+	{
			
 
				+		b3SwapScalarEndian(vector[lane], scratch[lane]);
			
 
				+		++lane;
			
 
				+	}
			
 
				+	vector = scratch;
			
 
				+}
			
 
				+
			
 
				+/**@brief Build two vectors p and q perpendicular to n
			
 
				+ * @param n Input direction; indexed with [0], [1], [2]
			
 
				+ * @param p Receives the first tangent; one of its components is set to 0
			
 
				+ * @param q Receives the second tangent, computed as n x p */
			
 
				+template <class T>
			
 
				+B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q)
			
 
				+{
			
 
				+	if (b3Fabs(n[2]) > B3_SQRT12)
			
 
				+	{
			
 
				+		// n is mostly along z: choose p in the y-z plane
			
 
				+		b3Scalar lenSq = n[1] * n[1] + n[2] * n[2];
			
 
				+		b3Scalar invLen = b3RecipSqrt(lenSq);
			
 
				+		p[0] = 0;
			
 
				+		p[1] = -n[2] * invLen;
			
 
				+		p[2] = n[1] * invLen;
			
 
				+		// set q = n x p
			
 
				+		q[0] = lenSq * invLen;
			
 
				+		q[1] = -n[0] * p[2];
			
 
				+		q[2] = n[0] * p[1];
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	// otherwise choose p in the x-y plane
			
 
				+	b3Scalar lenSq = n[0] * n[0] + n[1] * n[1];
			
 
				+	b3Scalar invLen = b3RecipSqrt(lenSq);
			
 
				+	p[0] = -n[1] * invLen;
			
 
				+	p[1] = n[0] * invLen;
			
 
				+	p[2] = 0;
			
 
				+	// set q = n x p
			
 
				+	q[0] = -n[2] * p[1];
			
 
				+	q[1] = n[2] * p[0];
			
 
				+	q[2] = lenSq * invLen;
			
 
				+}
			
 
				+
			
 
				+/// Plain single-precision record filled by b3Vector3::serializeFloat and read by deSerializeFloat.
			
 
				+struct b3Vector3FloatData
			
 
				+{
			
 
				+	float m_floats[4];  ///< the four stored components, same order as b3Vector3::m_floats
			
 
				+};
			
 
				+
			
 
				+/// Plain double-precision record filled by b3Vector3::serializeDouble and read by deSerializeDouble.
			
 
				+struct b3Vector3DoubleData
			
 
				+{
			
 
				+	double m_floats[4];  ///< the four stored components, same order as b3Vector3::m_floats
			
 
				+};
			
 
				+
			
 
				+/**@brief Copy all four stored components into dataOut, converting each to float */
			
 
				+B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		dataOut.m_floats[lane] = float(m_floats[lane]);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Load all four stored components from dataIn, converting each to b3Scalar */
			
 
				+B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn)
			
 
				+{
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		m_floats[lane] = b3Scalar(dataIn.m_floats[lane]);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Copy all four stored components into dataOut, converting each to double */
			
 
				+B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		dataOut.m_floats[lane] = double(m_floats[lane]);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Load all four stored components from dataIn, converting each to b3Scalar */
			
 
				+B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn)
			
 
				+{
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		m_floats[lane] = b3Scalar(dataIn.m_floats[lane]);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Copy all four stored components into dataOut without conversion */
			
 
				+B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		dataOut.m_floats[lane] = m_floats[lane];
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Load all four stored components from dataIn without conversion */
			
 
				+B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn)
			
 
				+{
			
 
				+	for (int lane = 0; lane < 4; ++lane)
			
 
				+	{
			
 
				+		m_floats[lane] = dataIn.m_floats[lane];
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/**@brief Build a b3Vector3 by calling setValue(x, y, z) on a fresh instance */
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z)
			
 
				+{
			
 
				+	b3Vector3 result;
			
 
				+	result.setValue(x, y, z);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Build a b3Vector3 from x, y, z and then store w explicitly in the fourth component */
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
			
 
				+{
			
 
				+	b3Vector3 result;
			
 
				+	result.setValue(x, y, z);
			
 
				+	// w is assigned after setValue so that it is the value that ends up stored.
			
 
				+	result.w = w;
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Build a b3Vector4 by calling setValue(x, y, z, w) on a fresh instance */
			
 
				+inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w)
			
 
				+{
			
 
				+	b3Vector4 result;
			
 
				+	result.setValue(x, y, z, w);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
			
 
				+
			
 
				+/**@brief Build a b3Vector3 directly from a SIMD register via set128 */
			
 
				+inline b3Vector3 b3MakeVector3(b3SimdFloat4 v)
			
 
				+{
			
 
				+	b3Vector3 result;
			
 
				+	result.set128(v);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/**@brief Build a b3Vector4 directly from a SIMD register via set128 */
			
 
				+inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
			
 
				+{
			
 
				+	b3Vector4 result;
			
 
				+	result.set128(vec);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#endif  //B3_VECTOR3_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/premake4.lua
+++ b/Dependencies/include/bullet3/Bullet3Common/premake4.lua
@@ -0,0 +1,16 @@
 
				+	-- premake4 project definition for the Bullet3Common library.
			
 
				+	project "Bullet3Common"
			
 
				+
			
 
				+	language "C++"
			
 
				+				
			
 
				+	-- Built as a static library.
			
 
				+	kind "StaticLib"
			
 
				+		
			
 
				+	-- On Linux the objects are compiled with -fPIC.
			
 
				+	if os.is("Linux") then
			
 
				+	    buildoptions{"-fPIC"}
			
 
				+	end
			
 
				+
			
 
				+	-- The parent directory is on the include path.
			
 
				+	includedirs {".."}
			
 
				+
			
 
				+	-- Every .cpp and .h file in this directory belongs to the project.
			
 
				+	files {
			
 
				+		"*.cpp",
			
 
				+		"*.h"
			
 
				+	}
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3Float4.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3Float4.h
@@ -0,0 +1,90 @@
 
				+#ifndef B3_FLOAT4_H
			
 
				+#define B3_FLOAT4_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3PlatformDefinitions.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#define b3Float4 b3Vector3
			
 
				+#define b3Float4ConstArg const b3Vector3&
			
 
				+#define b3Dot3F4 b3Dot
			
 
				+#define b3Cross3 b3Cross
			
 
				+#define b3MakeFloat4 b3MakeVector3
			
 
				+/// Host-side b3Normalized: returns vec.normalized().
			
 
				+inline b3Vector3 b3Normalized(const b3Vector3& vec)
			
 
				+{
			
 
				+	const b3Vector3 unit = vec.normalized();
			
 
				+	return unit;
			
 
				+}
			
 
				+
			
 
				+/// Host-side b3FastNormalized3: on this path it simply returns v.normalized().
			
 
				+inline b3Float4 b3FastNormalized3(b3Float4ConstArg v)
			
 
				+{
			
 
				+	const b3Float4 unit = v.normalized();
			
 
				+	return unit;
			
 
				+}
			
 
				+
			
 
				+/// Returns a copy of a after applying setMax(b) to it.
			
 
				+inline b3Float4 b3MaxFloat4(const b3Float4& a, const b3Float4& b)
			
 
				+{
			
 
				+	b3Float4 result = a;
			
 
				+	result.setMax(b);
			
 
				+	return result;
			
 
				+}
			
 
				+/// Returns a copy of a after applying setMin(b) to it.
			
 
				+inline b3Float4 b3MinFloat4(const b3Float4& a, const b3Float4& b)
			
 
				+{
			
 
				+	b3Float4 result = a;
			
 
				+	result.setMin(b);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+typedef float4 b3Float4;
			
 
				+#define b3Float4ConstArg const b3Float4
			
 
				+#define b3MakeFloat4 (float4)
			
 
				+/* Three-component dot product: w is forced to 0 on both operands before dot(). */
			
 
				+float b3Dot3F4(b3Float4ConstArg v0, b3Float4ConstArg v1)
			
 
				+{
			
 
				+	float4 lhs = b3MakeFloat4(v0.xyz, 0.f);
			
 
				+	float4 rhs = b3MakeFloat4(v1.xyz, 0.f);
			
 
				+	float result = dot(lhs, rhs);
			
 
				+	return result;
			
 
				+}
			
 
				+/* Three-component cross product: w is forced to 0 on both operands before cross(). */
			
 
				+b3Float4 b3Cross3(b3Float4ConstArg v0, b3Float4ConstArg v1)
			
 
				+{
			
 
				+	float4 lhs = b3MakeFloat4(v0.xyz, 0.f);
			
 
				+	float4 rhs = b3MakeFloat4(v1.xyz, 0.f);
			
 
				+	b3Float4 result = cross(lhs, rhs);
			
 
				+	return result;
			
 
				+}
			
 
				+#define b3MinFloat4 min
			
 
				+#define b3MaxFloat4 max
			
 
				+
			
 
				+#define b3Normalized(a) normalize(a)
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+/// Returns true unless the absolute value of x, y or z exceeds 1e-6; w is not examined.
			
 
				+inline bool b3IsAlmostZero(b3Float4ConstArg v)
			
 
				+{
			
 
				+	const bool anyLarge = b3Fabs(v.x) > 1e-6 || b3Fabs(v.y) > 1e-6 || b3Fabs(v.z) > 1e-6;
			
 
				+	return !anyLarge;
			
 
				+}
			
 
				+
			
 
				+/// Scans vecArray[0..vecLen) for the element with the largest b3Dot3F4 against vec.
			
 
				+/// Writes that dot product to *dotOut and returns the element's index; when nothing
			
 
				+/// was selected (for example vecLen == 0) the assert fires and index 0 is returned.
			
 
				+inline int b3MaxDot(b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut)
			
 
				+{
			
 
				+	float best = -B3_INFINITY;
			
 
				+	int bestIndex = -1;
			
 
				+	int k = 0;
			
 
				+	while (k < vecLen)
			
 
				+	{
			
 
				+		float candidate = b3Dot3F4(vecArray[k], vec);
			
 
				+		if (candidate > best)
			
 
				+		{
			
 
				+			best = candidate;
			
 
				+			bestIndex = k;
			
 
				+		}
			
 
				+		k++;
			
 
				+	}
			
 
				+	b3Assert(bestIndex >= 0);
			
 
				+	if (bestIndex < 0)
			
 
				+	{
			
 
				+		bestIndex = 0;
			
 
				+	}
			
 
				+	*dotOut = best;
			
 
				+	return bestIndex;
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_FLOAT4_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3Int2.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3Int2.h
@@ -0,0 +1,63 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_INT2_H
			
 
				+#define B3_INT2_H
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+/// Pair of unsigned ints for host code; the non-C++ branch of this header maps the name to uint2.
			
 
				+struct b3UnsignedInt2
			
 
				+{
			
 
				+	// The two anonymous structs share storage, so the pair can be
			
 
				+	// accessed either by name (x, y) or by index (s[0], s[1]).
			
 
				+	union {
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int x, y;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int s[2];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+/// Pair of ints for host code; the non-C++ branch of this header maps the name to int2.
			
 
				+struct b3Int2
			
 
				+{
			
 
				+	// The two anonymous structs share storage, so the pair can be
			
 
				+	// accessed either by name (x, y) or by index (s[0], s[1]).
			
 
				+	union {
			
 
				+		struct
			
 
				+		{
			
 
				+			int x, y;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			int s[2];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+/// Builds a b3Int2 holding (x, y).
			
 
				+inline b3Int2 b3MakeInt2(int x, int y)
			
 
				+{
			
 
				+	b3Int2 pair;
			
 
				+	pair.s[0] = x;
			
 
				+	pair.s[1] = y;
			
 
				+	return pair;
			
 
				+}
			
 
				+#else
			
 
				+
			
 
				+#define b3UnsignedInt2 uint2
			
 
				+#define b3Int2 int2
			
 
				+#define b3MakeInt2 (int2)
			
 
				+
			
 
				+#endif  //__cplusplus
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3Int4.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3Int4.h
@@ -0,0 +1,71 @@
 
				+#ifndef B3_INT4_H
			
 
				+#define B3_INT4_H
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+
			
 
				+/// Four unsigned ints for host code, declared through B3_ATTRIBUTE_ALIGNED16;
			
 
				+/// the non-C++ branch of this header maps the name to uint4.
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)
			
 
				+b3UnsignedInt4
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	// The two anonymous structs share storage, so the components can be
			
 
				+	// accessed either by name (x, y, z, w) or by index (s[0]..s[3]).
			
 
				+	union {
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int x, y, z, w;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int s[4];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+/// Four ints for host code, declared through B3_ATTRIBUTE_ALIGNED16;
			
 
				+/// the non-C++ branch of this header maps the name to int4.
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)
			
 
				+b3Int4
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	// The two anonymous structs share storage, so the components can be
			
 
				+	// accessed either by name (x, y, z, w) or by index (s[0]..s[3]).
			
 
				+	union {
			
 
				+		struct
			
 
				+		{
			
 
				+			int x, y, z, w;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			int s[4];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+/// Builds a b3Int4 holding (x, y, z, w); w defaults to 0.
			
 
				+B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
			
 
				+{
			
 
				+	b3Int4 quad;
			
 
				+	quad.s[0] = x;
			
 
				+	quad.s[1] = y;
			
 
				+	quad.s[2] = z;
			
 
				+	quad.s[3] = w;
			
 
				+	return quad;
			
 
				+}
			
 
				+
			
 
				+/// Builds a b3UnsignedInt4 holding (x, y, z, w); w defaults to 0.
			
 
				+B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
			
 
				+{
			
 
				+	b3UnsignedInt4 quad;
			
 
				+	quad.s[0] = x;
			
 
				+	quad.s[1] = y;
			
 
				+	quad.s[2] = z;
			
 
				+	quad.s[3] = w;
			
 
				+	return quad;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#define b3UnsignedInt4 uint4
			
 
				+#define b3Int4 int4
			
 
				+#define b3MakeInt4 (int4)
			
 
				+#define b3MakeUnsignedInt4 (uint4)
			
 
				+
			
 
				+#endif  //__cplusplus
			
 
				+
			
 
				+#endif  //B3_INT4_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3Mat3x3.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3Mat3x3.h
@@ -0,0 +1,157 @@
 
				+
			
 
				+#ifndef B3_MAT3x3_H
			
 
				+#define B3_MAT3x3_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+#define b3Mat3x3 b3Matrix3x3
			
 
				+#define b3Mat3x3ConstArg const b3Matrix3x3&
			
 
				+
			
 
				+/// Host-side version: constructs the matrix directly from the quaternion.
			
 
				+inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat)
			
 
				+{
			
 
				+	const b3Mat3x3 rotation = b3Mat3x3(quat);
			
 
				+	return rotation;
			
 
				+}
			
 
				+
			
 
				+/// Host-side version: returns mat.absolute().
			
 
				+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
			
 
				+{
			
 
				+	const b3Mat3x3 result = mat.absolute();
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+#define b3GetRow(m, row) m.getRow(row)
			
 
				+
			
 
				+/// Host-side version: evaluates b * a (matrix on the left, vector on the right).
			
 
				+__inline b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b)
			
 
				+{
			
 
				+	const b3Float4 product = b * a;
			
 
				+	return product;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+/* 3x3 matrix for the non-C++ path, stored as three b3Float4 rows. */
			
 
				+typedef struct
			
 
				+{
			
 
				+	b3Float4 m_row[3]; /* row i is m_row[i]; the helpers in this header set each row's w to 0 */
			
 
				+} b3Mat3x3;
			
 
				+
			
 
				+#define b3Mat3x3ConstArg const b3Mat3x3
			
 
				+#define b3GetRow(m, row) (m.m_row[row])
			
 
				+
			
 
				+/* Expands a quaternion (x, y, z, w) into a 3x3 rotation matrix, row by row.
			
 
				+ * The w component of every output row is set to 0.
			
 
				+ * NOTE(review): the formula has no normalisation step, so it presumably expects a unit quaternion - confirm with callers. */
			
 
				+inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
			
 
				+{
			
 
				+	/* Squares of x, y and z, computed once and reused on the diagonal. */
			
 
				+	b3Float4 quat2 = (b3Float4)(quat.x * quat.x, quat.y * quat.y, quat.z * quat.z, 0.f);
			
 
				+	b3Mat3x3 out;
			
 
				+
			
 
				+	out.m_row[0].x = 1 - 2 * quat2.y - 2 * quat2.z;
			
 
				+	out.m_row[0].y = 2 * quat.x * quat.y - 2 * quat.w * quat.z;
			
 
				+	out.m_row[0].z = 2 * quat.x * quat.z + 2 * quat.w * quat.y;
			
 
				+	out.m_row[0].w = 0.f;
			
 
				+
			
 
				+	out.m_row[1].x = 2 * quat.x * quat.y + 2 * quat.w * quat.z;
			
 
				+	out.m_row[1].y = 1 - 2 * quat2.x - 2 * quat2.z;
			
 
				+	out.m_row[1].z = 2 * quat.y * quat.z - 2 * quat.w * quat.x;
			
 
				+	out.m_row[1].w = 0.f;
			
 
				+
			
 
				+	out.m_row[2].x = 2 * quat.x * quat.z - 2 * quat.w * quat.y;
			
 
				+	out.m_row[2].y = 2 * quat.y * quat.z + 2 * quat.w * quat.x;
			
 
				+	out.m_row[2].z = 1 - 2 * quat2.x - 2 * quat2.y;
			
 
				+	out.m_row[2].w = 0.f;
			
 
				+
			
 
				+	return out;
			
 
				+}
			
 
				+
			
 
				+/* Returns a matrix whose rows are fabs() of the corresponding rows of matIn. */
			
 
				+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
			
 
				+{
			
 
				+	b3Mat3x3 result;
			
 
				+	for (int row = 0; row < 3; row++)
			
 
				+	{
			
 
				+		result.m_row[row] = fabs(matIn.m_row[row]);
			
 
				+	}
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+__inline b3Mat3x3 mtZero();
			
 
				+
			
 
				+__inline b3Mat3x3 mtIdentity();
			
 
				+
			
 
				+__inline b3Mat3x3 mtTranspose(b3Mat3x3 m);
			
 
				+
			
 
				+__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);
			
 
				+
			
 
				+__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);
			
 
				+
			
 
				+__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);
			
 
				+
			
 
				+/* Returns a matrix with all three rows set to (b3Float4)(0.f). */
			
 
				+__inline b3Mat3x3 mtZero()
			
 
				+{
			
 
				+	b3Mat3x3 zero;
			
 
				+	for (int row = 0; row < 3; row++)
			
 
				+	{
			
 
				+		zero.m_row[row] = (b3Float4)(0.f);
			
 
				+	}
			
 
				+	return zero;
			
 
				+}
			
 
				+
			
 
				+/* Returns the identity matrix; each row's w is 0. */
			
 
				+__inline b3Mat3x3 mtIdentity()
			
 
				+{
			
 
				+	b3Mat3x3 identity;
			
 
				+	identity.m_row[0] = (b3Float4)(1, 0, 0, 0);
			
 
				+	identity.m_row[1] = (b3Float4)(0, 1, 0, 0);
			
 
				+	identity.m_row[2] = (b3Float4)(0, 0, 1, 0);
			
 
				+	return identity;
			
 
				+}
			
 
				+
			
 
				+/* Returns the transpose of m: output row i is column i of m, with w set to 0. */
			
 
				+__inline b3Mat3x3 mtTranspose(b3Mat3x3 m)
			
 
				+{
			
 
				+	b3Mat3x3 transposed;
			
 
				+	transposed.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
			
 
				+	transposed.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
			
 
				+	transposed.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
			
 
				+	return transposed;
			
 
				+}
			
 
				+
			
 
				+/* Matrix product a * b.
			
 
				+ * b is transposed first so that each output element is a three-component dot
			
 
				+ * product of a row of a with a row of the transposed b. Output w components are 0. */
			
 
				+__inline b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)
			
 
				+{
			
 
				+	b3Mat3x3 transB;
			
 
				+	transB = mtTranspose(b);
			
 
				+	b3Mat3x3 ans;
			
 
				+	//	why this doesn't run when 0ing in the for{}
			
 
				+	// The w lanes of a (a by-value copy) are cleared here, before the loop,
			
 
				+	// rather than inside it; see the author's note above.
			
 
				+	a.m_row[0].w = 0.f;
			
 
				+	a.m_row[1].w = 0.f;
			
 
				+	a.m_row[2].w = 0.f;
			
 
				+	for (int i = 0; i < 3; i++)
			
 
				+	{
			
 
				+		//	a.m_row[i].w = 0.f;
			
 
				+		ans.m_row[i].x = b3Dot3F4(a.m_row[i], transB.m_row[0]);
			
 
				+		ans.m_row[i].y = b3Dot3F4(a.m_row[i], transB.m_row[1]);
			
 
				+		ans.m_row[i].z = b3Dot3F4(a.m_row[i], transB.m_row[2]);
			
 
				+		ans.m_row[i].w = 0.f;
			
 
				+	}
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+/* Matrix times column vector: component i is the three-component dot of row i of a with b; w is 0. */
			
 
				+__inline b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)
			
 
				+{
			
 
				+	b3Float4 result;
			
 
				+	result.w = 0.f;
			
 
				+	result.x = b3Dot3F4(a.m_row[0], b);
			
 
				+	result.y = b3Dot3F4(a.m_row[1], b);
			
 
				+	result.z = b3Dot3F4(a.m_row[2], b);
			
 
				+	return result;
			
 
				+}
			
 
				+
			
 
				+/* Row vector times matrix: component i is the three-component dot of a with column i of b.
			
 
				+ * The returned w is set to 0, matching mtMul1; it was previously left uninitialised. */
			
 
				+__inline b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)
			
 
				+{
			
 
				+	/* Gather the columns of b so b3Dot3F4 can be applied to them. */
			
 
				+	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
			
 
				+	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
			
 
				+	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
			
 
				+
			
 
				+	b3Float4 ans;
			
 
				+	ans.x = b3Dot3F4(a, colx);
			
 
				+	ans.y = b3Dot3F4(a, coly);
			
 
				+	ans.z = b3Dot3F4(a, colz);
			
 
				+	ans.w = 0.f;
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#endif  //B3_MAT3x3_H
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3PlatformDefinitions.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3PlatformDefinitions.h
@@ -0,0 +1,41 @@
 
				+#ifndef B3_PLATFORM_DEFINITIONS_H
			
 
				+#define B3_PLATFORM_DEFINITIONS_H
			
 
				+
			
 
				+struct MyTest  // NOTE(review): nothing in the visible code refers to this type; looks like a leftover test struct - confirm before relying on it
				+{
				+	int bla;  // the only member
				+};
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+//#define b3ConstArray(a) const b3AlignedObjectArray<a>&
			
 
				+#define b3ConstArray(a) const a *
			
 
				+// Host-side stand-in for the OpenCL atomic_inc: a plain (non-atomic)
				+// post-increment of the int that `a` points to. The argument is parenthesized
				+// so pointer expressions such as `p + 1` expand correctly.
				+#define b3AtomicInc(a) ((*(a))++)
			
 
				+
			
 
				+inline int b3AtomicAdd(volatile int *p, int val)
				+{
				+	// Host-side stand-in: a plain read-modify-write with no synchronization.
				+	// Adds val to *p and returns the value *p held before the addition.
				+	const int previous = *p;
				+	*p = previous + val;
				+	return previous;
				+}
			
 
				+
			
 
				+#define __global
			
 
				+
			
 
				+#define B3_STATIC static
			
 
				+#else
			
 
				+//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
			
 
				+#define B3_LARGE_FLOAT 1e18f
			
 
				+#define B3_INFINITY 1e18f
			
 
				+#define b3Assert(a)
			
 
				+#define b3ConstArray(a) __global const a *
			
 
				+#define b3AtomicInc atomic_inc
			
 
				+#define b3AtomicAdd atomic_add
			
 
				+#define b3Fabs fabs
			
 
				+#define b3Sqrt native_sqrt
			
 
				+#define b3Sin native_sin
			
 
				+#define b3Cos native_cos
			
 
				+
			
 
				+#define B3_STATIC
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
--- a/Dependencies/include/bullet3/Bullet3Common/shared/b3Quat.h
+++ b/Dependencies/include/bullet3/Bullet3Common/shared/b3Quat.h
@@ -0,0 +1,100 @@
 
				+#ifndef B3_QUAT_H
			
 
				+#define B3_QUAT_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3PlatformDefinitions.h"
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+#include "Bullet3Common/b3Quaternion.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+
			
 
				+#define b3Quat b3Quaternion
			
 
				+#define b3QuatConstArg const b3Quaternion&
			
 
				+inline b3Quat b3QuatInverse(b3QuatConstArg orn)
				+{  // Host (C++) variant: forwards to orn.inverse(); b3Quat is #defined to b3Quaternion in this branch.
				+	return orn.inverse();
				+}
			
 
				+
			
 
				+inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
				+{
				+	// Host (C++) variant: builds a b3Transform from the given translation and
				+	// orientation, then applies it to point through the transform's call operator.
				+	b3Transform xform;
				+	xform.setOrigin(translation);
				+	xform.setRotation(orientation);
				+	return xform(point);
				+}
			
 
				+
			
 
				+#else
			
 
				+typedef float4 b3Quat;
			
 
				+#define b3QuatConstArg const b3Quat
			
 
				+
			
 
				+inline float4 b3FastNormalize4(float4 v)
				+{
				+	// Drops the w lane (replaced by 0) and normalizes what is left with
				+	// the fast_normalize built-in.
				+	const float4 xyzOnly = (float4)(v.xyz, 0.f);
				+	return fast_normalize(xyzOnly);
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
			
 
				+inline b3Quat b3QuatNormalized(b3QuatConstArg in);
			
 
				+inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
			
 
				+inline b3Quat b3QuatInvert(b3QuatConstArg q);
			
 
				+inline b3Quat b3QuatInverse(b3QuatConstArg q);
			
 
				+
			
 
				+inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
				+{
				+	// Quaternion product with the scalar part kept in .w:
				+	//   result     = b3Cross3(a, b) + a.w * b + b.w * a
				+	//   result.w   = a.w * b.w - b3Dot3F4(a, b)
				+	// The .w produced by the first expression is scratch; it is overwritten.
				+	b3Quat product = b3Cross3(a, b);
				+	product += a.w * b + b.w * a;
				+	product.w = a.w * b.w - b3Dot3F4(a, b);
				+	return product;
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatNormalized(b3QuatConstArg in)
				+{
				+	// Scales `in` by 1 / length, where length is native_sqrt(dot(in, in)).
				+	// If the length is not greater than zero the result is (0, 0, 0, 1).
				+	const float len = native_sqrt(dot(in, in));
				+	b3Quat q = in;
				+	if (len > 0.f)
				+	{
				+		q *= 1.f / len;
				+		return q;
				+	}
				+	q.x = q.y = q.z = 0.f;
				+	q.w = 1.f;
				+	return q;
				+}
			
 
				+inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
				+{
				+	// Computes q * v * b3QuatInvert(q), where v is `vec` with its w lane
				+	// forced to 0 so it acts as a pure-vector quaternion.
				+	float4 pureVec = vec;
				+	pureVec.w = 0.f;
				+	const b3Quat qTimesVec = b3QuatMul(q, pureVec);
				+	return b3QuatMul(qTimesVec, b3QuatInvert(q));
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatInverse(b3QuatConstArg q)
				+{
				+	// Same result as b3QuatInvert: the xyz lanes negated, w unchanged.
				+	// That is the conjugate, which equals the inverse only for unit-length q.
				+	return b3QuatInvert(q);
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatInvert(b3QuatConstArg q)
				+{  // Returns (-x, -y, -z, w), i.e. the conjugate; it equals the inverse only when q has unit length. No normalization happens here.
				+	return (b3Quat)(-q.xyz, q.w);
				+}
			
 
				+
			
 
				+inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)
				+{
				+	// Rotates vec by b3QuatInvert(q) instead of by q itself.
				+	const b3Quat inverted = b3QuatInvert(q);
				+	return b3QuatRotate(inverted, vec);
				+}
			
 
				+
			
 
				+inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
				+{
				+	// Device variant: rotate the point by orientation, then add translation.
				+	const b3Float4 rotated = b3QuatRotate(orientation, point);
				+	return rotated + (translation);
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#endif  //B3_QUAT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/CMakeLists.txt
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/CMakeLists.txt
@@ -0,0 +1,61 @@
 
				+
			
 
				+INCLUDE_DIRECTORIES(
			
 
				+	${BULLET_PHYSICS_SOURCE_DIR}/src
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Dynamics_SRCS
			
 
				+	b3CpuRigidBodyPipeline.cpp
			
 
				+	ConstraintSolver/b3FixedConstraint.cpp
			
 
				+	ConstraintSolver/b3Generic6DofConstraint.cpp
			
 
				+	ConstraintSolver/b3PgsJacobiSolver.cpp
			
 
				+	ConstraintSolver/b3Point2PointConstraint.cpp
			
 
				+	ConstraintSolver/b3TypedConstraint.cpp
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Dynamics_HDRS
			
 
				+	  b3CpuRigidBodyPipeline.h
			
 
				+	ConstraintSolver/b3ContactSolverInfo.h
			
 
				+	ConstraintSolver/b3FixedConstraint.h
			
 
				+	ConstraintSolver/b3Generic6DofConstraint.h
			
 
				+	ConstraintSolver/b3JacobianEntry.h
			
 
				+	ConstraintSolver/b3PgsJacobiSolver.h
			
 
				+	ConstraintSolver/b3Point2PointConstraint.h
			
 
				+	ConstraintSolver/b3SolverBody.h
			
 
				+	ConstraintSolver/b3SolverConstraint.h
			
 
				+	ConstraintSolver/b3TypedConstraint.h
			
 
				+	shared/b3ContactConstraint4.h
			
 
				+	shared/b3ConvertConstraint4.h
			
 
				+	shared/b3Inertia.h
			
 
				+	shared/b3IntegrateTransforms.h
			
 
				+)
			
 
				+
			
 
				+ADD_LIBRARY(Bullet3Dynamics ${Bullet3Dynamics_SRCS} ${Bullet3Dynamics_HDRS})  # headers are listed next to the sources in the same target
				+if (BUILD_SHARED_LIBS)  # the Bullet3Collision link dependency is only declared for shared builds
				+  target_link_libraries(Bullet3Dynamics Bullet3Collision)
				+endif ()
				+SET_TARGET_PROPERTIES(Bullet3Dynamics PROPERTIES VERSION ${BULLET_VERSION})
				+SET_TARGET_PROPERTIES(Bullet3Dynamics PROPERTIES SOVERSION ${BULLET_VERSION})  # SOVERSION is set to the same value as VERSION
			
 
				+
			
 
				+IF (INSTALL_LIBS)  # every install rule below is opt-in through INSTALL_LIBS
				+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				+		#FILES_MATCHING requires CMake 2.6
				+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)  # NOTE(review): GREATER compares "major.minor" as a number, not as a version; VERSION_GREATER would be the version-aware form - confirm before changing
				+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				+				INSTALL(TARGETS Bullet3Dynamics DESTINATION .)
				+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				+				INSTALL(TARGETS Bullet3Dynamics
				+					RUNTIME DESTINATION bin
				+					LIBRARY DESTINATION lib${LIB_SUFFIX}
				+					ARCHIVE DESTINATION lib${LIB_SUFFIX})
				+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}  # installs the *.h files of this directory tree, skipping .svn and CMakeFiles
				+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
				+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
				+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
				+
				+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)  # framework packaging: the header list becomes the target's PUBLIC_HEADER property
				+			SET_TARGET_PROPERTIES(Bullet3Dynamics PROPERTIES FRAMEWORK true)
				+			SET_TARGET_PROPERTIES(Bullet3Dynamics PROPERTIES PUBLIC_HEADER "${Bullet3Dynamics_HDRS}")
				+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
				+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
				+ENDIF (INSTALL_LIBS)
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h
@@ -0,0 +1,149 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONTACT_SOLVER_INFO
			
 
				+#define B3_CONTACT_SOLVER_INFO
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+
			
 
				+enum b3SolverMode  // bit flags; b3ContactSolverInfo's constructor ORs two of them into m_solverMode
				+{
				+	B3_SOLVER_RANDMIZE_ORDER = 1,  // spelling ("RANDMIZE") is part of the public name
				+	B3_SOLVER_FRICTION_SEPARATE = 2,
				+	B3_SOLVER_USE_WARMSTARTING = 4,
				+	B3_SOLVER_USE_2_FRICTION_DIRECTIONS = 16,  // no enumerator uses the value 8
				+	B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
				+	B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
				+	B3_SOLVER_CACHE_FRIENDLY = 128,
				+	B3_SOLVER_SIMD = 256,
				+	B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512,
				+	B3_SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024
				+};
			
 
				+
			
 
				+struct b3ContactSolverInfoData  // plain parameter block; the derived b3ContactSolverInfo constructor assigns every member
				+{
				+	b3Scalar m_tau;
				+	b3Scalar m_damping;  //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
				+	b3Scalar m_friction;
				+	b3Scalar m_timeStep;
				+	b3Scalar m_restitution;
				+	int m_numIterations;
				+	b3Scalar m_maxErrorReduction;
				+	b3Scalar m_sor;
				+	b3Scalar m_erp;        //used as Baumgarte factor
				+	b3Scalar m_erp2;       //used in Split Impulse
				+	b3Scalar m_globalCfm;  //constraint force mixing
				+	int m_splitImpulse;  // stored as int; the derived constructor assigns `true`
				+	b3Scalar m_splitImpulsePenetrationThreshold;
				+	b3Scalar m_splitImpulseTurnErp;
				+	b3Scalar m_linearSlop;
				+	b3Scalar m_warmstartingFactor;
				+
				+	int m_solverMode;  // the derived constructor stores an OR of b3SolverMode flags here
				+	int m_restingContactRestitutionThreshold;
				+	int m_minimumSolverBatchSize;
				+	b3Scalar m_maxGyroscopicForce;
				+	b3Scalar m_singleAxisRollingFrictionThreshold;
				+};
			
 
				+
			
 
				+struct b3ContactSolverInfo : public b3ContactSolverInfoData
				+{
				+	// Assigns a default to every member inherited from b3ContactSolverInfoData.
				+	// The assignments follow the member declaration order.
				+	inline b3ContactSolverInfo()
				+	{
				+		// General solver parameters.
				+		m_tau = b3Scalar(0.6);
				+		m_damping = b3Scalar(1.0);
				+		m_friction = b3Scalar(0.3);
				+		m_timeStep = b3Scalar(1.f / 60.f);
				+		m_restitution = b3Scalar(0.);
				+		m_numIterations = 10;
				+		m_maxErrorReduction = b3Scalar(20.);
				+		m_sor = b3Scalar(1.);
				+
				+		// Error reduction and constraint force mixing.
				+		m_erp = b3Scalar(0.2);
				+		m_erp2 = b3Scalar(0.8);
				+		m_globalCfm = b3Scalar(0.);
				+
				+		// Split impulse.
				+		m_splitImpulse = true;
				+		m_splitImpulsePenetrationThreshold = -.04f;
				+		m_splitImpulseTurnErp = 0.1f;
				+
				+		m_linearSlop = b3Scalar(0.0);
				+		m_warmstartingFactor = b3Scalar(0.85);
				+
				+		// Only warm starting and SIMD are enabled by default. The other
				+		// b3SolverMode flags (including B3_SOLVER_RANDMIZE_ORDER) stay off.
				+		m_solverMode = B3_SOLVER_USE_WARMSTARTING | B3_SOLVER_SIMD;
				+		// unused as of 2.81
				+		m_restingContactRestitutionThreshold = 2;
				+		// try to combine islands until the amount of constraints reaches this limit
				+		m_minimumSolverBatchSize = 128;
				+		/// only used to clamp forces for bodies that have their B3_ENABLE_GYROPSCOPIC_FORCE flag set (using b3RigidBody::setFlag)
				+		m_maxGyroscopicForce = 100.f;
				+		/// if the velocity is above this threshold, it will use a single constraint row (axis), otherwise 3 rows.
				+		m_singleAxisRollingFrictionThreshold = 1e30f;
				+	}
				+};
			
 
				+
			
 
				+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct b3ContactSolverInfoDoubleData  // double-precision serialization layout: sixteen doubles, then five ints, then 4 bytes of explicit padding
				+{
				+	double m_tau;
				+	double m_damping;  //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
				+	double m_friction;
				+	double m_timeStep;
				+	double m_restitution;
				+	double m_maxErrorReduction;
				+	double m_sor;
				+	double m_erp;        //used as Baumgarte factor
				+	double m_erp2;       //used in Split Impulse
				+	double m_globalCfm;  //constraint force mixing
				+	double m_splitImpulsePenetrationThreshold;
				+	double m_splitImpulseTurnErp;
				+	double m_linearSlop;
				+	double m_warmstartingFactor;
				+	double m_maxGyroscopicForce;
				+	double m_singleAxisRollingFrictionThreshold;
				+
				+	int m_numIterations;
				+	int m_solverMode;
				+	int m_restingContactRestitutionThreshold;
				+	int m_minimumSolverBatchSize;
				+	int m_splitImpulse;
				+	char m_padding[4];  // explicit padding after the odd number of ints; presumably keeps the size a multiple of 8 - TODO confirm
				+};
			
 
				+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct b3ContactSolverInfoFloatData  // single-precision serialization layout: same member names and order as b3ContactSolverInfoDoubleData
				+{
				+	float m_tau;
				+	float m_damping;  //global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
				+	float m_friction;
				+	float m_timeStep;
				+
				+	float m_restitution;
				+	float m_maxErrorReduction;
				+	float m_sor;
				+	float m_erp;  //used as Baumgarte factor
				+
				+	float m_erp2;       //used in Split Impulse
				+	float m_globalCfm;  //constraint force mixing
				+	float m_splitImpulsePenetrationThreshold;
				+	float m_splitImpulseTurnErp;
				+
				+	float m_linearSlop;
				+	float m_warmstartingFactor;
				+	float m_maxGyroscopicForce;
				+	float m_singleAxisRollingFrictionThreshold;
				+
				+	int m_numIterations;
				+	int m_solverMode;
				+	int m_restingContactRestitutionThreshold;
				+	int m_minimumSolverBatchSize;
				+
				+	int m_splitImpulse;
				+	char m_padding[4];  // explicit trailing padding bytes
				+};
			
 
				+
			
 
				+#endif  //B3_CONTACT_SOLVER_INFO
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp
@@ -0,0 +1,103 @@
 
				+
			
 
				+#include "b3FixedConstraint.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Common/b3TransformUtil.h"
			
 
				+#include <new>
			
 
				+
			
 
				+b3FixedConstraint::b3FixedConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB)
				+	: b3TypedConstraint(B3_FIXED_CONSTRAINT_TYPE, rbA, rbB)
				+{
				+	// The pivots are the origins of the two supplied frames.
				+	m_pivotInA = frameInA.getOrigin();
				+	m_pivotInB = frameInB.getOrigin();
				+
				+	// Target relative orientation: rotation of frame A times the inverse
				+	// of the rotation of frame B.
				+	const b3Quaternion rotA = frameInA.getRotation();
				+	const b3Quaternion invRotB = frameInB.getRotation().inverse();
				+	m_relTargetAB = rotA * invRotB;
				+}
			
 
				+
			
 
				+b3FixedConstraint::~b3FixedConstraint()
				+{  // intentionally empty: this destructor performs no work of its own
				+}
			
 
				+
			
 
				+void b3FixedConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies)
				+{
				+	// A fixed constraint always reports six rows and nub = 6.
				+	// `bodies` is not read here.
				+	info->nub = 6;
				+	info->m_numConstraintRows = 6;
				+}
			
 
				+
			
 
				+void b3FixedConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies)
				+{  // Fills six constraint rows in info: rows 0-2 from the pivot positions, rows 3-5 from the relative orientation versus m_relTargetAB.
				+	//fix the 3 linear degrees of freedom
				+	// World-space position and orientation of both bodies, read from the bodies array.
				+	const b3Vector3& worldPosA = bodies[m_rbA].m_pos;
				+	const b3Quaternion& worldOrnA = bodies[m_rbA].m_quat;
				+	const b3Vector3& worldPosB = bodies[m_rbB].m_pos;
				+	const b3Quaternion& worldOrnB = bodies[m_rbB].m_quat;
				+	// Only the diagonal entries of rows 0-2 are written; presumably the caller pre-zeroes the rows - TODO confirm.
				+	info->m_J1linearAxis[0] = 1;
				+	info->m_J1linearAxis[info->rowskip + 1] = 1;
				+	info->m_J1linearAxis[2 * info->rowskip + 2] = 1;
				+
				+	b3Vector3 a1 = b3QuatRotate(worldOrnA, m_pivotInA);  // pivot A rotated by body A's orientation
				+	{
				+		b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis);  // rows 0..2 of the angular block, viewed as b3Vector3
				+		b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip);
				+		b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip);
				+		b3Vector3 a1neg = -a1;
				+		a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2);  // body A gets the skew matrix of -a1
				+	}
				+
				+	if (info->m_J2linearAxis)  // the body-B linear block is optional
				+	{
				+		info->m_J2linearAxis[0] = -1;
				+		info->m_J2linearAxis[info->rowskip + 1] = -1;
				+		info->m_J2linearAxis[2 * info->rowskip + 2] = -1;
				+	}
				+
				+	b3Vector3 a2 = b3QuatRotate(worldOrnB, m_pivotInB);  // pivot B rotated by body B's orientation
				+	// NOTE(review): m_J2angularAxis is used in the next block without the null check that is applied to it further down - confirm it can never be null.
				+	{
				+		//	b3Vector3 a2n = -a2;
				+		b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis);
				+		b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip);
				+		b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip);
				+		a2.getSkewSymmetricMatrix(angular0, angular1, angular2);  // body B gets the skew matrix of +a2
				+	}
				+
				+	// set right hand side for the linear dofs
				+	b3Scalar k = info->fps * info->erp;  // gain shared by the linear and the angular error terms
				+	b3Vector3 linearError = k * (a2 + worldPosB - a1 - worldPosA);  // scaled difference between the two world-space pivot points
				+	int j;
				+	for (j = 0; j < 3; j++)
				+	{
				+		info->m_constraintError[j * info->rowskip] = linearError[j];
				+		//printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]);
				+	}
				+
				+	//fix the 3 angular degrees of freedom
				+
				+	int start_row = 3;  // the angular rows follow the three linear rows
				+	int s = info->rowskip;
				+	int start_index = start_row * s;
				+
				+	// 3 rows to make body rotations equal
				+	info->m_J1angularAxis[start_index] = 1;
				+	info->m_J1angularAxis[start_index + s + 1] = 1;
				+	info->m_J1angularAxis[start_index + s * 2 + 2] = 1;
				+	if (info->m_J2angularAxis)
				+	{
				+		info->m_J2angularAxis[start_index] = -1;
				+		info->m_J2angularAxis[start_index + s + 1] = -1;
				+		info->m_J2angularAxis[start_index + s * 2 + 2] = -1;
				+	}
				+
				+	// set right hand side for the angular dofs
				+
				+	b3Vector3 diff;
				+	b3Scalar angle;
				+	b3Quaternion qrelCur = worldOrnA * worldOrnB.inverse();  // current relative orientation, built the same way as m_relTargetAB in the constructor
				+
				+	b3TransformUtil::calculateDiffAxisAngleQuaternion(m_relTargetAB, qrelCur, diff, angle);  // fills diff and angle (output arguments)
				+	diff *= -angle;
				+	for (j = 0; j < 3; j++)
				+	{
				+		info->m_constraintError[(3 + j) * info->rowskip] = k * diff[j];
				+	}
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h
@@ -0,0 +1,34 @@
 
				+
			
 
				+#ifndef B3_FIXED_CONSTRAINT_H
			
 
				+#define B3_FIXED_CONSTRAINT_H
			
 
				+
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
				+b3FixedConstraint : public b3TypedConstraint
				+{  // Constraint between two rigid bodies, identified by index, that reports six constraint rows.
				+	b3Vector3 m_pivotInA;  // origin of frameInA as passed to the constructor
				+	b3Vector3 m_pivotInB;  // origin of frameInB as passed to the constructor
				+	b3Quaternion m_relTargetAB;  // rotation of frameInA times the inverse rotation of frameInB
				+
				+public:
				+	b3FixedConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB);
				+
				+	virtual ~b3FixedConstraint();
				+
				+	virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies);
				+
				+	virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies);
				+
				+	virtual void setParam(int num, b3Scalar value, int axis = -1)
				+	{  // not supported for this constraint: asserts and otherwise ignores its arguments
				+		b3Assert(0);
				+	}
				+	virtual b3Scalar getParam(int num, int axis = -1) const
				+	{  // not supported for this constraint: asserts, then returns 0
				+		b3Assert(0);
				+		return 0.f;
				+	}
				+};
			
 
				+
			
 
				+#endif  //B3_FIXED_CONSTRAINT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp
@@ -0,0 +1,737 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+/*
			
 
				+2007-09-09
			
 
				+Refactored by Francisco León
			
 
				+email: [email protected]
			
 
				+http://gimpact.sf.net
			
 
				+*/
			
 
				+
			
 
				+#include "b3Generic6DofConstraint.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+#include "Bullet3Common/b3TransformUtil.h"
			
 
				+#include "Bullet3Common/b3TransformUtil.h"
			
 
				+#include <new>
			
 
				+
			
 
				+#define D6_USE_OBSOLETE_METHOD false
			
 
				+#define D6_USE_FRAME_OFFSET true
			
 
				+
			
 
				+b3Generic6DofConstraint::b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB, bool useLinearReferenceFrameA, const b3RigidBodyData* bodies)
				+	: b3TypedConstraint(B3_D6_CONSTRAINT_TYPE, rbA, rbB), m_frameInA(frameInA), m_frameInB(frameInB), m_useLinearReferenceFrameA(useLinearReferenceFrameA), m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET), m_flags(0)
				+{  // Stores both frames and the flags, then computes the initial world-space constraint transforms from bodies[rbA] and bodies[rbB]; bodies is dereferenced, so it must be valid here.
				+	calculateTransforms(bodies);
				+}
			
 
				+
			
 
				+#define GENERIC_D6_DISABLE_WARMSTARTING 1
			
 
				+
			
 
				+b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index);
				+b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index)
				+{
				+	// Maps a flat index 0..8 to mat[index % 3][index / 3]. No range check is
				+	// done. If operator[] selects a row (TODO confirm), this walks the matrix
				+	// column by column.
				+	const int minor = index % 3;
				+	const int major = index / 3;
				+	return mat[minor][major];
				+}
			
 
				+
			
 
				+///MatrixToEulerXYZ from http://www.geometrictools.com/LibFoundation/Mathematics/Wm4Matrix3.inl.html
			
 
				+bool matrixToEulerXYZ(const b3Matrix3x3& mat, b3Vector3& xyz);
				+bool matrixToEulerXYZ(const b3Matrix3x3& mat, b3Vector3& xyz)
				+{
				+	// Extracts XYZ Euler angles from a rotation matrix laid out as
				+	//
				+	//   rot =  cy*cz          -cy*sz           sy
				+	//          cz*sx*sy+cx*sz  cx*cz-sx*sy*sz -cy*sx
				+	//         -cx*cz*sy+sx*sz  cz*sx+cx*sy*sz  cx*cy
				+	//
				+	// Returns true when element 2 (sy) lies strictly between -1 and 1, so the
				+	// decomposition is unique. Otherwise z is set to 0 and false is returned.
				+	const b3Scalar sy = btGetMatrixElem(mat, 2);
				+
				+	if (sy < b3Scalar(1.0f) && sy > b3Scalar(-1.0f))
				+	{
				+		xyz[0] = b3Atan2(-btGetMatrixElem(mat, 5), btGetMatrixElem(mat, 8));
				+		xyz[1] = b3Asin(sy);
				+		xyz[2] = b3Atan2(-btGetMatrixElem(mat, 1), btGetMatrixElem(mat, 0));
				+		return true;
				+	}
				+
				+	if (sy < b3Scalar(1.0f))
				+	{
				+		// sy <= -1.
				+		// WARNING.  Not unique.  XA - ZA = -atan2(r10,r11)
				+		xyz[0] = -b3Atan2(btGetMatrixElem(mat, 3), btGetMatrixElem(mat, 4));
				+		xyz[1] = -B3_HALF_PI;
				+		xyz[2] = b3Scalar(0.0);
				+		return false;
				+	}
				+
				+	// sy is not below 1.
				+	// WARNING.  Not unique.  XAngle + ZAngle = atan2(r10,r11)
				+	xyz[0] = b3Atan2(btGetMatrixElem(mat, 3), btGetMatrixElem(mat, 4));
				+	xyz[1] = B3_HALF_PI;
				+	xyz[2] = 0.0;
				+	return false;
				+}
			
 
				+
			
 
				+//////////////////////////// b3RotationalLimitMotor ////////////////////////////////////
			
 
				+
			
 
				+int b3RotationalLimitMotor::testLimitValue(b3Scalar test_value)
				+{
				+	// Classifies test_value against [m_loLimit, m_hiLimit] and records the
				+	// result in m_currentLimit:
				+	//   0 - free (also when m_loLimit > m_hiLimit); m_currentLimitError is left untouched
				+	//   1 - below the low limit
				+	//   2 - above the high limit
				+	// For 1 and 2, m_currentLimitError is the signed distance to the violated
				+	// limit, shifted by B3_2_PI once if it lies outside [-B3_PI, B3_PI].
				+	if (m_loLimit > m_hiLimit)
				+	{
				+		m_currentLimit = 0;
				+		return 0;
				+	}
				+
				+	const bool belowLow = test_value < m_loLimit;
				+	const bool aboveHigh = !belowLow && (test_value > m_hiLimit);
				+	if (!belowLow && !aboveHigh)
				+	{
				+		m_currentLimit = 0;
				+		return 0;
				+	}
				+
				+	const int code = belowLow ? 1 : 2;
				+	m_currentLimit = code;
				+	m_currentLimitError = test_value - (belowLow ? m_loLimit : m_hiLimit);
				+	if (m_currentLimitError > B3_PI)
				+	{
				+		m_currentLimitError -= B3_2_PI;
				+	}
				+	else if (m_currentLimitError < -B3_PI)
				+	{
				+		m_currentLimitError += B3_2_PI;
				+	}
				+	return code;
				+}
			
 
				+
			
 
				+//////////////////////////// End b3RotationalLimitMotor ////////////////////////////////////
			
 
				+
			
 
				+//////////////////////////// b3TranslationalLimitMotor ////////////////////////////////////
			
 
				+
			
 
				+int b3TranslationalLimitMotor::testLimitValue(int limitIndex, b3Scalar test_value)
				+{
				+	// Classifies test_value against the limits of axis limitIndex and stores
				+	// the code and signed error for that axis:
				+	//   0 - free (also when lower > upper), error 0
				+	//   2 - below the lower limit, error = test_value - lower
				+	//   1 - above the upper limit, error = test_value - upper
				+	// Note that the 1/2 codes are the reverse of those used by
				+	// b3RotationalLimitMotor::testLimitValue.
				+	const b3Scalar loLimit = m_lowerLimit[limitIndex];
				+	const b3Scalar hiLimit = m_upperLimit[limitIndex];
				+
				+	int code = 0;
				+	b3Scalar error = b3Scalar(0.f);
				+	if (!(loLimit > hiLimit))
				+	{
				+		if (test_value < loLimit)
				+		{
				+			code = 2;
				+			error = test_value - loLimit;
				+		}
				+		else if (test_value > hiLimit)
				+		{
				+			code = 1;
				+			error = test_value - hiLimit;
				+		}
				+	}
				+
				+	m_currentLimit[limitIndex] = code;
				+	m_currentLimitError[limitIndex] = error;
				+	return code;
				+}
			
 
				+
			
 
				+//////////////////////////// End b3TranslationalLimitMotor ////////////////////////////////////
			
 
				+
			
 
				+void b3Generic6DofConstraint::calculateAngleInfo()
				+{  // Derives m_calculatedAxisAngleDiff (Euler XYZ of the relative frame) and the three normalized m_calculatedAxis vectors from m_calculatedTransformA/B.
				+	b3Matrix3x3 relative_frame = m_calculatedTransformA.getBasis().inverse() * m_calculatedTransformB.getBasis();
				+	matrixToEulerXYZ(relative_frame, m_calculatedAxisAngleDiff);  // the bool result (uniqueness of the decomposition) is ignored here
				+	// in euler angle mode we do not actually constrain the angular velocity
				+	// along the axes axis[0] and axis[2] (although we do use axis[1]) :
				+	//
				+	//    to get			constrain w2-w1 along		...not
				+	//    ------			---------------------		------
				+	//    d(angle[0])/dt = 0	ax[1] x ax[2]			ax[0]
				+	//    d(angle[1])/dt = 0	ax[1]
				+	//    d(angle[2])/dt = 0	ax[0] x ax[1]			ax[2]
				+	//
				+	// constraining w2-w1 along an axis 'a' means that a'*(w2-w1)=0.
				+	// to prove the result for angle[0], write the expression for angle[0] from
				+	// GetInfo1 then take the derivative. to prove this for angle[2] it is
				+	// easier to take the euler rate expression for d(angle[2])/dt with respect
				+	// to the components of w and set that to 0.
				+	b3Vector3 axis0 = m_calculatedTransformB.getBasis().getColumn(0);  // column 0 of frame B
				+	b3Vector3 axis2 = m_calculatedTransformA.getBasis().getColumn(2);  // column 2 of frame A
				+
				+	m_calculatedAxis[1] = axis2.cross(axis0);  // axis 1 must be computed first: the next two lines use it
				+	m_calculatedAxis[0] = m_calculatedAxis[1].cross(axis2);
				+	m_calculatedAxis[2] = axis0.cross(m_calculatedAxis[1]);
				+
				+	m_calculatedAxis[0].normalize();
				+	m_calculatedAxis[1].normalize();
				+	m_calculatedAxis[2].normalize();
				+}
			
 
				+
			
 
				+static b3Transform getCenterOfMassTransform(const b3RigidBodyData& body)
				+{
				+	// Builds a transform from the body's stored orientation (m_quat) and
				+	// position (m_pos).
				+	return b3Transform(body.m_quat, body.m_pos);
				+}
			
 
				+
			
 
				+void b3Generic6DofConstraint::calculateTransforms(const b3RigidBodyData* bodies)
				+{
				+	// Convenience overload: reads the current transforms of both constrained
				+	// bodies from the array and forwards to the three-argument overload.
				+	const b3Transform worldA = getCenterOfMassTransform(bodies[m_rbA]);
				+	const b3Transform worldB = getCenterOfMassTransform(bodies[m_rbB]);
				+	calculateTransforms(worldA, worldB, bodies);
				+}
			
 
				+
			
 
				+void b3Generic6DofConstraint::calculateTransforms(const b3Transform& transA, const b3Transform& transB, const b3RigidBodyData* bodies)
				+{
				+	// World-space constraint frames: body transform times local frame.
				+	m_calculatedTransformA = transA * m_frameInA;
				+	m_calculatedTransformB = transB * m_frameInB;
				+	calculateLinearInfo();
				+	calculateAngleInfo();
				+
				+	// The mass-based weights are only maintained in frame-offset mode.
				+	if (!m_useOffsetForConstraintFrame)
				+	{
				+		return;
				+	}
				+
				+	// Weight factors from the inverse masses. With a zero (or negative) sum
				+	// both factors fall back to 0.5.
				+	const b3Scalar invMassA = bodies[m_rbA].m_invMass;
				+	const b3Scalar invMassB = bodies[m_rbB].m_invMass;
				+	m_hasStaticBody = (invMassA < B3_EPSILON) || (invMassB < B3_EPSILON);
				+	const b3Scalar invMassSum = invMassA + invMassB;
				+	m_factA = (invMassSum > b3Scalar(0.f)) ? invMassB / invMassSum : b3Scalar(0.5f);
				+	m_factB = b3Scalar(1.0f) - m_factA;
				+}
			
 
				+
			
 
				+bool b3Generic6DofConstraint::testAngularLimitMotor(int axis_index)
				+{
				+	// Updates the angular limit state of one axis from the current Euler angle
				+	// difference and reports whether that axis needs torques applied.
				+	b3RotationalLimitMotor& motor = m_angularLimits[axis_index];
				+	const b3Scalar adjusted = b3AdjustAngleToLimits(m_calculatedAxisAngleDiff[axis_index], motor.m_loLimit, motor.m_hiLimit);
				+	motor.m_currentPosition = adjusted;
				+	motor.testLimitValue(adjusted);
				+	return motor.needApplyTorques();
				+}
			
 
				+
			
 
				+void b3Generic6DofConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies)
				+{
				+	// Refreshes the constraint transforms, then counts one row for every
				+	// linear or angular axis that currently needs a force or torque.
				+	// nub starts at 6 and drops by one per counted row.
				+	calculateTransforms(getCenterOfMassTransform(bodies[m_rbA]), getCenterOfMassTransform(bodies[m_rbB]), bodies);
				+
				+	int activeRows = 0;
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		if (m_linearLimits.needApplyForce(axis))
				+		{
				+			++activeRows;
				+		}
				+	}
				+	// testAngularLimitMotor also updates the per-axis angular limit state.
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		if (testAngularLimitMotor(axis))
				+		{
				+			++activeRows;
				+		}
				+	}
				+
				+	info->m_numConstraintRows = activeRows;
				+	info->nub = 6 - activeRows;
				+}
			
 
				+
			
 
				+/// Reports the worst case: all six rows are reserved and nub is set to zero.
				+/// The bodies argument is not consulted.
				+void b3Generic6DofConstraint::getInfo1NonVirtual(b3ConstraintInfo1* info, const b3RigidBodyData* bodies)
				+{
				+	(void)bodies;
				+	info->nub = 0;
				+	info->m_numConstraintRows = 6;  // pre-allocate all 6
				+}
			
 
				+
			
 
				+/// Fills the solver rows for this constraint from the current centre-of-mass
				+/// transforms and velocities of its two bodies. With the offset constraint
				+/// frame the angular rows are written first; otherwise the linear rows are.
				+void b3Generic6DofConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies)
				+{
				+	const b3RigidBodyData& bodyA = bodies[m_rbA];
				+	const b3RigidBodyData& bodyB = bodies[m_rbB];
				+
				+	b3Transform transA = getCenterOfMassTransform(bodyA);
				+	b3Transform transB = getCenterOfMassTransform(bodyB);
				+	const b3Vector3& linVelA = bodyA.m_linVel;
				+	const b3Vector3& linVelB = bodyB.m_linVel;
				+	const b3Vector3& angVelA = bodyA.m_angVel;
				+	const b3Vector3& angVelB = bodyB.m_angVel;
				+
				+	if (!m_useOffsetForConstraintFrame)
				+	{
				+		// leave old version for compatibility: linear rows, then angular rows
				+		const int firstAngularRow = setLinearLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+		setAngularLimits(info, firstAngularRow, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+		return;
				+	}
				+
				+	// for stability better to solve angular limits first
				+	const int firstLinearRow = setAngularLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+	setLinearLimits(info, firstLinearRow, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+}
			
 
				+
			
 
				+/// Variant of getInfo2 that takes the body transforms and velocities explicitly.
				+/// It recomputes the constraint frames and the state of all three angular
				+/// motors before writing the rows.
				+void b3Generic6DofConstraint::getInfo2NonVirtual(b3ConstraintInfo2* info, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, const b3RigidBodyData* bodies)
				+{
				+	// prepare constraint
				+	calculateTransforms(transA, transB, bodies);
				+
				+	// refresh every angular motor; the returned flags are not needed here
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		testAngularLimitMotor(axis);
				+	}
				+
				+	if (!m_useOffsetForConstraintFrame)
				+	{
				+		// leave old version for compatibility: linear rows, then angular rows
				+		const int firstAngularRow = setLinearLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+		setAngularLimits(info, firstAngularRow, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+		return;
				+	}
				+
				+	// for stability better to solve angular limits first
				+	const int firstLinearRow = setAngularLimits(info, 0, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+	setLinearLimits(info, firstLinearRow, transA, transB, linVelA, linVelB, angVelA, angVelB);
				+}
			
 
				+
			
 
				+/// Writes one solver row for every linear axis that needs a force, starting at
				+/// the given row, and returns the index of the next free row. Each axis is
				+/// expressed through a temporary b3RotationalLimitMotor so the rotational
				+/// row-building code can be reused.
				+int b3Generic6DofConstraint::setLinearLimits(b3ConstraintInfo2* info, int row, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB)
				+{
				+	b3RotationalLimitMotor limot;
				+	for (int i = 0; i < 3; i++)
				+	{
				+		if (!m_linearLimits.needApplyForce(i))
				+		{
				+			continue;
				+		}
				+
				+		// per-axis state and limits
				+		limot.m_currentLimit = m_linearLimits.m_currentLimit[i];
				+		limot.m_currentPosition = m_linearLimits.m_currentLinearDiff[i];
				+		limot.m_currentLimitError = m_linearLimits.m_currentLimitError[i];
				+		limot.m_loLimit = m_linearLimits.m_lowerLimit[i];
				+		limot.m_hiLimit = m_linearLimits.m_upperLimit[i];
				+
				+		// motor settings
				+		limot.m_enableMotor = m_linearLimits.m_enableMotor[i];
				+		limot.m_targetVelocity = m_linearLimits.m_targetVelocity[i];
				+		limot.m_maxMotorForce = m_linearLimits.m_maxMotorForce[i];
				+
				+		// values shared by all linear axes, or fixed for linear rows
				+		limot.m_damping = m_linearLimits.m_damping;
				+		limot.m_limitSoftness = m_linearLimits.m_limitSoftness;
				+		limot.m_bounce = b3Scalar(0.f);
				+		limot.m_maxLimitForce = b3Scalar(0.f);
				+
				+		b3Vector3 axis = m_calculatedTransformA.getBasis().getColumn(i);
				+
				+		// use the per-axis override when its flag is set, else the solver default
				+		const int axisFlags = m_flags >> (i * B3_6DOF_FLAGS_AXIS_SHIFT);
				+		limot.m_normalCFM = (axisFlags & B3_6DOF_FLAGS_CFM_NORM) ? m_linearLimits.m_normalCFM[i] : info->cfm[0];
				+		limot.m_stopCFM = (axisFlags & B3_6DOF_FLAGS_CFM_STOP) ? m_linearLimits.m_stopCFM[i] : info->cfm[0];
				+		limot.m_stopERP = (axisFlags & B3_6DOF_FLAGS_ERP_STOP) ? m_linearLimits.m_stopERP[i] : info->erp;
				+
				+		if (!m_useOffsetForConstraintFrame)
				+		{
				+			row += get_limit_motor_info2(&limot, transA, transB, linVelA, linVelB, angVelA, angVelB, info, row, axis, 0);
				+			continue;
				+		}
				+
				+		// rotation is disallowed only when both orthogonal angular axes are at a limit
				+		const int orthoA = (i + 1) % 3;
				+		const int orthoB = (i + 2) % 3;
				+		const int rotAllowed = (m_angularLimits[orthoA].m_currentLimit && m_angularLimits[orthoB].m_currentLimit) ? 0 : 1;
				+		row += get_limit_motor_info2(&limot, transA, transB, linVelA, linVelB, angVelA, angVelB, info, row, axis, 0, rotAllowed);
				+	}
				+	return row;
				+}
			
 
				+
			
 
				+/// Writes one solver row for every angular axis that needs a torque, starting
				+/// at row_offset, and returns the index of the next free row. For each of
				+/// normal CFM, stop CFM and stop ERP whose override flag is not set, the
				+/// solver default from info is stored into the motor first.
				+int b3Generic6DofConstraint::setAngularLimits(b3ConstraintInfo2* info, int row_offset, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB)
				+{
				+	int nextRow = row_offset;
				+	for (int i = 0; i < 3; i++)
				+	{
				+		if (!getRotationalLimitMotor(i)->needApplyTorques())
				+		{
				+			continue;
				+		}
				+
				+		b3Vector3 axis = getAxis(i);
				+		// angular axes occupy flag slots 3..5
				+		const int axisFlags = m_flags >> ((i + 3) * B3_6DOF_FLAGS_AXIS_SHIFT);
				+		b3RotationalLimitMotor& motor = m_angularLimits[i];
				+		if ((axisFlags & B3_6DOF_FLAGS_CFM_NORM) == 0)
				+		{
				+			motor.m_normalCFM = info->cfm[0];
				+		}
				+		if ((axisFlags & B3_6DOF_FLAGS_CFM_STOP) == 0)
				+		{
				+			motor.m_stopCFM = info->cfm[0];
				+		}
				+		if ((axisFlags & B3_6DOF_FLAGS_ERP_STOP) == 0)
				+		{
				+			motor.m_stopERP = info->erp;
				+		}
				+
				+		nextRow += get_limit_motor_info2(getRotationalLimitMotor(i),
				+										 transA, transB, linVelA, linVelB, angVelA, angVelB, info, nextRow, axis, 1);
				+	}
				+
				+	return nextRow;
				+}
			
 
				+
			
 
				+/// Intentionally empty: the time step is accepted but not used.
				+void b3Generic6DofConstraint::updateRHS(b3Scalar timeStep)
				+{
				+	static_cast<void>(timeStep);  // silence the unused-parameter warning
				+}
			
 
				+
			
 
				+/// Replaces both body-local constraint frames and recomputes the derived
				+/// transforms from the current state of the bodies.
				+void b3Generic6DofConstraint::setFrames(const b3Transform& frameA, const b3Transform& frameB, const b3RigidBodyData* bodies)
				+{
				+	m_frameInB = frameB;
				+	m_frameInA = frameA;
				+
				+	// the cached world-space frames depend on the values just stored
				+	calculateTransforms(bodies);
				+}
			
 
				+
			
 
				+/// Returns a copy of the cached constraint axis stored at axis_index in
				+/// m_calculatedAxis.
				+b3Vector3 b3Generic6DofConstraint::getAxis(int axis_index) const
				+{
				+	const b3Vector3& cachedAxis = m_calculatedAxis[axis_index];
				+	return cachedAxis;
				+}
			
 
				+
			
 
				+/// Returns the component of the cached linear difference (m_calculatedLinearDiff)
				+/// for the given axis.
				+b3Scalar b3Generic6DofConstraint::getRelativePivotPosition(int axisIndex) const
				+{
				+	const b3Scalar offset = m_calculatedLinearDiff[axisIndex];
				+	return offset;
				+}
			
 
				+
			
 
				+/// Returns the component of the cached axis-angle difference
				+/// (m_calculatedAxisAngleDiff) for the given axis.
				+b3Scalar b3Generic6DofConstraint::getAngle(int axisIndex) const
				+{
				+	const b3Scalar angle = m_calculatedAxisAngleDiff[axisIndex];
				+	return angle;
				+}
			
 
				+
			
 
				+/// Stores in m_AnchorPos a blend of the two calculated frame origins. The
				+/// weight on frame A's origin is imA / (imA + imB), or 1 when body B has
				+/// zero inverse mass.
				+void b3Generic6DofConstraint::calcAnchorPos(const b3RigidBodyData* bodies)
				+{
				+	const b3Scalar imA = bodies[m_rbA].m_invMass;
				+	const b3Scalar imB = bodies[m_rbB].m_invMass;
				+
				+	// the exact comparison with zero also guards the division below
				+	const b3Scalar weight = (imB == b3Scalar(0.0)) ? b3Scalar(1.0) : (imA / (imA + imB));
				+
				+	const b3Vector3& originA = m_calculatedTransformA.getOrigin();
				+	const b3Vector3& originB = m_calculatedTransformB.getOrigin();
				+	m_AnchorPos = originA * weight + originB * (b3Scalar(1.0) - weight);
				+}
			
 
				+
			
 
				+/// Computes the offset from frame A's origin to frame B's origin, expressed in
				+/// frame A's basis, then records it per axis in the linear limit motor and
				+/// tests each axis against its limits.
				+void b3Generic6DofConstraint::calculateLinearInfo()
				+{
				+	const b3Vector3 worldOffset = m_calculatedTransformB.getOrigin() - m_calculatedTransformA.getOrigin();
				+	m_calculatedLinearDiff = m_calculatedTransformA.getBasis().inverse() * worldOffset;
				+
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		m_linearLimits.m_currentLinearDiff[axis] = m_calculatedLinearDiff[axis];
				+		m_linearLimits.testLimitValue(axis, m_calculatedLinearDiff[axis]);
				+	}
				+}
			
 
				+
			
 
				+/// Fills one solver row for a single limit/motor axis.
				+///
				+/// A row is written only when the motor is enabled or the axis is currently at
				+/// a limit; otherwise nothing in info is touched.
				+///
				+/// \param limot       limit/motor description for the axis (read only here)
				+/// \param transA      transform of body A, used for the linear lever arms
				+/// \param transB      transform of body B, used for the linear lever arms
				+/// \param linVelA     linear velocity of body A (used for bounce on linear rows)
				+/// \param linVelB     linear velocity of body B (used for bounce on linear rows)
				+/// \param angVelA     angular velocity of body A (used for bounce on angular rows)
				+/// \param angVelB     angular velocity of body B (used for bounce on angular rows)
				+/// \param info        solver row storage; entries are addressed at row * info->rowskip
				+/// \param row         index of the row to fill
				+/// \param ax1         constraint axis written into the Jacobian
				+/// \param rotational  non-zero for an angular row, zero for a linear row
				+/// \param rotAllowed  when zero and a static body is involved, the angular parts
				+///                    of a linear row are scaled by m_factA / m_factB
				+/// \return 1 if a row was written, 0 otherwise
				+int b3Generic6DofConstraint::get_limit_motor_info2(
				+	b3RotationalLimitMotor* limot,
				+	const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB,
				+	b3ConstraintInfo2* info, int row, b3Vector3& ax1, int rotational, int rotAllowed)
				+{
				+	// offset of this row inside the info arrays
				+	int srow = row * info->rowskip;
				+	bool powered = limot->m_enableMotor;
				+	int limit = limot->m_currentLimit;
				+	if (powered || limit)
				+	{  // if the joint is powered, or has joint limits, add in the extra row
				+		// angular rows go into the angular Jacobian blocks, linear rows into the linear ones
				+		b3Scalar* J1 = rotational ? info->m_J1angularAxis : info->m_J1linearAxis;
				+		b3Scalar* J2 = rotational ? info->m_J2angularAxis : info->m_J2linearAxis;
				+		if (J1)
				+		{
				+			J1[srow + 0] = ax1[0];
				+			J1[srow + 1] = ax1[1];
				+			J1[srow + 2] = ax1[2];
				+		}
				+		if (J2)
				+		{
				+			// body B receives the negated axis
				+			J2[srow + 0] = -ax1[0];
				+			J2[srow + 1] = -ax1[1];
				+			J2[srow + 2] = -ax1[2];
				+		}
				+		// a linear row also needs angular Jacobian entries (the lever-arm terms)
				+		if ((!rotational))
				+		{
				+			if (m_useOffsetForConstraintFrame)
				+			{
				+				b3Vector3 tmpA, tmpB, relA, relB;
				+				// get vector from bodyB to frameB in WCS
				+				relB = m_calculatedTransformB.getOrigin() - transB.getOrigin();
				+				// get its projection to constraint axis
				+				b3Vector3 projB = ax1 * relB.dot(ax1);
				+				// get vector directed from bodyB to constraint axis (and orthogonal to it)
				+				b3Vector3 orthoB = relB - projB;
				+				// same for bodyA
				+				relA = m_calculatedTransformA.getOrigin() - transA.getOrigin();
				+				b3Vector3 projA = ax1 * relA.dot(ax1);
				+				b3Vector3 orthoA = relA - projA;
				+				// get desired offset between frames A and B along constraint axis
				+				b3Scalar desiredOffs = limot->m_currentPosition - limot->m_currentLimitError;
				+				// desired vector from projection of center of bodyA to projection of center of bodyB to constraint axis
				+				b3Vector3 totalDist = projA + ax1 * desiredOffs - projB;
				+				// get offset vectors relA and relB
				+				relA = orthoA + totalDist * m_factA;
				+				relB = orthoB - totalDist * m_factB;
				+				tmpA = relA.cross(ax1);
				+				tmpB = relB.cross(ax1);
				+				// with a static body and rotation disallowed, scale the angular parts by the mass weights
				+				if (m_hasStaticBody && (!rotAllowed))
				+				{
				+					tmpA *= m_factA;
				+					tmpB *= m_factB;
				+				}
				+				int i;
				+				for (i = 0; i < 3; i++) info->m_J1angularAxis[srow + i] = tmpA[i];
				+				for (i = 0; i < 3; i++) info->m_J2angularAxis[srow + i] = -tmpB[i];
				+			}
				+			else
				+			{
				+				b3Vector3 ltd;  // Linear Torque Decoupling vector
				+				// lever arm from body A's origin to frame B's origin
				+				b3Vector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin();
				+				ltd = c.cross(ax1);
				+				info->m_J1angularAxis[srow + 0] = ltd[0];
				+				info->m_J1angularAxis[srow + 1] = ltd[1];
				+				info->m_J1angularAxis[srow + 2] = ltd[2];
				+
				+				// lever arm from body B's origin to frame B's origin, negated for body B
				+				c = m_calculatedTransformB.getOrigin() - transB.getOrigin();
				+				ltd = -c.cross(ax1);
				+				info->m_J2angularAxis[srow + 0] = ltd[0];
				+				info->m_J2angularAxis[srow + 1] = ltd[1];
				+				info->m_J2angularAxis[srow + 2] = ltd[2];
				+			}
				+		}
				+		// if we're limited low and high simultaneously, the joint motor is
				+		// ineffective
				+		if (limit && (limot->m_loLimit == limot->m_hiLimit)) powered = false;
				+		info->m_constraintError[srow] = b3Scalar(0.f);
				+		if (powered)
				+		{
				+			info->cfm[srow] = limot->m_normalCFM;
				+			// the motor target and force bounds are only written while the axis is not at a limit
				+			if (!limit)
				+			{
				+				// the sign of the target velocity is flipped for linear rows
				+				b3Scalar tag_vel = rotational ? limot->m_targetVelocity : -limot->m_targetVelocity;
				+
				+				b3Scalar mot_fact = getMotorFactor(limot->m_currentPosition,
				+												   limot->m_loLimit,
				+												   limot->m_hiLimit,
				+												   tag_vel,
				+												   info->fps * limot->m_stopERP);
				+				// NOTE(review): the unflipped m_targetVelocity is used here, not tag_vel - confirm this is intended
				+				info->m_constraintError[srow] += mot_fact * limot->m_targetVelocity;
				+				info->m_lowerLimit[srow] = -limot->m_maxMotorForce / info->fps;
				+				info->m_upperLimit[srow] = limot->m_maxMotorForce / info->fps;
				+			}
				+		}
				+		if (limit)
				+		{
				+			// positional correction gain
				+			b3Scalar k = info->fps * limot->m_stopERP;
				+			if (!rotational)
				+			{
				+				info->m_constraintError[srow] += k * limot->m_currentLimitError;
				+			}
				+			else
				+			{
				+				info->m_constraintError[srow] += -k * limot->m_currentLimitError;
				+			}
				+			// at a limit the stop CFM replaces any normal CFM written above
				+			info->cfm[srow] = limot->m_stopCFM;
				+			if (limot->m_loLimit == limot->m_hiLimit)
				+			{  // limited low and high simultaneously
				+				info->m_lowerLimit[srow] = -B3_INFINITY;
				+				info->m_upperLimit[srow] = B3_INFINITY;
				+			}
				+			else
				+			{
				+				// one-sided bounds: limit == 1 allows only non-negative values, any other non-zero limit only non-positive
				+				if (limit == 1)
				+				{
				+					info->m_lowerLimit[srow] = 0;
				+					info->m_upperLimit[srow] = B3_INFINITY;
				+				}
				+				else
				+				{
				+					info->m_lowerLimit[srow] = -B3_INFINITY;
				+					info->m_upperLimit[srow] = 0;
				+				}
				+				// deal with bounce
				+				if (limot->m_bounce > 0)
				+				{
				+					// calculate joint velocity
				+					b3Scalar vel;
				+					if (rotational)
				+					{
				+						vel = angVelA.dot(ax1);
				+						//make sure that if no body -> angVelB == zero vec
				+						//                        if (body1)
				+						vel -= angVelB.dot(ax1);
				+					}
				+					else
				+					{
				+						vel = linVelA.dot(ax1);
				+						//make sure that if no body -> linVelB == zero vec
				+						//                        if (body1)
				+						vel -= linVelB.dot(ax1);
				+					}
				+					// only apply bounce if the velocity is incoming, and if the
				+					// resulting c[] exceeds what we already have.
				+					if (limit == 1)
				+					{
				+						if (vel < 0)
				+						{
				+							b3Scalar newc = -limot->m_bounce * vel;
				+							if (newc > info->m_constraintError[srow])
				+								info->m_constraintError[srow] = newc;
				+						}
				+					}
				+					else
				+					{
				+						if (vel > 0)
				+						{
				+							b3Scalar newc = -limot->m_bounce * vel;
				+							if (newc < info->m_constraintError[srow])
				+								info->m_constraintError[srow] = newc;
				+						}
				+					}
				+				}
				+			}
				+		}
				+		return 1;
				+	}
				+	else
				+		return 0;
				+}
			
 
				+
			
 
				+/// Overrides the global default of one solver parameter (stop ERP, stop CFM or
				+/// normal CFM) for a single axis and marks that override in m_flags.
				+/// Axes 0..2 address the linear limits, axes 3..5 the angular limits.
				+/// Any other axis, or an unknown parameter id, only trips b3AssertConstrParams.
				+void b3Generic6DofConstraint::setParam(int num, b3Scalar value, int axis)
				+{
				+	if ((axis < 0) || (axis >= 6))
				+	{
				+		b3AssertConstrParams(0);
				+		return;
				+	}
				+
				+	const bool isLinear = (axis < 3);
				+	// every axis owns B3_6DOF_FLAGS_AXIS_SHIFT bits of m_flags
				+	const int flagShift = axis * B3_6DOF_FLAGS_AXIS_SHIFT;
				+
				+	switch (num)
				+	{
				+		case B3_CONSTRAINT_STOP_ERP:
				+			if (isLinear)
				+			{
				+				m_linearLimits.m_stopERP[axis] = value;
				+			}
				+			else
				+			{
				+				m_angularLimits[axis - 3].m_stopERP = value;
				+			}
				+			m_flags |= B3_6DOF_FLAGS_ERP_STOP << flagShift;
				+			break;
				+		case B3_CONSTRAINT_STOP_CFM:
				+			if (isLinear)
				+			{
				+				m_linearLimits.m_stopCFM[axis] = value;
				+			}
				+			else
				+			{
				+				m_angularLimits[axis - 3].m_stopCFM = value;
				+			}
				+			m_flags |= B3_6DOF_FLAGS_CFM_STOP << flagShift;
				+			break;
				+		case B3_CONSTRAINT_CFM:
				+			if (isLinear)
				+			{
				+				m_linearLimits.m_normalCFM[axis] = value;
				+			}
				+			else
				+			{
				+				m_angularLimits[axis - 3].m_normalCFM = value;
				+			}
				+			m_flags |= B3_6DOF_FLAGS_CFM_NORM << flagShift;
				+			break;
				+		default:
				+			b3AssertConstrParams(0);
				+			break;
				+	}
				+}
			
 
				+
			
 
				+/// Returns the per-axis value of one solver parameter (stop ERP, stop CFM or
				+/// normal CFM). Axes 0..2 read the linear limits, axes 3..5 the angular limits.
				+/// b3AssertConstrParams checks that the matching override flag is set.
				+/// An invalid axis or parameter id trips the assert and yields 0.
				+b3Scalar b3Generic6DofConstraint::getParam(int num, int axis) const
				+{
				+	b3Scalar result = 0;
				+	if ((axis < 0) || (axis >= 6))
				+	{
				+		b3AssertConstrParams(0);
				+		return result;
				+	}
				+
				+	const bool isLinear = (axis < 3);
				+	switch (num)
				+	{
				+		case B3_CONSTRAINT_STOP_ERP:
				+			b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
				+			if (isLinear)
				+			{
				+				result = m_linearLimits.m_stopERP[axis];
				+			}
				+			else
				+			{
				+				result = m_angularLimits[axis - 3].m_stopERP;
				+			}
				+			break;
				+		case B3_CONSTRAINT_STOP_CFM:
				+			b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
				+			if (isLinear)
				+			{
				+				result = m_linearLimits.m_stopCFM[axis];
				+			}
				+			else
				+			{
				+				result = m_angularLimits[axis - 3].m_stopCFM;
				+			}
				+			break;
				+		case B3_CONSTRAINT_CFM:
				+			b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
				+			if (isLinear)
				+			{
				+				result = m_linearLimits.m_normalCFM[axis];
				+			}
				+			else
				+			{
				+				result = m_angularLimits[axis - 3].m_normalCFM;
				+			}
				+			break;
				+		default:
				+			b3AssertConstrParams(0);
				+			break;
				+	}
				+	return result;
				+}
			
 
				+
			
 
				+/// Builds a world-space constraint frame whose Z axis is axis1 and whose Y axis
				+/// is axis2 (both normalized, X = Y cross Z), converts it into each body's local
				+/// space, and recomputes the derived transforms.
				+void b3Generic6DofConstraint::setAxis(const b3Vector3& axis1, const b3Vector3& axis2, const b3RigidBodyData* bodies)
				+{
				+	const b3Vector3 zAxis = axis1.normalized();
				+	const b3Vector3 yAxis = axis2.normalized();
				+	const b3Vector3 xAxis = yAxis.cross(zAxis);  // we want right coordinate system
				+
				+	// the three axes become the columns of the world-space basis
				+	b3Transform frameInW;
				+	frameInW.setIdentity();
				+	frameInW.getBasis().setValue(xAxis[0], yAxis[0], zAxis[0],
				+								 xAxis[1], yAxis[1], zAxis[1],
				+								 xAxis[2], yAxis[2], zAxis[2]);
				+
				+	// now get constraint frame in local coordinate systems
				+	const b3Transform worldToA = getCenterOfMassTransform(bodies[m_rbA]).inverse();
				+	m_frameInA = worldToA * frameInW;
				+	const b3Transform worldToB = getCenterOfMassTransform(bodies[m_rbB]).inverse();
				+	m_frameInB = worldToB * frameInW;
				+
				+	calculateTransforms(bodies);
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h
@@ -0,0 +1,517 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+/// 2009 March: b3Generic6DofConstraint refactored by Roman Ponomarev
			
 
				+/// Added support for generic constraint solver through getInfo1/getInfo2 methods
			
 
				+
			
 
				+/*
			
 
				+2007-09-09
			
 
				+b3Generic6DofConstraint Refactored by Francisco León
			
 
				+email: [email protected]
			
 
				+http://gimpact.sf.net
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_GENERIC_6DOF_CONSTRAINT_H
			
 
				+#define B3_GENERIC_6DOF_CONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "b3JacobianEntry.h"
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+
			
 
				+//! Rotation Limit structure for generic joints
				+/*!
				+Holds the limit, motor and softness parameters of one rotational axis together
				+with the per-step state (m_currentLimit, m_currentLimitError) that
				+testLimitValue() calculates.
				+*/
				+class b3RotationalLimitMotor
				+{
				+public:
				+	//! limit_parameters
				+	//!@{
				+	b3Scalar m_loLimit;         //!< joint limit
				+	b3Scalar m_hiLimit;         //!< joint limit
				+	b3Scalar m_targetVelocity;  //!< target motor velocity
				+	b3Scalar m_maxMotorForce;   //!< max force on motor
				+	b3Scalar m_maxLimitForce;   //!< max force on limit
				+	b3Scalar m_damping;         //!< Damping.
				+	b3Scalar m_limitSoftness;   //!< Relaxation factor
				+	b3Scalar m_normalCFM;       //!< Constraint force mixing factor
				+	b3Scalar m_stopERP;         //!< Error tolerance factor when joint is at limit
				+	b3Scalar m_stopCFM;         //!< Constraint force mixing factor when joint is at limit
				+	b3Scalar m_bounce;          //!< restitution factor
				+	bool m_enableMotor;
				+
				+	//!@}
				+
				+	//! temp_variables
				+	//!@{
				+	b3Scalar m_currentLimitError;  //!< How much is violated this limit
				+	b3Scalar m_currentPosition;    //!< current value of angle
				+	int m_currentLimit;            //!< 0=free, 1=at lo limit, 2=at hi limit
				+	b3Scalar m_accumulatedImpulse;
				+	//!@}
				+
				+	//! Creates a free axis (lo limit > hi limit) with the motor disabled.
				+	/*!
				+	Every member is given a defined value, including m_currentPosition.
				+	*/
				+	b3RotationalLimitMotor()
				+	{
				+		m_accumulatedImpulse = 0.f;
				+		m_targetVelocity = 0;
				+		m_maxMotorForce = 6.0f;
				+		m_maxLimitForce = 300.0f;
				+		m_loLimit = 1.0f;
				+		m_hiLimit = -1.0f;
				+		m_normalCFM = 0.f;
				+		m_stopERP = 0.2f;
				+		m_stopCFM = 0.f;
				+		m_bounce = 0.0f;
				+		m_damping = 1.0f;
				+		m_limitSoftness = 0.5f;
				+		m_currentLimit = 0;
				+		m_currentLimitError = 0;
				+		m_currentPosition = 0;
				+		m_enableMotor = false;
				+	}
				+
				+	//! Copies every member of limot, so the copy holds no indeterminate values.
				+	b3RotationalLimitMotor(const b3RotationalLimitMotor& limot)
				+	{
				+		m_targetVelocity = limot.m_targetVelocity;
				+		m_maxMotorForce = limot.m_maxMotorForce;
				+		m_maxLimitForce = limot.m_maxLimitForce;
				+		m_damping = limot.m_damping;
				+		m_limitSoftness = limot.m_limitSoftness;
				+		m_loLimit = limot.m_loLimit;
				+		m_hiLimit = limot.m_hiLimit;
				+		m_normalCFM = limot.m_normalCFM;
				+		m_stopERP = limot.m_stopERP;
				+		m_stopCFM = limot.m_stopCFM;
				+		m_bounce = limot.m_bounce;
				+		m_currentLimit = limot.m_currentLimit;
				+		m_currentLimitError = limot.m_currentLimitError;
				+		m_currentPosition = limot.m_currentPosition;
				+		m_accumulatedImpulse = limot.m_accumulatedImpulse;
				+		m_enableMotor = limot.m_enableMotor;
				+	}
				+
				+	//! Is limited
				+	/*!
				+	Returns false only when the low limit is greater than the high limit.
				+	*/
				+	bool isLimited()
				+	{
				+		if (m_loLimit > m_hiLimit) return false;
				+		return true;
				+	}
				+
				+	//! Need apply correction
				+	/*!
				+	Returns true when the axis is at a limit or its motor is enabled.
				+	*/
				+	bool needApplyTorques()
				+	{
				+		if (m_currentLimit == 0 && m_enableMotor == false) return false;
				+		return true;
				+	}
				+
				+	//! calculates  error
				+	/*!
				+	calculates m_currentLimit and m_currentLimitError.
				+	*/
				+	int testLimitValue(b3Scalar test_value);
				+
				+	//! apply the correction impulses for two bodies
				+	b3Scalar solveAngularLimits(b3Scalar timeStep, b3Vector3& axis, b3Scalar jacDiagABInv, b3RigidBodyData* body0, b3RigidBodyData* body1);
				+};
			
 
				+
			
 
				+//! Translation limit structure for generic joints
				+/*!
				+Holds the limits, motor settings and per-step limit state of the three linear
				+axes. All per-axis arrays and vectors are indexed 0..2.
				+*/
				+class b3TranslationalLimitMotor
				+{
				+public:
				+	b3Vector3 m_lowerLimit;  //!< the constraint lower limits
				+	b3Vector3 m_upperLimit;  //!< the constraint upper limits
				+	b3Vector3 m_accumulatedImpulse;
				+	//! Linear_Limit_parameters
				+	//!@{
				+	b3Vector3 m_normalCFM;          //!< Constraint force mixing factor
				+	b3Vector3 m_stopERP;            //!< Error tolerance factor when joint is at limit
				+	b3Vector3 m_stopCFM;            //!< Constraint force mixing factor when joint is at limit
				+	b3Vector3 m_targetVelocity;     //!< target motor velocity
				+	b3Vector3 m_maxMotorForce;      //!< max force on motor
				+	b3Vector3 m_currentLimitError;  //!< How much is violated this limit
				+	b3Vector3 m_currentLinearDiff;  //!< Current relative offset of constraint frames
				+	b3Scalar m_limitSoftness;       //!< Softness for linear limit
				+	b3Scalar m_damping;             //!< Damping for linear limit
				+	b3Scalar m_restitution;         //!< Bounce parameter for linear limit
				+	//!@}
				+	bool m_enableMotor[3];
				+	int m_currentLimit[3];  //!< 0=free, 1=at lower limit, 2=at upper limit
				+
				+	//! Creates a motor with all limits at zero and every axis motor disabled.
				+	/*!
				+	The per-step state (m_currentLimit, m_currentLimitError, m_currentLinearDiff)
				+	is zeroed as well, so needApplyForce() is well defined before the first
				+	testLimitValue() call.
				+	*/
				+	b3TranslationalLimitMotor()
				+	{
				+		m_lowerLimit.setValue(0.f, 0.f, 0.f);
				+		m_upperLimit.setValue(0.f, 0.f, 0.f);
				+		m_accumulatedImpulse.setValue(0.f, 0.f, 0.f);
				+		m_normalCFM.setValue(0.f, 0.f, 0.f);
				+		m_stopERP.setValue(0.2f, 0.2f, 0.2f);
				+		m_stopCFM.setValue(0.f, 0.f, 0.f);
				+		m_currentLimitError.setValue(0.f, 0.f, 0.f);
				+		m_currentLinearDiff.setValue(0.f, 0.f, 0.f);
				+
				+		m_limitSoftness = 0.7f;
				+		m_damping = b3Scalar(1.0f);
				+		m_restitution = b3Scalar(0.5f);
				+		for (int i = 0; i < 3; i++)
				+		{
				+			m_enableMotor[i] = false;
				+			m_currentLimit[i] = 0;
				+			m_targetVelocity[i] = b3Scalar(0.f);
				+			m_maxMotorForce[i] = b3Scalar(0.f);
				+		}
				+	}
				+
				+	//! Copies every member of other, including the per-step limit state.
				+	b3TranslationalLimitMotor(const b3TranslationalLimitMotor& other)
				+	{
				+		m_lowerLimit = other.m_lowerLimit;
				+		m_upperLimit = other.m_upperLimit;
				+		m_accumulatedImpulse = other.m_accumulatedImpulse;
				+
				+		m_limitSoftness = other.m_limitSoftness;
				+		m_damping = other.m_damping;
				+		m_restitution = other.m_restitution;
				+		m_normalCFM = other.m_normalCFM;
				+		m_stopERP = other.m_stopERP;
				+		m_stopCFM = other.m_stopCFM;
				+		m_currentLimitError = other.m_currentLimitError;
				+		m_currentLinearDiff = other.m_currentLinearDiff;
				+
				+		for (int i = 0; i < 3; i++)
				+		{
				+			m_enableMotor[i] = other.m_enableMotor[i];
				+			m_currentLimit[i] = other.m_currentLimit[i];
				+			m_targetVelocity[i] = other.m_targetVelocity[i];
				+			m_maxMotorForce[i] = other.m_maxMotorForce[i];
				+		}
				+	}
				+
				+	//! Test limit
				+	/*!
				+    - free means upper < lower,
				+    - locked means upper == lower
				+    - limited means upper > lower
				+    - limitIndex: index of the linear axis (0..2)
				+    */
				+	inline bool isLimited(int limitIndex)
				+	{
				+		return (m_upperLimit[limitIndex] >= m_lowerLimit[limitIndex]);
				+	}
				+	//! Returns true when the axis is at a limit or its motor is enabled.
				+	inline bool needApplyForce(int limitIndex)
				+	{
				+		if (m_currentLimit[limitIndex] == 0 && m_enableMotor[limitIndex] == false) return false;
				+		return true;
				+	}
				+	int testLimitValue(int limitIndex, b3Scalar test_value);
				+
				+	b3Scalar solveLinearAxis(
				+		b3Scalar timeStep,
				+		b3Scalar jacDiagABInv,
				+		b3RigidBodyData& body1, const b3Vector3& pointInA,
				+		b3RigidBodyData& body2, const b3Vector3& pointInB,
				+		int limit_index,
				+		const b3Vector3& axis_normal_on_a,
				+		const b3Vector3& anchorPos);
				+};
			
 
				+
			
 
				+//! Per-axis bit flags recording which solver parameters were overridden.
				+/*!
				+Each axis owns B3_6DOF_FLAGS_AXIS_SHIFT consecutive bits; the flags of axis n
				+are these values shifted left by n * B3_6DOF_FLAGS_AXIS_SHIFT.
				+*/
				+enum b36DofFlags
				+{
				+	B3_6DOF_FLAGS_CFM_NORM = 1 << 0,
				+	B3_6DOF_FLAGS_CFM_STOP = 1 << 1,
				+	B3_6DOF_FLAGS_ERP_STOP = 1 << 2
				+};
				+#define B3_6DOF_FLAGS_AXIS_SHIFT 3  // bits per axis
			
 
				+
			
 
				+/// b3Generic6DofConstraint between two rigidbodies each with a pivotpoint that describes the axis location in local space
			
 
				+/*!
			
 
				+b3Generic6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'.
			
 
				+currently this limit supports rotational motors<br>
			
 
				+<ul>
			
 
				+<li> For Linear limits, use b3Generic6DofConstraint.setLinearUpperLimit, b3Generic6DofConstraint.setLinearLowerLimit. You can set the parameters with the b3TranslationalLimitMotor structure accessible through the b3Generic6DofConstraint.getTranslationalLimitMotor method.
			
 
				+At this moment translational motors are not supported. May be in the future. </li>
			
 
				+
			
 
				+<li> For Angular limits, use the b3RotationalLimitMotor structure for configuring the limit.
			
 
				+This is accessible through b3Generic6DofConstraint.getLimitMotor method,
			
 
				+This brings support for limit parameters and motors. </li>
			
 
				+
			
 
				+<li> Angular limits have these possible ranges:
			
 
				+<table border=1 >
			
 
				+<tr>
			
 
				+	<td><b>AXIS</b></td>
			
 
				+	<td><b>MIN ANGLE</b></td>
			
 
				+	<td><b>MAX ANGLE</b></td>
			
 
				+</tr><tr>
			
 
				+	<td>X</td>
			
 
				+	<td>-PI</td>
			
 
				+	<td>PI</td>
			
 
				+</tr><tr>
			
 
				+	<td>Y</td>
			
 
				+	<td>-PI/2</td>
			
 
				+	<td>PI/2</td>
			
 
				+</tr><tr>
			
 
				+	<td>Z</td>
			
 
				+	<td>-PI</td>
			
 
				+	<td>PI</td>
			
 
				+</tr>
			
 
				+</table>
			
 
				+</li>
			
 
				+</ul>
			
 
				+
			
 
				+*/
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3Generic6DofConstraint : public b3TypedConstraint
			
 
				+{
			
 
				+protected:
			
 
				+	//! relative_frames
			
 
				+	//!@{
			
 
				+	b3Transform m_frameInA;  //!< the constraint space w.r.t body A
			
 
				+	b3Transform m_frameInB;  //!< the constraint space w.r.t body B
			
 
				+	//!@}
			
 
				+
			
 
				+	//! Jacobians
			
 
				+	//!@{
			
 
				+	//    b3JacobianEntry	m_jacLinear[3];//!< 3 orthogonal linear constraints
			
 
				+	//    b3JacobianEntry	m_jacAng[3];//!< 3 orthogonal angular constraints
			
 
				+	//!@}
			
 
				+
			
 
				+	//! Linear_Limit_parameters
			
 
				+	//!@{
			
 
				+	b3TranslationalLimitMotor m_linearLimits;
			
 
				+	//!@}
			
 
				+
			
 
				+	//! hinge_parameters
			
 
				+	//!@{
			
 
				+	b3RotationalLimitMotor m_angularLimits[3];
			
 
				+	//!@}
			
 
				+
			
 
				+protected:
			
 
				+	//! temporal variables
			
 
				+	//!@{
			
 
				+	b3Transform m_calculatedTransformA;
			
 
				+	b3Transform m_calculatedTransformB;
			
 
				+	b3Vector3 m_calculatedAxisAngleDiff;
			
 
				+	b3Vector3 m_calculatedAxis[3];
			
 
				+	b3Vector3 m_calculatedLinearDiff;
			
 
				+	b3Scalar m_timeStep;
			
 
				+	b3Scalar m_factA;
			
 
				+	b3Scalar m_factB;
			
 
				+	bool m_hasStaticBody;
			
 
				+
			
 
				+	b3Vector3 m_AnchorPos;  // point betwen pivots of bodies A and B to solve linear axes
			
 
				+
			
 
				+	bool m_useLinearReferenceFrameA;
			
 
				+	bool m_useOffsetForConstraintFrame;
			
 
				+
			
 
				+	int m_flags;
			
 
				+
			
 
				+	//!@}
			
 
				+
			
 
				+	b3Generic6DofConstraint& operator=(b3Generic6DofConstraint& other)
			
 
				+	{
			
 
				+		b3Assert(0);
			
 
				+		(void)other;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+	int setAngularLimits(b3ConstraintInfo2 * info, int row_offset, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB);
			
 
				+
			
 
				+	int setLinearLimits(b3ConstraintInfo2 * info, int row, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB);
			
 
				+
			
 
				+	// tests linear limits
			
 
				+	void calculateLinearInfo();
			
 
				+
			
 
				+	//! calcs the euler angles between the two bodies.
			
 
				+	void calculateAngleInfo();
			
 
				+
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB, bool useLinearReferenceFrameA, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	//! Calcs global transform of the offsets
			
 
				+	/*!
			
 
				+	Calcs the global transform for the joint offset for body A and B, and also calcs the angle differences between the bodies.
			
 
				+	\sa b3Generic6DofConstraint.getCalculatedTransformA , b3Generic6DofConstraint.getCalculatedTransformB, b3Generic6DofConstraint.calculateAngleInfo
			
 
				+	*/
			
 
				+	void calculateTransforms(const b3Transform& transA, const b3Transform& transB, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void calculateTransforms(const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	//! Gets the global transform of the offset for body A
			
 
				+	/*!
			
 
				+    \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
			
 
				+    */
			
 
				+	const b3Transform& getCalculatedTransformA() const
			
 
				+	{
			
 
				+		return m_calculatedTransformA;
			
 
				+	}
			
 
				+
			
 
				+	//! Gets the global transform of the offset for body B
			
 
				+	/*!
			
 
				+    \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
			
 
				+    */
			
 
				+	const b3Transform& getCalculatedTransformB() const
			
 
				+	{
			
 
				+		return m_calculatedTransformB;
			
 
				+	}
			
 
				+
			
 
				+	const b3Transform& getFrameOffsetA() const
			
 
				+	{
			
 
				+		return m_frameInA;
			
 
				+	}
			
 
				+
			
 
				+	const b3Transform& getFrameOffsetB() const
			
 
				+	{
			
 
				+		return m_frameInB;
			
 
				+	}
			
 
				+
			
 
				+	b3Transform& getFrameOffsetA()
			
 
				+	{
			
 
				+		return m_frameInA;
			
 
				+	}
			
 
				+
			
 
				+	b3Transform& getFrameOffsetB()
			
 
				+	{
			
 
				+		return m_frameInB;
			
 
				+	}
			
 
				+
			
 
				+	virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo1NonVirtual(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo2NonVirtual(b3ConstraintInfo2 * info, const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void updateRHS(b3Scalar timeStep);
			
 
				+
			
 
				+	//! Get the rotation axis in global coordinates
			
 
				+	b3Vector3 getAxis(int axis_index) const;
			
 
				+
			
 
				+	//! Get the relative Euler angle
			
 
				+	/*!
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+	b3Scalar getAngle(int axis_index) const;
			
 
				+
			
 
				+	//! Get the relative position of the constraint pivot
			
 
				+	/*!
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+	b3Scalar getRelativePivotPosition(int axis_index) const;
			
 
				+
			
 
				+	void setFrames(const b3Transform& frameA, const b3Transform& frameB, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	//! Test angular limit.
			
 
				+	/*!
			
 
				+	Calculates angular correction and returns true if limit needs to be corrected.
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+	bool testAngularLimitMotor(int axis_index);
			
 
				+
			
 
				+	void setLinearLowerLimit(const b3Vector3& linearLower)
			
 
				+	{
			
 
				+		m_linearLimits.m_lowerLimit = linearLower;
			
 
				+	}
			
 
				+
			
 
				+	void getLinearLowerLimit(b3Vector3 & linearLower)
			
 
				+	{
			
 
				+		linearLower = m_linearLimits.m_lowerLimit;
			
 
				+	}
			
 
				+
			
 
				+	void setLinearUpperLimit(const b3Vector3& linearUpper)
			
 
				+	{
			
 
				+		m_linearLimits.m_upperLimit = linearUpper;
			
 
				+	}
			
 
				+
			
 
				+	void getLinearUpperLimit(b3Vector3 & linearUpper)
			
 
				+	{
			
 
				+		linearUpper = m_linearLimits.m_upperLimit;
			
 
				+	}
			
 
				+
			
 
				+	void setAngularLowerLimit(const b3Vector3& angularLower)
			
 
				+	{
			
 
				+		for (int i = 0; i < 3; i++)
			
 
				+			m_angularLimits[i].m_loLimit = b3NormalizeAngle(angularLower[i]);
			
 
				+	}
			
 
				+
			
 
				+	void getAngularLowerLimit(b3Vector3 & angularLower)
			
 
				+	{
			
 
				+		for (int i = 0; i < 3; i++)
			
 
				+			angularLower[i] = m_angularLimits[i].m_loLimit;
			
 
				+	}
			
 
				+
			
 
				+	void setAngularUpperLimit(const b3Vector3& angularUpper)
			
 
				+	{
			
 
				+		for (int i = 0; i < 3; i++)
			
 
				+			m_angularLimits[i].m_hiLimit = b3NormalizeAngle(angularUpper[i]);
			
 
				+	}
			
 
				+
			
 
				+	void getAngularUpperLimit(b3Vector3 & angularUpper)
			
 
				+	{
			
 
				+		for (int i = 0; i < 3; i++)
			
 
				+			angularUpper[i] = m_angularLimits[i].m_hiLimit;
			
 
				+	}
			
 
				+
			
 
				+	//! Retrieves the angular limit information
			
 
				+	b3RotationalLimitMotor* getRotationalLimitMotor(int index)
			
 
				+	{
			
 
				+		return &m_angularLimits[index];
			
 
				+	}
			
 
				+
			
 
				+	//! Retrieves the linear limit information
			
 
				+	b3TranslationalLimitMotor* getTranslationalLimitMotor()
			
 
				+	{
			
 
				+		return &m_linearLimits;
			
 
				+	}
			
 
				+
			
 
				+	//first 3 are linear, next 3 are angular
			
 
				+	void setLimit(int axis, b3Scalar lo, b3Scalar hi)
			
 
				+	{
			
 
				+		// Sets the lower and upper limit of one axis: 0..2 select a linear axis, 3..5 an angular axis.
			
 
				+		// Any other value would index m_linearLimits / m_angularLimits out of bounds, so reject it in debug builds.
			
 
				+		b3Assert(axis >= 0 && axis < 6);
			
 
				+		if (axis < 3)
			
 
				+		{
			
 
				+			m_linearLimits.m_lowerLimit[axis] = lo;
			
 
				+			m_linearLimits.m_upperLimit[axis] = hi;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// Angular limits are stored after being passed through b3NormalizeAngle.
			
 
				+			lo = b3NormalizeAngle(lo);
			
 
				+			hi = b3NormalizeAngle(hi);
			
 
				+			m_angularLimits[axis - 3].m_loLimit = lo;
			
 
				+			m_angularLimits[axis - 3].m_hiLimit = hi;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	//! Test limit
			
 
				+	/*!
			
 
				+    - free means upper < lower,
			
 
				+    - locked means upper == lower
			
 
				+    - limited means upper > lower
			
 
				+    - limitIndex: first 3 are linear, next 3 are angular
			
 
				+    */
			
 
				+	bool isLimited(int limitIndex)
			
 
				+	{
			
 
				+		// Indices 0..2 address the linear axes and 3..5 the angular ones; anything else would index out of bounds.
			
 
				+		b3Assert(limitIndex >= 0 && limitIndex < 6);
			
 
				+		if (limitIndex < 3)
			
 
				+		{
			
 
				+			return m_linearLimits.isLimited(limitIndex);
			
 
				+		}
			
 
				+		return m_angularLimits[limitIndex - 3].isLimited();
			
 
				+	}
			
 
				+
			
 
				+	virtual void calcAnchorPos(const b3RigidBodyData* bodies);  // overridable
			
 
				+
			
 
				+	int get_limit_motor_info2(b3RotationalLimitMotor * limot,
			
 
				+							  const b3Transform& transA, const b3Transform& transB, const b3Vector3& linVelA, const b3Vector3& linVelB, const b3Vector3& angVelA, const b3Vector3& angVelB,
			
 
				+							  b3ConstraintInfo2* info, int row, b3Vector3& ax1, int rotational, int rotAllowed = false);
			
 
				+
			
 
				+	// access for UseFrameOffset
			
 
				+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
			
 
				+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
			
 
				+
			
 
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
			
 
				+	///If no axis is provided, it uses the default axis for this constraint.
			
 
				+	virtual void setParam(int num, b3Scalar value, int axis = -1);
			
 
				+	///return the local value of parameter
			
 
				+	virtual b3Scalar getParam(int num, int axis = -1) const;
			
 
				+
			
 
				+	void setAxis(const b3Vector3& axis1, const b3Vector3& axis2, const b3RigidBodyData* bodies);
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_GENERIC_6DOF_CONSTRAINT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h
@@ -0,0 +1,150 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_JACOBIAN_ENTRY_H
			
 
				+#define B3_JACOBIAN_ENTRY_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+//notes:
			
 
				+// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
			
 
				+// which makes the b3JacobianEntry memory layout 16 bytes
			
 
				+// if you only are interested in angular part, just feed massInvA and massInvB zero
			
 
				+
			
 
				+/// Jacobian entry is an abstraction that allows to describe constraints
			
 
				+/// it can be used in combination with a constraint solver
			
 
				+/// Can be used to relate the effect of an impulse to the constraint error
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3JacobianEntry
			
 
				+{
			
 
				+public:
			
 
				+	b3JacobianEntry(){};
			
 
				+	//constraint between two different rigidbodies
			
 
				+	b3JacobianEntry(
			
 
				+		const b3Matrix3x3& world2A,
			
 
				+		const b3Matrix3x3& world2B,
			
 
				+		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
			
 
				+		const b3Vector3& jointAxis,
			
 
				+		const b3Vector3& inertiaInvA,
			
 
				+		const b3Scalar massInvA,
			
 
				+		const b3Vector3& inertiaInvB,
			
 
				+		const b3Scalar massInvB)
			
 
				+		: m_linearJointAxis(jointAxis)
			
 
				+	{
			
 
				+		m_aJ = world2A * (rel_pos1.cross(m_linearJointAxis));
			
 
				+		m_bJ = world2B * (rel_pos2.cross(-m_linearJointAxis));
			
 
				+		m_0MinvJt = inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	//angular constraint between two different rigidbodies
			
 
				+	b3JacobianEntry(const b3Vector3& jointAxis,
			
 
				+					const b3Matrix3x3& world2A,
			
 
				+					const b3Matrix3x3& world2B,
			
 
				+					const b3Vector3& inertiaInvA,
			
 
				+					const b3Vector3& inertiaInvB)
			
 
				+		: m_linearJointAxis(b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)))
			
 
				+	{
			
 
				+		m_aJ = world2A * jointAxis;
			
 
				+		m_bJ = world2B * -jointAxis;
			
 
				+		m_0MinvJt = inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	//angular constraint between two different rigidbodies
			
 
				+	b3JacobianEntry(const b3Vector3& axisInA,
			
 
				+					const b3Vector3& axisInB,
			
 
				+					const b3Vector3& inertiaInvA,
			
 
				+					const b3Vector3& inertiaInvB)
			
 
				+		: m_linearJointAxis(b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.))), m_aJ(axisInA), m_bJ(-axisInB)
			
 
				+	{
			
 
				+		m_0MinvJt = inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	//constraint on one rigidbody
			
 
				+	b3JacobianEntry(
			
 
				+		const b3Matrix3x3& world2A,
			
 
				+		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
			
 
				+		const b3Vector3& jointAxis,
			
 
				+		const b3Vector3& inertiaInvA,
			
 
				+		const b3Scalar massInvA)
			
 
				+		: m_linearJointAxis(jointAxis)
			
 
				+	{
			
 
				+		m_aJ = world2A * (rel_pos1.cross(jointAxis));
			
 
				+		m_bJ = world2A * (rel_pos2.cross(-jointAxis));
			
 
				+		m_0MinvJt = inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));
			
 
				+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar getDiagonal() const { return m_Adiag; }
			
 
				+
			
 
				+	// for two constraints on the same rigidbody (for example vehicle friction)
			
 
				+	b3Scalar getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA) const
			
 
				+	{
			
 
				+		const b3JacobianEntry& jacA = *this;
			
 
				+		b3Scalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
			
 
				+		b3Scalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
			
 
				+		return lin + ang;
			
 
				+	}
			
 
				+
			
 
				+	// for two constraints sharing the same two rigid bodies (for example two contact points between two rigid bodies)
			
 
				+	b3Scalar getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA, const b3Scalar massInvB) const
			
 
				+	{
			
 
				+		const b3JacobianEntry& jacA = *this;
			
 
				+		b3Vector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis;
			
 
				+		b3Vector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
			
 
				+		b3Vector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
			
 
				+		b3Vector3 lin0 = massInvA * lin;
			
 
				+		b3Vector3 lin1 = massInvB * lin;
			
 
				+		b3Vector3 sum = ang0 + ang1 + lin0 + lin1;
			
 
				+		return sum[0] + sum[1] + sum[2];
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar getRelativeVelocity(const b3Vector3& linvelA, const b3Vector3& angvelA, const b3Vector3& linvelB, const b3Vector3& angvelB)
			
 
				+	{
			
 
				+		b3Vector3 linrel = linvelA - linvelB;
			
 
				+		b3Vector3 angvela = angvelA * m_aJ;
			
 
				+		b3Vector3 angvelb = angvelB * m_bJ;
			
 
				+		linrel *= m_linearJointAxis;
			
 
				+		angvela += angvelb;
			
 
				+		angvela += linrel;
			
 
				+		b3Scalar rel_vel2 = angvela[0] + angvela[1] + angvela[2];
			
 
				+		return rel_vel2 + B3_EPSILON;
			
 
				+	}
			
 
				+	//private:
			
 
				+
			
 
				+	b3Vector3 m_linearJointAxis;
			
 
				+	b3Vector3 m_aJ;
			
 
				+	b3Vector3 m_bJ;
			
 
				+	b3Vector3 m_0MinvJt;
			
 
				+	b3Vector3 m_1MinvJt;
			
 
				+	//Optimization: can be stored in the w/last component of one of the vectors
			
 
				+	b3Scalar m_Adiag;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_JACOBIAN_ENTRY_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp
@@ -0,0 +1,1696 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2012 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+//enable B3_SOLVER_DEBUG if you experience solver crashes
			
 
				+//#define B3_SOLVER_DEBUG
			
 
				+//#define COMPUTE_IMPULSE_DENOM 1
			
 
				+//It is not necessary (redundant) to refresh contact manifolds, this refresh has been moved to the collision algorithms.
			
 
				+
			
 
				+//#define DISABLE_JOINTS
			
 
				+
			
 
				+#include "b3PgsJacobiSolver.h"
			
 
				+#include "Bullet3Common/b3MinMax.h"
			
 
				+#include "b3TypedConstraint.h"
			
 
				+#include <new>
			
 
				+#include "Bullet3Common/b3StackAlloc.h"
			
 
				+
			
 
				+//#include "b3SolverBody.h"
			
 
				+//#include "b3SolverConstraint.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include <string.h>  //for memset
			
 
				+//#include "../../dynamics/basic_demo/Stubs/AdlContact4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+static b3Transform getWorldTransform(b3RigidBodyData* rb)
			
 
				+{
			
 
				+	// Builds a transform whose origin is the body's m_pos and whose rotation is the body's m_quat.
			
 
				+	b3Transform newTrans;
			
 
				+	newTrans.setOrigin(rb->m_pos);
			
 
				+	newTrans.setRotation(rb->m_quat);
			
 
				+	return newTrans;
			
 
				+}
			
 
				+
			
 
				+static const b3Matrix3x3& getInvInertiaTensorWorld(b3InertiaData* inertia)
			
 
				+{
			
 
				+	// Returns a reference to the m_invInertiaWorld member of the given inertia record (no copy is made).
			
 
				+	return inertia->m_invInertiaWorld;
			
 
				+}
			
 
				+
			
 
				+static const b3Vector3& getLinearVelocity(b3RigidBodyData* rb)
			
 
				+{
			
 
				+	// Returns a reference to the body's m_linVel member.
			
 
				+	return rb->m_linVel;
			
 
				+}
			
 
				+
			
 
				+static const b3Vector3& getAngularVelocity(b3RigidBodyData* rb)
			
 
				+{
			
 
				+	// Returns a reference to the body's m_angVel member.
			
 
				+	return rb->m_angVel;
			
 
				+}
			
 
				+
			
 
				+static b3Vector3 getVelocityInLocalPoint(b3RigidBodyData* rb, const b3Vector3& rel_pos)
			
 
				+{
			
 
				+	// Velocity of a point attached to the body: linear velocity plus (angular velocity x rel_pos).
			
 
				+	// NOTE(review): rel_pos is presumably the offset from the body origin to the point - confirm with callers.
			
 
				+	//we also calculate lin/ang velocity for kinematic objects
			
 
				+	return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos);
			
 
				+}
			
 
				+
			
 
				+// Per-contact working data filled in by getContactPoint() for the solver setup.
			
 
				+struct b3ContactPoint
			
 
				+{
			
 
				+	b3Vector3 m_positionWorldOnA;
			
 
				+	b3Vector3 m_positionWorldOnB;
			
 
				+	b3Vector3 m_normalWorldOnB;
			
 
				+	b3Scalar m_appliedImpulse;
			
 
				+	b3Scalar m_distance;
			
 
				+	b3Scalar m_combinedRestitution;
			
 
				+
			
 
				+	///information related to friction
			
 
				+	b3Scalar m_combinedFriction;
			
 
				+	b3Vector3 m_lateralFrictionDir1;
			
 
				+	b3Vector3 m_lateralFrictionDir2;
			
 
				+	b3Scalar m_appliedImpulseLateral1;
			
 
				+	b3Scalar m_appliedImpulseLateral2;
			
 
				+	b3Scalar m_combinedRollingFriction;
			
 
				+	b3Scalar m_contactMotion1;
			
 
				+	b3Scalar m_contactMotion2;
			
 
				+	b3Scalar m_contactCFM1;
			
 
				+	b3Scalar m_contactCFM2;
			
 
				+
			
 
				+	bool m_lateralFrictionInitialized;
			
 
				+
			
 
				+	// The accessors only read members, so they are const and usable on const contact points.
			
 
				+	b3Vector3 getPositionWorldOnA() const
			
 
				+	{
			
 
				+		return m_positionWorldOnA;
			
 
				+	}
			
 
				+	b3Vector3 getPositionWorldOnB() const
			
 
				+	{
			
 
				+		return m_positionWorldOnB;
			
 
				+	}
			
 
				+	b3Scalar getDistance() const
			
 
				+	{
			
 
				+		return m_distance;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+// Fills pointOut from entry contactIndex of the given contact.
			
 
				+// Accumulated impulses, rolling friction, CFM and contact-motion values are reset to zero;
			
 
				+// friction, restitution, penetration, normal and world position on B are read from the contact.
			
 
				+void getContactPoint(b3Contact4* contact, int contactIndex, b3ContactPoint& pointOut)
			
 
				+{
			
 
				+	pointOut.m_appliedImpulse = 0.f;
			
 
				+	pointOut.m_appliedImpulseLateral1 = 0.f;
			
 
				+	pointOut.m_appliedImpulseLateral2 = 0.f;
			
 
				+	pointOut.m_combinedFriction = contact->getFrictionCoeff();
			
 
				+	pointOut.m_combinedRestitution = contact->getRestituitionCoeff();
			
 
				+	pointOut.m_combinedRollingFriction = 0.f;
			
 
				+	pointOut.m_contactCFM1 = 0.f;
			
 
				+	pointOut.m_contactCFM2 = 0.f;
			
 
				+	pointOut.m_contactMotion1 = 0.f;
			
 
				+	pointOut.m_contactMotion2 = 0.f;
			
 
				+	pointOut.m_distance = contact->getPenetration(contactIndex);  //??0.01f
			
 
				+	// Work on a normalized copy of the contact normal; the contact itself is not modified.
			
 
				+	b3Vector3 normalOnB = contact->m_worldNormalOnB;
			
 
				+	normalOnB.normalize();  //is this needed?
			
 
				+
			
 
				+	// NOTE(review): b3PlaneSpace1 presumably returns two vectors orthogonal to normalOnB - confirm.
			
 
				+	b3Vector3 l1, l2;
			
 
				+	b3PlaneSpace1(normalOnB, l1, l2);
			
 
				+
			
 
				+	pointOut.m_normalWorldOnB = normalOnB;
			
 
				+	//printf("normalOnB = %f,%f,%f\n",normalOnB.getX(),normalOnB.getY(),normalOnB.getZ());
			
 
				+	pointOut.m_lateralFrictionDir1 = l1;
			
 
				+	pointOut.m_lateralFrictionDir2 = l2;
			
 
				+	pointOut.m_lateralFrictionInitialized = true;
			
 
				+
			
 
				+	// The position on A is derived from the position on B by moving along the normal by m_distance.
			
 
				+	b3Vector3 worldPosB = contact->m_worldPosB[contactIndex];
			
 
				+	pointOut.m_positionWorldOnB = worldPosB;
			
 
				+	pointOut.m_positionWorldOnA = worldPosB + normalOnB * pointOut.m_distance;
			
 
				+}
			
 
				+
			
 
				+// Returns the value reported by contact->getNPoints().
			
 
				+int getNumContacts(b3Contact4* contact)
			
 
				+{
			
 
				+	return contact->getNPoints();
			
 
				+}
			
 
				+
			
 
				+// Stores the usePgs flag and zero-initialises the split-impulse recovery counter and m_btSeed2.
			
 
				+b3PgsJacobiSolver::b3PgsJacobiSolver(bool usePgs)
			
 
				+	: m_usePgs(usePgs),
			
 
				+	  m_numSplitImpulseRecoveries(0),
			
 
				+	  m_btSeed2(0)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+b3PgsJacobiSolver::~b3PgsJacobiSolver()
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/// Convenience entry point: solves the given contacts and constraints with fixed solver settings
			
 
				+/// (time step 1/60, 4 iterations, split impulse disabled, two friction directions enabled).
			
 
				+void b3PgsJacobiSolver::solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts, int numConstraints, b3TypedConstraint** constraints)
			
 
				+{
			
 
				+	b3ContactSolverInfo infoGlobal;
			
 
				+	infoGlobal.m_splitImpulse = false;
			
 
				+	infoGlobal.m_timeStep = 1.f / 60.f;
			
 
				+	infoGlobal.m_numIterations = 4;
			
 
				+	// The flag is OR-ed into the existing mode, so this relies on b3ContactSolverInfo initialising m_solverMode.
			
 
				+	infoGlobal.m_solverMode |= B3_SOLVER_USE_2_FRICTION_DIRECTIONS;
			
 
				+
			
 
				+	// The scalar returned by solveGroup is not used here.
			
 
				+	solveGroup(bodies, inertias, numBodies, contacts, numContacts, constraints, numConstraints, infoGlobal);
			
 
				+}
			
 
				+
			
 
				+/// b3PgsJacobiSolver Sequentially applies impulses
			
 
				+b3Scalar b3PgsJacobiSolver::solveGroup(b3RigidBodyData* bodies,
			
 
				+									   b3InertiaData* inertias,
			
 
				+									   int numBodies,
			
 
				+									   b3Contact4* manifoldPtr,
			
 
				+									   int numManifolds,
			
 
				+									   b3TypedConstraint** constraints,
			
 
				+									   int numConstraints,
			
 
				+									   const b3ContactSolverInfo& infoGlobal)
			
 
				+{
			
 
				+	B3_PROFILE("solveGroup");
			
 
				+	//you need to provide at least some bodies
			
 
				+
			
 
				+	// The solve runs in three phases, in this order: setup, iterations, finish.
			
 
				+	solveGroupCacheFriendlySetup(bodies, inertias, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal);
			
 
				+
			
 
				+	solveGroupCacheFriendlyIterations(constraints, numConstraints, infoGlobal);
			
 
				+
			
 
				+	solveGroupCacheFriendlyFinish(bodies, inertias, numBodies, infoGlobal);
			
 
				+
			
 
				+	// The return values of the three phases are discarded; this function always returns 0.
			
 
				+	return 0.f;
			
 
				+}
			
 
				+
			
 
				+#ifdef USE_SIMD
			
 
				+#include <emmintrin.h>
			
 
				+#define b3VecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e))
			
 
				+// 3-component dot product: multiplies the vectors lane by lane, then sums lanes 0, 1 and 2.
			
 
				+// Each splat broadcasts one lane to all four, so every lane of the result holds the same sum; lane 3 of the inputs is ignored.
			
 
				+static inline __m128 b3SimdDot3(__m128 vec0, __m128 vec1)
			
 
				+{
			
 
				+	__m128 result = _mm_mul_ps(vec0, vec1);
			
 
				+	return _mm_add_ps(b3VecSplat(result, 0), _mm_add_ps(b3VecSplat(result, 1), b3VecSplat(result, 2)));
			
 
				+}
			
 
				+#endif  //USE_SIMD
			
 
				+
			
 
				+// Projected Gauss-Seidel, or the equivalent Sequential Impulse method
			
 
				+void b3PgsJacobiSolver::resolveSingleConstraintRowGenericSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c)
			
 
				+{
			
 
				+#ifdef USE_SIMD
			
 
				+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
			
 
				+	__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
			
 
				+	__m128 upperLimit1 = _mm_set1_ps(c.m_upperLimit);
			
 
				+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse), _mm_set1_ps(c.m_cfm)));
			
 
				+	__m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128));
			
 
				+	__m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetDeltaLinearVelocity().mVec128));
			
 
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.m_jacDiagABInv)));
			
 
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.m_jacDiagABInv)));
			
 
				+	b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse);
			
 
				+	b3SimdScalar resultLowerLess, resultUpperLess;
			
 
				+	resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1);
			
 
				+	resultUpperLess = _mm_cmplt_ps(sum, upperLimit1);
			
 
				+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
			
 
				+	deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
			
 
				+	c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum));
			
 
				+	__m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
			
 
				+	deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
			
 
				+	c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, c.m_appliedImpulse), _mm_andnot_ps(resultUpperLess, upperLimit1));
			
 
				+	__m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128);
			
 
				+	__m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128);
			
 
				+	__m128 impulseMagnitude = deltaImpulse;
			
 
				+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude));
			
 
				+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
			
 
				+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
			
 
				+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
			
 
				+#else
			
 
				+	resolveSingleConstraintRowGeneric(body1, body2, c);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+// Projected Gauss-Seidel, or the equivalent Sequential Impulse method
			
 
				+// Performs one solver update for a single constraint row c acting on body1 and body2:
			
 
				+// computes the impulse change, clamps the accumulated impulse to [m_lowerLimit, m_upperLimit],
			
 
				+// and hands the resulting impulse change to both bodies.
			
 
				+// NOTE(review): c is a const reference yet c.m_appliedImpulse is assigned, so that member is presumably declared mutable - confirm.
			
 
				+void b3PgsJacobiSolver::resolveSingleConstraintRowGeneric(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c)
			
 
				+{
			
 
				+	// Start from the row's right-hand side minus the CFM term on the impulse accumulated so far.
			
 
				+	b3Scalar deltaImpulse = c.m_rhs - b3Scalar(c.m_appliedImpulse) * c.m_cfm;
			
 
				+	// Project each body's current delta velocities onto the row; the contact normal is negated for body2.
			
 
				+	const b3Scalar deltaVel1Dotn = c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
			
 
				+	const b3Scalar deltaVel2Dotn = -c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
			
 
				+
			
 
				+	//	const b3Scalar delta_rel_vel	=	deltaVel1Dotn-deltaVel2Dotn;
			
 
				+	deltaImpulse -= deltaVel1Dotn * c.m_jacDiagABInv;
			
 
				+	deltaImpulse -= deltaVel2Dotn * c.m_jacDiagABInv;
			
 
				+
			
 
				+	// Clamp the accumulated impulse; when a limit is hit, only the part that reaches the limit is applied.
			
 
				+	const b3Scalar sum = b3Scalar(c.m_appliedImpulse) + deltaImpulse;
			
 
				+	if (sum < c.m_lowerLimit)
			
 
				+	{
			
 
				+		deltaImpulse = c.m_lowerLimit - c.m_appliedImpulse;
			
 
				+		c.m_appliedImpulse = c.m_lowerLimit;
			
 
				+	}
			
 
				+	else if (sum > c.m_upperLimit)
			
 
				+	{
			
 
				+		deltaImpulse = c.m_upperLimit - c.m_appliedImpulse;
			
 
				+		c.m_appliedImpulse = c.m_upperLimit;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		c.m_appliedImpulse = sum;
			
 
				+	}
			
 
				+
			
 
				+	// body2 receives the linear part along the negated normal; both bodies get the same impulse magnitude.
			
 
				+	body1.internalApplyImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, deltaImpulse);
			
 
				+	body2.internalApplyImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, deltaImpulse);
			
 
				+}
			
 
				+
			
 
				+// SSE variant of the single-row solve for rows that only enforce a lower impulse
				+// limit. When USE_SIMD is not defined it simply forwards to
				+// resolveSingleConstraintRowLowerLimit().
				+//
				+// body1, body2: solver bodies whose delta linear/angular velocities are read and
				+//               then updated in place.
				+// c:            the row being solved; c.m_appliedImpulse is overwritten with the
				+//               new accumulated impulse even though c is a const reference.
				+//
				+// The upper limit is never applied by this variant, so it is neither loaded nor
				+// compared against. The broadcast applied impulse and inverse Jacobian diagonal
				+// are computed once and reused.
				+void b3PgsJacobiSolver::resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c)
				+{
				+#ifdef USE_SIMD
				+	// _mm_set1_ps broadcasts each scalar into all four lanes.
				+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
				+	__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
				+	__m128 jacDiagABInv1 = _mm_set1_ps(c.m_jacDiagABInv);
				+
				+	// deltaImpulse = rhs - applied * cfm - (deltaVel1Dotn + deltaVel2Dotn) * jacDiagABInv,
				+	// evaluated as two separate subtractions like the scalar path.
				+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(cpAppliedImp, _mm_set1_ps(c.m_cfm)));
				+	__m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetDeltaLinearVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetDeltaAngularVelocity().mVec128));
				+	__m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetDeltaAngularVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetDeltaLinearVelocity().mVec128));
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, jacDiagABInv1));
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, jacDiagABInv1));
				+
				+	// Branch-free clamp: the compare yields an all-ones lane mask where
				+	// sum < lowerLimit, which selects between the clamped and unclamped values.
				+	b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse);
				+	b3SimdScalar resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1);
				+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
				+	deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
				+	c.m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum));
				+
				+	// Apply the increment: body 1 gains along +normal, body 2 along -normal
				+	// (hence the subtraction); both angular components are added.
				+	__m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128);
				+	__m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128);
				+	__m128 impulseMagnitude = deltaImpulse;
				+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude));
				+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
				+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
				+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
				+#else
				+	resolveSingleConstraintRowLowerLimit(body1, body2, c);
				+#endif
				+}
			
 
				+
			
 
				+// Projected Gauss-Seidel (equivalently, sequential impulse) update of a single
				+// constraint row, scalar code path, enforcing only the lower impulse limit.
				+//
				+// body1, body2: solver bodies whose delta linear/angular velocities are read and
				+//               then updated through internalApplyImpulse().
				+// c:            the row being solved; c.m_appliedImpulse is overwritten with the
				+//               new accumulated impulse even though c is a const reference.
				+void b3PgsJacobiSolver::resolveSingleConstraintRowLowerLimit(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c)
				+{
				+	// Current delta velocities projected onto each body's part of the row.
				+	const b3Scalar projectedVelA = c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
				+	const b3Scalar projectedVelB = -c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
				+
				+	// Unclamped impulse increment for this iteration.
				+	b3Scalar impulseStep = c.m_rhs - b3Scalar(c.m_appliedImpulse) * c.m_cfm;
				+	impulseStep -= projectedVelA * c.m_jacDiagABInv;
				+	impulseStep -= projectedVelB * c.m_jacDiagABInv;
				+
				+	const b3Scalar accumulated = b3Scalar(c.m_appliedImpulse) + impulseStep;
				+	if (accumulated < c.m_lowerLimit)
				+	{
				+		// Land exactly on the lower limit.
				+		impulseStep = c.m_lowerLimit - c.m_appliedImpulse;
				+		c.m_appliedImpulse = c.m_lowerLimit;
				+	}
				+	else
				+	{
				+		c.m_appliedImpulse = accumulated;
				+	}
				+
				+	// Body 2 receives the increment along the negated, inverse-mass-scaled normal.
				+	body1.internalApplyImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, impulseStep);
				+	body2.internalApplyImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, impulseStep);
				+}
			
 
				+
			
 
				+// Scalar split-impulse penetration solve for one contact row. It works on the
				+// push/turn velocities and on c.m_appliedPushImpulse rather than on the regular
				+// delta velocities and applied impulse, and enforces only the lower limit.
				+//
				+// Rows whose m_rhsPenetration is zero are skipped entirely; every processed row
				+// increments m_numSplitImpulseRecoveries.
				+//
				+// c is a const reference, yet c.m_appliedPushImpulse is overwritten here.
				+void b3PgsJacobiSolver::resolveSplitPenetrationImpulseCacheFriendly(
				+	b3SolverBody& body1,
				+	b3SolverBody& body2,
				+	const b3SolverConstraint& c)
				+{
				+	if (!c.m_rhsPenetration)
				+	{
				+		return;
				+	}
				+
				+	m_numSplitImpulseRecoveries++;
				+
				+	// Current push/turn velocities projected onto each body's part of the row.
				+	const b3Scalar projectedPushA = c.m_contactNormal.dot(body1.internalGetPushVelocity()) + c.m_relpos1CrossNormal.dot(body1.internalGetTurnVelocity());
				+	const b3Scalar projectedPushB = -c.m_contactNormal.dot(body2.internalGetPushVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetTurnVelocity());
				+
				+	// Unclamped push-impulse increment for this iteration.
				+	b3Scalar pushStep = c.m_rhsPenetration - b3Scalar(c.m_appliedPushImpulse) * c.m_cfm;
				+	pushStep -= projectedPushA * c.m_jacDiagABInv;
				+	pushStep -= projectedPushB * c.m_jacDiagABInv;
				+
				+	const b3Scalar accumulated = b3Scalar(c.m_appliedPushImpulse) + pushStep;
				+	if (accumulated < c.m_lowerLimit)
				+	{
				+		// Land exactly on the lower limit.
				+		pushStep = c.m_lowerLimit - c.m_appliedPushImpulse;
				+		c.m_appliedPushImpulse = c.m_lowerLimit;
				+	}
				+	else
				+	{
				+		c.m_appliedPushImpulse = accumulated;
				+	}
				+
				+	body1.internalApplyPushImpulse(c.m_contactNormal * body1.internalGetInvMass(), c.m_angularComponentA, pushStep);
				+	body2.internalApplyPushImpulse(-c.m_contactNormal * body2.internalGetInvMass(), c.m_angularComponentB, pushStep);
				+}
			
 
				+
			
 
				+// SSE variant of the split-impulse penetration solve for one contact row. When
				+// USE_SIMD is not defined it forwards to
				+// resolveSplitPenetrationImpulseCacheFriendly().
				+//
				+// Works on the push/turn velocities and on c.m_appliedPushImpulse (overwritten
				+// even though c is a const reference). Rows whose m_rhsPenetration is zero are
				+// skipped; every processed row increments m_numSplitImpulseRecoveries.
				+//
				+// Only the lower limit is enforced, so the upper limit is neither loaded nor
				+// compared against. The broadcast applied push impulse and inverse Jacobian
				+// diagonal are computed once and reused.
				+void b3PgsJacobiSolver::resolveSplitPenetrationSIMD(b3SolverBody& body1, b3SolverBody& body2, const b3SolverConstraint& c)
				+{
				+#ifdef USE_SIMD
				+	if (!c.m_rhsPenetration)
				+		return;
				+
				+	m_numSplitImpulseRecoveries++;
				+
				+	// _mm_set1_ps broadcasts each scalar into all four lanes.
				+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedPushImpulse);
				+	__m128 lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
				+	__m128 jacDiagABInv1 = _mm_set1_ps(c.m_jacDiagABInv);
				+
				+	// deltaImpulse = rhsPenetration - appliedPush * cfm - (deltaVel1Dotn + deltaVel2Dotn) * jacDiagABInv,
				+	// evaluated as two separate subtractions like the scalar path.
				+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhsPenetration), _mm_mul_ps(cpAppliedImp, _mm_set1_ps(c.m_cfm)));
				+	__m128 deltaVel1Dotn = _mm_add_ps(b3SimdDot3(c.m_contactNormal.mVec128, body1.internalGetPushVelocity().mVec128), b3SimdDot3(c.m_relpos1CrossNormal.mVec128, body1.internalGetTurnVelocity().mVec128));
				+	__m128 deltaVel2Dotn = _mm_sub_ps(b3SimdDot3(c.m_relpos2CrossNormal.mVec128, body2.internalGetTurnVelocity().mVec128), b3SimdDot3((c.m_contactNormal).mVec128, body2.internalGetPushVelocity().mVec128));
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, jacDiagABInv1));
				+	deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, jacDiagABInv1));
				+
				+	// Branch-free clamp: the compare yields an all-ones lane mask where
				+	// sum < lowerLimit, which selects between the clamped and unclamped values.
				+	b3SimdScalar sum = _mm_add_ps(cpAppliedImp, deltaImpulse);
				+	b3SimdScalar resultLowerLess = _mm_cmplt_ps(sum, lowerLimit1);
				+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
				+	deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
				+	c.m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum));
				+
				+	// Apply the increment: body 1 gains along +normal, body 2 along -normal
				+	// (hence the subtraction); both angular components are added.
				+	__m128 linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128, body1.internalGetInvMass().mVec128);
				+	__m128 linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128, body2.internalGetInvMass().mVec128);
				+	__m128 impulseMagnitude = deltaImpulse;
				+	body1.internalGetPushVelocity().mVec128 = _mm_add_ps(body1.internalGetPushVelocity().mVec128, _mm_mul_ps(linearComponentA, impulseMagnitude));
				+	body1.internalGetTurnVelocity().mVec128 = _mm_add_ps(body1.internalGetTurnVelocity().mVec128, _mm_mul_ps(c.m_angularComponentA.mVec128, impulseMagnitude));
				+	body2.internalGetPushVelocity().mVec128 = _mm_sub_ps(body2.internalGetPushVelocity().mVec128, _mm_mul_ps(linearComponentB, impulseMagnitude));
				+	body2.internalGetTurnVelocity().mVec128 = _mm_add_ps(body2.internalGetTurnVelocity().mVec128, _mm_mul_ps(c.m_angularComponentB.mVec128, impulseMagnitude));
				+#else
				+	resolveSplitPenetrationImpulseCacheFriendly(body1, body2, c);
				+#endif
				+}
			
 
				+
			
 
				+// Advances the solver's pseudo-random state by one linear congruential step
				+// (multiplier 1664525, increment 1013904223), keeps only the low 32 bits, stores
				+// the result back into m_btSeed2 and returns it.
				+unsigned long b3PgsJacobiSolver::b3Rand2()
				+{
				+	return m_btSeed2 = (1664525L * m_btSeed2 + 1013904223L) & 0xffffffff;
				+}
			
 
				+
			
 
				+// Returns a pseudo-random integer in [0, n-1] (see ODE's all-integer dRandInt).
				+//
				+// n: size of the requested range. For n <= 0 there is no valid value to return,
				+//    so 0 is returned instead of taking a modulus by zero (n == 0) or by the huge
				+//    unsigned value a negative n converts to. The generator is advanced in every
				+//    case, so the random sequence does not depend on whether n was valid.
				+//
				+// For small ranges the high bits of the raw value are xor-folded into the low
				+// bits before the modulus is taken: the narrower the range, the more folding
				+// steps are applied.
				+int b3PgsJacobiSolver::b3RandInt2(int n)
				+{
				+	unsigned long r = b3Rand2();
				+	if (n <= 0)
				+	{
				+		return 0;
				+	}
				+	const unsigned long un = static_cast<unsigned long>(n);
				+
				+	// Fold thresholds are strictly decreasing, so stopping at the first one that
				+	// un exceeds is the same as nesting the checks inside each other.
				+	static const unsigned long foldLimit[] = {0x00010000UL, 0x00000100UL, 0x00000010UL, 0x00000004UL, 0x00000002UL};
				+	static const int foldShift[] = {16, 8, 4, 2, 1};
				+	const int foldCount = (int)(sizeof(foldShift) / sizeof(foldShift[0]));
				+	for (int i = 0; i < foldCount && un <= foldLimit[i]; ++i)
				+	{
				+		r ^= (r >> foldShift[i]);
				+	}
				+
				+	return (int)(r % un);
				+}
			
 
				+
			
 
				+// Resets *solverBody and initialises it for the rigid body with index bodyIndex.
				+//
				+// bodyIndex:  stored in m_originalBodyIndex.
				+// solverBody: the solver body to fill in; its delta and push/turn velocities are
				+//             zeroed and its linear/angular factors are set to (1, 1, 1).
				+// rb:         source rigid body. When non-null, the world transform, inverse mass
				+//             and linear/angular velocities are taken from it. When null, the
				+//             solver body gets an identity transform, zero inverse mass and zero
				+//             velocities.
				+void b3PgsJacobiSolver::initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* rb)
				+{
				+	b3SolverBody& target = *solverBody;
				+
				+	// Clear everything the solver accumulates during iteration.
				+	target.m_deltaLinearVelocity.setValue(0.f, 0.f, 0.f);
				+	target.m_deltaAngularVelocity.setValue(0.f, 0.f, 0.f);
				+	target.internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
				+	target.internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);
				+
				+	target.m_originalBodyIndex = bodyIndex;
				+
				+	if (!rb)
				+	{
				+		// No rigid body: zero inverse mass, at rest, identity transform.
				+		target.m_worldTransform.setIdentity();
				+		target.internalSetInvMass(b3MakeVector3(0, 0, 0));
				+		target.m_angularFactor.setValue(1, 1, 1);
				+		target.m_linearFactor.setValue(1, 1, 1);
				+		target.m_linearVelocity.setValue(0, 0, 0);
				+		target.m_angularVelocity.setValue(0, 0, 0);
				+		return;
				+	}
				+
				+	target.m_worldTransform = getWorldTransform(rb);
				+	// The scalar inverse mass is replicated on all three axes.
				+	target.internalSetInvMass(b3MakeVector3(rb->m_invMass, rb->m_invMass, rb->m_invMass));
				+	target.m_angularFactor = b3MakeVector3(1, 1, 1);
				+	target.m_linearFactor = b3MakeVector3(1, 1, 1);
				+	target.m_linearVelocity = getLinearVelocity(rb);
				+	target.m_angularVelocity = getAngularVelocity(rb);
				+}
			
 
				+
			
 
				+// Returns the restitution coefficient multiplied by the negated relative
				+// velocity. No clamping is done here.
				+b3Scalar b3PgsJacobiSolver::restitutionCurve(b3Scalar rel_vel, b3Scalar restitution)
				+{
				+	return restitution * -rel_vel;
				+}
			
 
				+
			
 
				+// Fills in a friction constraint row acting along normalAxis at a contact point.
				+//
				+// bodies, inertias:    rigid body and inertia arrays, indexed by each solver
				+//                      body's m_originalBodyIndex.
				+// solverConstraint:    the row to fill in.
				+// normalAxis:          direction of the row; stored as m_contactNormal.
				+// solverBodyIdA/B:     indices into m_tmpSolverBodyPool.
				+// cp:                  contact point; only m_combinedFriction is read.
				+// rel_pos1, rel_pos2:  lever arms used for the angular parts of the row.
				+// colObj0, colObj1:    not used by this function.
				+// relaxation:          numerator of the effective-mass terms.
				+// desiredVelocity:     target relative velocity along the row.
				+// cfmSlip:             stored as the row's m_cfm.
				+//
				+// The applied and push impulses are reset to zero and the limits are set to
				+// [0, 1e10].
				+void b3PgsJacobiSolver::setupFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip)
				+{
				+	solverConstraint.m_contactNormal = normalAxis;
				+
				+	b3SolverBody& solverA = m_tmpSolverBodyPool[solverBodyIdA];
				+	b3SolverBody& solverB = m_tmpSolverBodyPool[solverBodyIdB];
				+
				+	// These are addresses of array elements, so the null checks below are
				+	// expected to always pass.
				+	b3RigidBodyData* rigidA = &bodies[solverA.m_originalBodyIndex];
				+	b3RigidBodyData* rigidB = &bodies[solverB.m_originalBodyIndex];
				+
				+	solverConstraint.m_solverBodyIdA = solverBodyIdA;
				+	solverConstraint.m_solverBodyIdB = solverBodyIdB;
				+	solverConstraint.m_friction = cp.m_combinedFriction;
				+	solverConstraint.m_originalContactPoint = 0;
				+	solverConstraint.m_appliedImpulse = 0.f;
				+	solverConstraint.m_appliedPushImpulse = 0.f;
				+
				+	// Angular parts of the row: lever arm crossed with the (negated, for B) axis.
				+	const b3Vector3 torqueAxisA = rel_pos1.cross(solverConstraint.m_contactNormal);
				+	solverConstraint.m_relpos1CrossNormal = torqueAxisA;
				+	solverConstraint.m_angularComponentA = rigidA ? getInvInertiaTensorWorld(&inertias[solverA.m_originalBodyIndex]) * torqueAxisA : b3MakeVector3(0, 0, 0);
				+
				+	const b3Vector3 torqueAxisB = rel_pos2.cross(-solverConstraint.m_contactNormal);
				+	solverConstraint.m_relpos2CrossNormal = torqueAxisB;
				+	solverConstraint.m_angularComponentB = rigidB ? getInvInertiaTensorWorld(&inertias[solverB.m_originalBodyIndex]) * torqueAxisB : b3MakeVector3(0, 0, 0);
				+
				+	// Per-body contribution to the effective-mass denominator.
				+	b3Scalar denomA = 0.f;
				+	b3Scalar denomB = 0.f;
				+	if (rigidA)
				+	{
				+		denomA = rigidA->m_invMass + normalAxis.dot((solverConstraint.m_angularComponentA).cross(rel_pos1));
				+	}
				+	if (rigidB)
				+	{
				+		denomB = rigidB->m_invMass + normalAxis.dot((-solverConstraint.m_angularComponentB).cross(rel_pos2));
				+	}
				+
				+	// m_jacDiagABInv always uses the plain denominator. The right-hand side uses
				+	// the same value in PGS mode; otherwise each body's term is weighted by its
				+	// m_bodyCount entry (1 for bodies with zero inverse mass).
				+	const b3Scalar plainDenom = relaxation / (denomA + denomB);
				+	b3Scalar rhsDenom = plainDenom;
				+	if (!m_usePgs)
				+	{
				+		const b3Scalar weightA = rigidA->m_invMass ? b3Scalar(m_bodyCount[solverA.m_originalBodyIndex]) : 1.f;
				+		const b3Scalar weightB = rigidB->m_invMass ? b3Scalar(m_bodyCount[solverB.m_originalBodyIndex]) : 1.f;
				+		rhsDenom = relaxation / (denomA * weightA + denomB * weightB);
				+	}
				+	solverConstraint.m_jacDiagABInv = plainDenom;
				+
				+	// Current relative velocity along the row.
				+	const b3Scalar velAlongA = solverConstraint.m_contactNormal.dot(rigidA ? solverA.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(rigidA ? solverA.m_angularVelocity : b3MakeVector3(0, 0, 0));
				+	const b3Scalar velAlongB = -solverConstraint.m_contactNormal.dot(rigidB ? solverB.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(rigidB ? solverB.m_angularVelocity : b3MakeVector3(0, 0, 0));
				+	const b3Scalar relVelAlongRow = velAlongA + velAlongB;
				+
				+	// Right-hand side: impulse that would reach desiredVelocity in one step.
				+	b3SimdScalar velocityError = desiredVelocity - relVelAlongRow;
				+	b3SimdScalar velocityImpulse = velocityError * b3SimdScalar(rhsDenom);
				+	solverConstraint.m_rhs = velocityImpulse;
				+	solverConstraint.m_cfm = cfmSlip;
				+	solverConstraint.m_lowerLimit = 0;
				+	solverConstraint.m_upperLimit = 1e10f;
				+}
			
 
				+
			
 
				+// Appends a row to m_tmpSolverContactFrictionConstraintPool, records
				+// frictionIndex on it, fills it in with setupFrictionConstraint() and returns a
				+// reference to it. All remaining arguments are forwarded unchanged.
				+//
				+// The row comes from expandNonInitializing(); going by that name, fields not
				+// written here or by setupFrictionConstraint() are presumably left
				+// uninitialised - confirm against the array implementation.
				+b3SolverConstraint& b3PgsJacobiSolver::addFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip)
				+{
				+	b3SolverConstraint& frictionRow = m_tmpSolverContactFrictionConstraintPool.expandNonInitializing();
				+	frictionRow.m_frictionIndex = frictionIndex;
				+
				+	setupFrictionConstraint(bodies, inertias, frictionRow, normalAxis,
				+							solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2,
				+							colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
				+
				+	return frictionRow;
				+}
			
 
				+
			
 
				+// Fills in a rolling-friction constraint row: a purely angular row about
				+// normalAxis1 whose linear part (m_contactNormal) is the zero vector.
				+//
				+// bodies, inertias:  rigid body and inertia arrays, indexed by each solver
				+//                    body's m_originalBodyIndex.
				+// solverConstraint:  the row to fill in.
				+// normalAxis1:       rotation axis; body A uses -normalAxis1, body B +normalAxis1.
				+// solverBodyIdA/B:   indices into m_tmpSolverBodyPool.
				+// cp:                contact point; only m_combinedRollingFriction is read.
				+// desiredVelocity:   target relative velocity along the row.
				+// cfmSlip:           stored as the row's m_cfm.
				+// rel_pos1, rel_pos2, colObj0, colObj1 and relaxation are not used here.
				+//
				+// The applied and push impulses are reset to zero and the limits are set to
				+// [0, 1e10].
				+void b3PgsJacobiSolver::setupRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis1, int solverBodyIdA, int solverBodyIdB,
				+													   b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
				+													   b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation,
				+													   b3Scalar desiredVelocity, b3Scalar cfmSlip)
				+
				+{
				+	// No linear part for a rolling-friction row.
				+	solverConstraint.m_contactNormal = b3MakeVector3(0, 0, 0);
				+
				+	b3SolverBody& solverA = m_tmpSolverBodyPool[solverBodyIdA];
				+	b3SolverBody& solverB = m_tmpSolverBodyPool[solverBodyIdB];
				+
				+	// These are addresses of array elements, so the null checks below are
				+	// expected to always pass.
				+	b3RigidBodyData* rigidA = &bodies[solverA.m_originalBodyIndex];
				+	b3RigidBodyData* rigidB = &bodies[solverB.m_originalBodyIndex];
				+
				+	solverConstraint.m_solverBodyIdA = solverBodyIdA;
				+	solverConstraint.m_solverBodyIdB = solverBodyIdB;
				+	solverConstraint.m_friction = cp.m_combinedRollingFriction;
				+	solverConstraint.m_originalContactPoint = 0;
				+	solverConstraint.m_appliedImpulse = 0.f;
				+	solverConstraint.m_appliedPushImpulse = 0.f;
				+
				+	// Angular Jacobians: opposite signs for the two bodies.
				+	const b3Vector3 angularAxisA = -normalAxis1;
				+	solverConstraint.m_relpos1CrossNormal = angularAxisA;
				+	solverConstraint.m_angularComponentA = rigidA ? getInvInertiaTensorWorld(&inertias[solverA.m_originalBodyIndex]) * angularAxisA : b3MakeVector3(0, 0, 0);
				+
				+	const b3Vector3 angularAxisB = normalAxis1;
				+	solverConstraint.m_relpos2CrossNormal = angularAxisB;
				+	solverConstraint.m_angularComponentB = rigidB ? getInvInertiaTensorWorld(&inertias[solverB.m_originalBodyIndex]) * angularAxisB : b3MakeVector3(0, 0, 0);
				+
				+	// Effective mass of the angular-only row: 1 / (J_A.I_A^-1.J_A + J_B.I_B^-1.J_B).
				+	// There is no guard against the sum being zero.
				+	const b3Vector3 invInertiaTimesAxisA = rigidA ? getInvInertiaTensorWorld(&inertias[solverA.m_originalBodyIndex]) * solverConstraint.m_relpos1CrossNormal : b3MakeVector3(0, 0, 0);
				+	const b3Vector3 invInertiaTimesAxisB = rigidB ? getInvInertiaTensorWorld(&inertias[solverB.m_originalBodyIndex]) * solverConstraint.m_relpos2CrossNormal : b3MakeVector3(0, 0, 0);
				+	b3Scalar jacDiag = 0;
				+	jacDiag += invInertiaTimesAxisA.dot(solverConstraint.m_relpos1CrossNormal);
				+	jacDiag += invInertiaTimesAxisB.dot(solverConstraint.m_relpos2CrossNormal);
				+	solverConstraint.m_jacDiagABInv = b3Scalar(1.) / jacDiag;
				+
				+	// Current relative velocity along the row (the linear terms vanish because
				+	// m_contactNormal is zero, but are kept in the same form as the other rows).
				+	const b3Scalar velAlongA = solverConstraint.m_contactNormal.dot(rigidA ? solverA.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(rigidA ? solverA.m_angularVelocity : b3MakeVector3(0, 0, 0));
				+	const b3Scalar velAlongB = -solverConstraint.m_contactNormal.dot(rigidB ? solverB.m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(rigidB ? solverB.m_angularVelocity : b3MakeVector3(0, 0, 0));
				+	const b3Scalar relVelAlongRow = velAlongA + velAlongB;
				+
				+	// Right-hand side: impulse that would reach desiredVelocity in one step.
				+	b3SimdScalar velocityError = desiredVelocity - relVelAlongRow;
				+	b3SimdScalar velocityImpulse = velocityError * b3SimdScalar(solverConstraint.m_jacDiagABInv);
				+	solverConstraint.m_rhs = velocityImpulse;
				+	solverConstraint.m_cfm = cfmSlip;
				+	solverConstraint.m_lowerLimit = 0;
				+	solverConstraint.m_upperLimit = 1e10f;
				+}
			
 
				+
			
 
				+// Appends a row to m_tmpSolverContactRollingFrictionConstraintPool, records
				+// frictionIndex on it, fills it in with setupRollingFrictionConstraint() and
				+// returns a reference to it. All remaining arguments are forwarded unchanged.
				+//
				+// The row comes from expandNonInitializing(); going by that name, fields not
				+// written here or by setupRollingFrictionConstraint() are presumably left
				+// uninitialised - confirm against the array implementation.
				+b3SolverConstraint& b3PgsJacobiSolver::addRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity, b3Scalar cfmSlip)
				+{
				+	b3SolverConstraint& rollingRow = m_tmpSolverContactRollingFrictionConstraintPool.expandNonInitializing();
				+	rollingRow.m_frictionIndex = frictionIndex;
				+
				+	setupRollingFrictionConstraint(bodies, inertias, rollingRow, normalAxis,
				+								   solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2,
				+								   colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
				+
				+	return rollingRow;
				+}
			
 
				+
			
 
				+// Returns the index in m_tmpSolverBodyPool of a solver body for rigid body
				+// bodyIndex, creating and initialising one when needed.
				+//
				+// Two modes, chosen per call:
				+//  - PGS mode, or a body with zero inverse mass: one solver body is shared per
				+//    rigid body. m_bodyCount[bodyIndex] caches its pool index; a negative entry
				+//    means none has been created yet.
				+//  - Otherwise: a new solver body is appended on every call. Here
				+//    m_bodyCount[bodyIndex] is asserted to be positive and
				+//    m_bodyCountCheck[bodyIndex] is incremented.
				+//
				+// inertias is not used by this function.
				+int b3PgsJacobiSolver::getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies, b3InertiaData* inertias)
				+{
				+	//b3Assert(bodyIndex< m_tmpSolverBodyPool.size());
				+
				+	b3RigidBodyData& body = bodies[bodyIndex];
				+	const bool sharedPerBody = m_usePgs || body.m_invMass == 0.f;
				+
				+	int solverIndex = -1;
				+	if (sharedPerBody && m_bodyCount[bodyIndex] >= 0)
				+	{
				+		// Already created on an earlier call: reuse it.
				+		solverIndex = m_bodyCount[bodyIndex];
				+	}
				+	else
				+	{
				+		if (!sharedPerBody)
				+		{
				+			b3Assert(m_bodyCount[bodyIndex] > 0);
				+			m_bodyCountCheck[bodyIndex]++;
				+		}
				+
				+		// Append and initialise a fresh solver body.
				+		solverIndex = m_tmpSolverBodyPool.size();
				+		b3SolverBody& freshBody = m_tmpSolverBodyPool.expand();
				+		initSolverBody(bodyIndex, &freshBody, &body);
				+		freshBody.m_originalBodyIndex = bodyIndex;
				+
				+		if (sharedPerBody)
				+		{
				+			// Remember the pool index so later calls share this solver body.
				+			m_bodyCount[bodyIndex] = solverIndex;
				+		}
				+	}
				+
				+	b3Assert(solverIndex >= 0);
				+	return solverIndex;
				+}
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+// Fills in the normal-direction constraint row for one contact point.
				+//
				+// Inputs:
				+//   bodies, inertias:  rigid body and inertia arrays, indexed by each solver
				+//                      body's m_originalBodyIndex.
				+//   solverBodyIdA/B:   indices into m_tmpSolverBodyPool.
				+//   cp:                the contact point (positions, normal on B, distance,
				+//                      combined friction/restitution, cached applied impulse).
				+//   infoGlobal:        solver settings (slop, erp, split impulse, warm starting,
				+//                      time step).
				+// Outputs:
				+//   solverConstraint:  the row that is filled in.
				+//   rel_pos1/rel_pos2: contact position minus the origin of solver body A / B.
				+//   vel:               difference of the two getVelocityInLocalPoint() results.
				+//   rel_vel:           vel projected onto cp.m_normalWorldOnB.
				+//   relaxation:        always set to 1.
				+// Side effect: with B3_SOLVER_USE_WARMSTARTING set, the scaled cached impulse is
				+// applied to both solver bodies through internalApplyImpulse().
				+void b3PgsJacobiSolver::setupContactConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint,
				+											   int solverBodyIdA, int solverBodyIdB,
				+											   b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal,
				+											   b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation,
				+											   b3Vector3& rel_pos1, b3Vector3& rel_pos2)
				+{
				+	const b3Vector3& contactOnA = cp.getPositionWorldOnA();
				+	const b3Vector3& contactOnB = cp.getPositionWorldOnB();
				+
				+	b3SolverBody* solverA = &m_tmpSolverBodyPool[solverBodyIdA];
				+	b3SolverBody* solverB = &m_tmpSolverBodyPool[solverBodyIdB];
				+
				+	// These are addresses of array elements, so the null checks below are
				+	// expected to always pass.
				+	b3RigidBodyData* rigidA = &bodies[solverA->m_originalBodyIndex];
				+	b3RigidBodyData* rigidB = &bodies[solverB->m_originalBodyIndex];
				+
				+	rel_pos1 = contactOnA - solverA->getWorldTransform().getOrigin();
				+	rel_pos2 = contactOnB - solverB->getWorldTransform().getOrigin();
				+
				+	relaxation = 1.f;
				+
				+	// Angular parts of the row; body B uses the negated torque axis.
				+	b3Vector3 torqueAxisA = rel_pos1.cross(cp.m_normalWorldOnB);
				+	solverConstraint.m_angularComponentA = rigidA ? getInvInertiaTensorWorld(&inertias[solverA->m_originalBodyIndex]) * torqueAxisA : b3MakeVector3(0, 0, 0);
				+	b3Vector3 torqueAxisB = rel_pos2.cross(cp.m_normalWorldOnB);
				+	solverConstraint.m_angularComponentB = rigidB ? getInvInertiaTensorWorld(&inertias[solverB->m_originalBodyIndex]) * -torqueAxisB : b3MakeVector3(0, 0, 0);
				+
				+	// Effective-mass terms. m_jacDiagABInv always uses the plain denominator; the
				+	// right-hand side uses rhsDenom, which differs only outside PGS mode.
				+	b3Scalar rhsDenom;
				+	{
				+#ifdef COMPUTE_IMPULSE_DENOM
				+		b3Scalar denomA = rigidA->computeImpulseDenominator(contactOnA, cp.m_normalWorldOnB);
				+		b3Scalar denomB = rigidB->computeImpulseDenominator(contactOnB, cp.m_normalWorldOnB);
				+#else
				+		b3Scalar denomA = 0.f;
				+		b3Scalar denomB = 0.f;
				+		if (rigidA)
				+		{
				+			denomA = rigidA->m_invMass + cp.m_normalWorldOnB.dot((solverConstraint.m_angularComponentA).cross(rel_pos1));
				+		}
				+		if (rigidB)
				+		{
				+			denomB = rigidB->m_invMass + cp.m_normalWorldOnB.dot((-solverConstraint.m_angularComponentB).cross(rel_pos2));
				+		}
				+#endif  //COMPUTE_IMPULSE_DENOM
				+
				+		const b3Scalar plainDenom = relaxation / (denomA + denomB);
				+		rhsDenom = plainDenom;
				+		if (!m_usePgs)
				+		{
				+			// Weight each body's term by its m_bodyCount entry (1 for bodies with
				+			// zero inverse mass).
				+			const b3Scalar weightA = rigidA->m_invMass ? b3Scalar(m_bodyCount[solverA->m_originalBodyIndex]) : 1.f;
				+			const b3Scalar weightB = rigidB->m_invMass ? b3Scalar(m_bodyCount[solverB->m_originalBodyIndex]) : 1.f;
				+			rhsDenom = relaxation / (denomA * weightA + denomB * weightB);
				+		}
				+		solverConstraint.m_jacDiagABInv = plainDenom;
				+	}
				+
				+	solverConstraint.m_contactNormal = cp.m_normalWorldOnB;
				+	solverConstraint.m_relpos1CrossNormal = torqueAxisA;
				+	solverConstraint.m_relpos2CrossNormal = -torqueAxisB;
				+
				+	b3Scalar restitution = 0.f;
				+	const b3Scalar penetration = cp.getDistance() + infoGlobal.m_linearSlop;
				+
				+	{
				+		// Relative velocity at the contact, reported to the caller through
				+		// vel / rel_vel and used for the restitution term.
				+		const b3Vector3 velAtContactA = rigidA ? getVelocityInLocalPoint(rigidA, rel_pos1) : b3MakeVector3(0, 0, 0);
				+		const b3Vector3 velAtContactB = rigidB ? getVelocityInLocalPoint(rigidB, rel_pos2) : b3MakeVector3(0, 0, 0);
				+		vel = velAtContactA - velAtContactB;
				+		rel_vel = cp.m_normalWorldOnB.dot(vel);
				+
				+		solverConstraint.m_friction = cp.m_combinedFriction;
				+
				+		// Negative restitution is discarded.
				+		restitution = restitutionCurve(rel_vel, cp.m_combinedRestitution);
				+		if (restitution <= b3Scalar(0.))
				+		{
				+			restitution = 0.f;
				+		}
				+	}
				+
				+	///warm starting (or zero if disabled)
				+	if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING)
				+	{
				+		solverConstraint.m_appliedImpulse = cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
				+		if (rigidA)
				+			solverA->internalApplyImpulse(solverConstraint.m_contactNormal * solverA->internalGetInvMass(), solverConstraint.m_angularComponentA, solverConstraint.m_appliedImpulse);
				+		if (rigidB)
				+			solverB->internalApplyImpulse(solverConstraint.m_contactNormal * solverB->internalGetInvMass(), -solverConstraint.m_angularComponentB, -(b3Scalar)solverConstraint.m_appliedImpulse);
				+	}
				+	else
				+	{
				+		solverConstraint.m_appliedImpulse = 0.f;
				+	}
				+
				+	solverConstraint.m_appliedPushImpulse = 0.f;
				+
				+	{
				+		// Relative velocity along the row, built from the solver bodies' own
				+		// velocities. Kept in a separate local so the rel_vel output is untouched.
				+		const b3Scalar velAlongA = solverConstraint.m_contactNormal.dot(rigidA ? solverA->m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos1CrossNormal.dot(rigidA ? solverA->m_angularVelocity : b3MakeVector3(0, 0, 0));
				+		const b3Scalar velAlongB = -solverConstraint.m_contactNormal.dot(rigidB ? solverB->m_linearVelocity : b3MakeVector3(0, 0, 0)) + solverConstraint.m_relpos2CrossNormal.dot(rigidB ? solverB->m_angularVelocity : b3MakeVector3(0, 0, 0));
				+		const b3Scalar relVelAlongRow = velAlongA + velAlongB;
				+
				+		// Position and velocity terms are merged into m_rhs unless split impulse
				+		// is enabled and the penetration is at or below its threshold.
				+		const bool mergeIntoRhs = !infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold);
				+		const b3Scalar erp = mergeIntoRhs ? infoGlobal.m_erp : infoGlobal.m_erp2;
				+
				+		b3Scalar positionalError = 0.f;
				+		b3Scalar velocityError = restitution - relVelAlongRow;  // * damping;
				+		if (penetration > 0)
				+		{
				+			// Positive value: no positional correction, only a velocity adjustment.
				+			velocityError -= penetration / infoGlobal.m_timeStep;
				+		}
				+		else
				+		{
				+			positionalError = -penetration * erp / infoGlobal.m_timeStep;
				+		}
				+
				+		const b3Scalar penetrationImpulse = positionalError * rhsDenom;  //solverConstraint.m_jacDiagABInv;
				+		const b3Scalar velocityImpulse = velocityError * rhsDenom;       //solverConstraint.m_jacDiagABInv;
				+
				+		if (mergeIntoRhs)
				+		{
				+			//combine position and velocity into rhs
				+			solverConstraint.m_rhs = penetrationImpulse + velocityImpulse;
				+			solverConstraint.m_rhsPenetration = 0.f;
				+		}
				+		else
				+		{
				+			//split position and velocity into rhs and m_rhsPenetration
				+			solverConstraint.m_rhs = velocityImpulse;
				+			solverConstraint.m_rhsPenetration = penetrationImpulse;
				+		}
				+		solverConstraint.m_cfm = 0.f;
				+		solverConstraint.m_lowerLimit = 0;
				+		solverConstraint.m_upperLimit = 1e10f;
				+	}
				+}
			
 
				+
			
 
				+// Initialises the applied impulse of the friction row(s) that belong to the
				+// contact row solverConstraint.
				+//
				+// The first friction row is m_tmpSolverContactFrictionConstraintPool[
				+// solverConstraint.m_frictionIndex]; the row right after it is handled as well
				+// when B3_SOLVER_USE_2_FRICTION_DIRECTIONS is set.
				+//
				+// With B3_SOLVER_USE_WARMSTARTING set, each row starts from the matching cached
				+// lateral impulse of cp (m_appliedImpulseLateral1 / m_appliedImpulseLateral2)
				+// scaled by m_warmstartingFactor, and that impulse is applied to every body
				+// with non-zero inverse mass. Otherwise the rows start from zero.
				+//
				+// inertias is not used by this function.
				+void b3PgsJacobiSolver::setFrictionConstraintImpulse(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint,
				+													 int solverBodyIdA, int solverBodyIdB,
				+													 b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal)
				+{
				+	b3SolverBody* solverA = &m_tmpSolverBodyPool[solverBodyIdA];
				+	b3SolverBody* solverB = &m_tmpSolverBodyPool[solverBodyIdB];
				+
				+	const bool warmStart = (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING) != 0;
				+	const int directionCount = (infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) ? 2 : 1;
				+
				+	for (int direction = 0; direction < directionCount; ++direction)
				+	{
				+		b3SolverConstraint& frictionRow = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex + direction];
				+
				+		if (!warmStart)
				+		{
				+			frictionRow.m_appliedImpulse = 0.f;
				+			continue;
				+		}
				+
				+		if (direction == 0)
				+		{
				+			frictionRow.m_appliedImpulse = cp.m_appliedImpulseLateral1 * infoGlobal.m_warmstartingFactor;
				+		}
				+		else
				+		{
				+			frictionRow.m_appliedImpulse = cp.m_appliedImpulseLateral2 * infoGlobal.m_warmstartingFactor;
				+		}
				+
				+		// Body B receives the opposite impulse: both its angular component and
				+		// the impulse magnitude are negated.
				+		const b3RigidBodyData& rigidA = bodies[solverA->m_originalBodyIndex];
				+		const b3RigidBodyData& rigidB = bodies[solverB->m_originalBodyIndex];
				+		if (rigidA.m_invMass)
				+			solverA->internalApplyImpulse(frictionRow.m_contactNormal * rigidA.m_invMass, frictionRow.m_angularComponentA, frictionRow.m_appliedImpulse);
				+		if (rigidB.m_invMass)
				+			solverB->internalApplyImpulse(frictionRow.m_contactNormal * rigidB.m_invMass, -frictionRow.m_angularComponentB, -(b3Scalar)frictionRow.m_appliedImpulse);
				+	}
				+}
			
 
				+/**
				+ * Converts one contact manifold into contact and friction solver rows.
				+ *
				+ * The solver bodies of the manifold's two bodies are obtained from
				+ * getOrInitSolverBody(). If both solver bodies have a zero inverse mass the
				+ * function returns without adding anything. Otherwise, for every contact point
				+ * whose distance is at or below getContactProcessingThreshold(manifold), it
				+ *   - appends a row to m_tmpSolverContactConstraintPool and fills it in with
				+ *     setupContactConstraint();
				+ *   - adds rolling-friction rows, for at most one contact point per manifold,
				+ *     when cp.m_combinedRollingFriction is positive;
				+ *   - adds lateral friction rows through addFrictionConstraint(), either along
				+ *     directions computed here or along the directions already stored in the
				+ *     contact point (friction direction caching).
				+ *
				+ * @param bodies     rigid body array, forwarded to the helper routines
				+ * @param inertias   inertia array, forwarded to the helper routines
				+ * @param manifold   contact manifold to convert
				+ * @param infoGlobal solver settings; m_solverMode selects the friction behaviour
				+ */
			
 
				+void b3PgsJacobiSolver::convertContact(b3RigidBodyData* bodies, b3InertiaData* inertias, b3Contact4* manifold, const b3ContactSolverInfo& infoGlobal)
			
 
				+{
			
 
				+	b3RigidBodyData *colObj0 = 0, *colObj1 = 0;  // never reassigned in this function: null is what the friction helpers receive
			
 
				+
			
 
				+	int solverBodyIdA = getOrInitSolverBody(manifold->getBodyA(), bodies, inertias);
			
 
				+	int solverBodyIdB = getOrInitSolverBody(manifold->getBodyB(), bodies, inertias);
			
 
				+
			
 
				+	//	b3RigidBody* bodyA = b3RigidBody::upcast(colObj0);
			
 
				+	//	b3RigidBody* bodyB = b3RigidBody::upcast(colObj1);
			
 
				+
			
 
				+	b3SolverBody* solverBodyA = &m_tmpSolverBodyPool[solverBodyIdA];
			
 
				+	b3SolverBody* solverBodyB = &m_tmpSolverBodyPool[solverBodyIdB];
			
 
				+
			
 
				+	///avoid collision response between two static objects (both inverse masses are zero)
			
 
				+	if (solverBodyA->m_invMass.isZero() && solverBodyB->m_invMass.isZero())
			
 
				+		return;
			
 
				+
			
 
				+	int rollingFriction = 1;  // remaining rolling-friction setups allowed for this manifold
			
 
				+	int numContacts = getNumContacts(manifold);
			
 
				+	for (int j = 0; j < numContacts; j++)
			
 
				+	{
			
 
				+		b3ContactPoint cp;  // loop-local contact point, filled in by getContactPoint()
			
 
				+		getContactPoint(manifold, j, cp);
			
 
				+
			
 
				+		if (cp.getDistance() <= getContactProcessingThreshold(manifold))  // contacts farther apart than the threshold get no rows
			
 
				+		{
			
 
				+			b3Vector3 rel_pos1;
			
 
				+			b3Vector3 rel_pos2;
			
 
				+			b3Scalar relaxation;
			
 
				+			b3Scalar rel_vel;
			
 
				+			b3Vector3 vel;
			
 
				+
			
 
				+			int frictionIndex = m_tmpSolverContactConstraintPool.size();  // index the new normal row gets in the contact pool
			
 
				+			b3SolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expandNonInitializing();
			
 
				+			//			b3RigidBody* rb0 = b3RigidBody::upcast(colObj0);
			
 
				+			//			b3RigidBody* rb1 = b3RigidBody::upcast(colObj1);
			
 
				+			solverConstraint.m_solverBodyIdA = solverBodyIdA;
			
 
				+			solverConstraint.m_solverBodyIdB = solverBodyIdB;
			
 
				+
			
 
				+			solverConstraint.m_originalContactPoint = &cp;  // NOTE(review): address of the loop-local cp, which stops existing when this iteration ends; confirm nothing dereferences it later
			
 
				+
			
 
				+			setupContactConstraint(bodies, inertias, solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, vel, rel_vel, relaxation, rel_pos1, rel_pos2);
			
 
				+
			
 
				+			//			const b3Vector3& pos1 = cp.getPositionWorldOnA();
			
 
				+			//			const b3Vector3& pos2 = cp.getPositionWorldOnB();
			
 
				+
			
 
				+			/////setup the friction constraints
			
 
				+
			
 
				+			solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size();  // friction pool size before the friction helpers below run
			
 
				+
			
 
				+			b3Vector3 angVelA, angVelB;
			
 
				+			solverBodyA->getAngularVelocity(angVelA);
			
 
				+			solverBodyB->getAngularVelocity(angVelB);
			
 
				+			b3Vector3 relAngVel = angVelB - angVelA;
			
 
				+
			
 
				+			if ((cp.m_combinedRollingFriction > 0.f) && (rollingFriction > 0))
			
 
				+			{
			
 
				+				//only a single rollingFriction per manifold
			
 
				+				rollingFriction--;
			
 
				+				if (relAngVel.length() > infoGlobal.m_singleAxisRollingFrictionThreshold)
			
 
				+				{
			
 
				+					relAngVel.normalize();  // fast relative spin: a single row along the normalized relative angular velocity
			
 
				+					if (relAngVel.length() > 0.001)
			
 
				+						addRollingFrictionConstraint(bodies, inertias, relAngVel, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					addRollingFrictionConstraint(bodies, inertias, cp.m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);  // row along the contact normal, then up to two more along axis0/axis1
			
 
				+					b3Vector3 axis0, axis1;
			
 
				+					b3PlaneSpace1(cp.m_normalWorldOnB, axis0, axis1);
			
 
				+					if (axis0.length() > 0.001)
			
 
				+						addRollingFrictionConstraint(bodies, inertias, axis0, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+					if (axis1.length() > 0.001)
			
 
				+						addRollingFrictionConstraint(bodies, inertias, axis1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			///Bullet has several options to set the friction directions
			
 
				+			///By default, each contact has only a single friction direction that is recomputed automatically every frame
			
 
				+			///based on the relative linear velocity.
			
 
				+			///If the relative velocity is zero, it will automatically compute a friction direction.
			
 
				+
			
 
				+			///You can also enable two friction directions, using the B3_SOLVER_USE_2_FRICTION_DIRECTIONS.
			
 
				+			///In that case, the second friction direction will be orthogonal to both contact normal and first friction direction.
			
 
				+			///
			
 
				+			///If you choose B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION, then the friction will be independent from the relative projected velocity.
			
 
				+			///
			
 
				+			///The user can manually override the friction directions for certain contacts using a contact callback,
			
 
				+			///and set the cp.m_lateralFrictionInitialized to true
			
 
				+			///In that case, you can set the target relative motion in each friction direction (cp.m_contactMotion1 and cp.m_contactMotion2)
			
 
				+			///this will give a conveyor belt effect
			
 
				+			///
			
 
				+			if (!(infoGlobal.m_solverMode & B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
			
 
				+			{
			
 
				+				cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;  // vel with its rel_vel-scaled normal part subtracted
			
 
				+				b3Scalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();  // squared length; the square root is taken only in the branch below
			
 
				+				if (!(infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > B3_EPSILON)
			
 
				+				{
			
 
				+					cp.m_lateralFrictionDir1 *= 1.f / b3Sqrt(lat_rel_vel);  // normalize direction 1
			
 
				+					if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
			
 
				+					{
			
 
				+						cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
			
 
				+						cp.m_lateralFrictionDir2.normalize();  //??
			
 
				+						addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);  // note: the direction-2 row is added before the direction-1 row
			
 
				+					}
			
 
				+
			
 
				+					addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					b3PlaneSpace1(cp.m_normalWorldOnB, cp.m_lateralFrictionDir1, cp.m_lateralFrictionDir2);  // both friction directions come from b3PlaneSpace1 in this branch
			
 
				+
			
 
				+					if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
			
 
				+					{
			
 
				+						addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+					}
			
 
				+
			
 
				+					addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
			
 
				+
			
 
				+					if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION))
			
 
				+					{
			
 
				+						cp.m_lateralFrictionInitialized = true;  // NOTE(review): set on the loop-local cp; whether it reaches the manifold is not visible here
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, cp.m_contactMotion1, cp.m_contactCFM1);  // cached directions: direction 1 first, with the contact's motion/CFM values
			
 
				+
			
 
				+				if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
			
 
				+					addFrictionConstraint(bodies, inertias, cp.m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, cp.m_contactMotion2, cp.m_contactCFM2);
			
 
				+
			
 
				+				setFrictionConstraintImpulse(bodies, inertias, solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal);  // only this cached-direction path seeds the friction rows' applied impulse
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+/**
				+ * Builds all solver data needed before iterating: per-body bookkeeping,
				+ * joint (non-contact) rows, contact/friction rows and the row order tables.
				+ *
				+ * Steps, in the order they run:
				+ *  1. Empties m_tmpSolverBodyPool and resets m_bodyCount, m_bodyCountCheck,
				+ *     m_deltaLinearVelocities and m_deltaAngularVelocities to numBodies
				+ *     zero-valued entries.
				+ *  2. Fills m_bodyCount. With m_usePgs set, every body referenced by a
				+ *     constraint or manifold is marked -1. Otherwise constraints hit
				+ *     b3Assert(0), and for each manifold a body with non-zero inverse mass
				+ *     has its count incremented while a body with zero inverse mass is
				+ *     marked -1.
				+ *  3. Zeroes the applied impulse of every constraint and clears any joint
				+ *     feedback it has.
				+ *  4. Calls getInfo1() on each enabled constraint to learn its row count,
				+ *     sizes m_tmpSolverNonContactConstraintPool accordingly and, unless
				+ *     DISABLE_JOINTS is defined, fills the rows through getInfo2() and then
				+ *     finalizes limits, angular components, m_jacDiagABInv and m_rhs.
				+ *  5. Calls convertContact() for every manifold.
				+ *  6. Resizes the three order arrays and fills them with 0, 1, 2, ...
				+ *
				+ * @param bodies         rigid body array
				+ * @param inertias       inertia array (m_invInertiaWorld is read per constraint body)
				+ * @param numBodies      number of entries used to size the per-body arrays
				+ * @param manifoldPtr    array of numManifolds contact manifolds
				+ * @param numManifolds   number of manifolds
				+ * @param constraints    array of numConstraints constraint pointers
				+ * @param numConstraints number of constraints
				+ * @param infoGlobal     solver settings
				+ * @return always 0.f
				+ */
			
 
				+b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
			
 
				+{
			
 
				+	B3_PROFILE("solveGroupCacheFriendlySetup");
			
 
				+
			
 
				+	m_maxOverrideNumSolverIterations = 0;
			
 
				+
			
 
				+	m_tmpSolverBodyPool.resize(0);
			
 
				+
			
 
				+	m_bodyCount.resize(0);  // shrink to zero first so the following resize fills every entry with the given value
			
 
				+	m_bodyCount.resize(numBodies, 0);
			
 
				+	m_bodyCountCheck.resize(0);
			
 
				+	m_bodyCountCheck.resize(numBodies, 0);
			
 
				+
			
 
				+	m_deltaLinearVelocities.resize(0);
			
 
				+	m_deltaLinearVelocities.resize(numBodies, b3MakeVector3(0, 0, 0));
			
 
				+	m_deltaAngularVelocities.resize(0);
			
 
				+	m_deltaAngularVelocities.resize(numBodies, b3MakeVector3(0, 0, 0));
			
 
				+
			
 
				+	//int totalBodies = 0;
			
 
				+
			
 
				+	for (int i = 0; i < numConstraints; i++)  // mark the bodies referenced by joints
			
 
				+	{
			
 
				+		int bodyIndexA = constraints[i]->getRigidBodyA();
			
 
				+		int bodyIndexB = constraints[i]->getRigidBodyB();
			
 
				+		if (m_usePgs)
			
 
				+		{
			
 
				+			m_bodyCount[bodyIndexA] = -1;
			
 
				+			m_bodyCount[bodyIndexB] = -1;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			//didn't implement joints with Jacobi version yet
			
 
				+			b3Assert(0);
			
 
				+		}
			
 
				+	}
			
 
				+	for (int i = 0; i < numManifolds; i++)  // mark or count the bodies referenced by contact manifolds
			
 
				+	{
			
 
				+		int bodyIndexA = manifoldPtr[i].getBodyA();
			
 
				+		int bodyIndexB = manifoldPtr[i].getBodyB();
			
 
				+		if (m_usePgs)
			
 
				+		{
			
 
				+			m_bodyCount[bodyIndexA] = -1;
			
 
				+			m_bodyCount[bodyIndexB] = -1;
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (bodies[bodyIndexA].m_invMass)  // non-zero inverse mass: count one more manifold for this body
			
 
				+			{
			
 
				+				//m_bodyCount[bodyIndexA]+=manifoldPtr[i].getNPoints();
			
 
				+				m_bodyCount[bodyIndexA]++;
			
 
				+			}
			
 
				+			else
			
 
				+				m_bodyCount[bodyIndexA] = -1;
			
 
				+
			
 
				+			if (bodies[bodyIndexB].m_invMass)
			
 
				+				//	m_bodyCount[bodyIndexB]+=manifoldPtr[i].getNPoints();
			
 
				+				m_bodyCount[bodyIndexB]++;
			
 
				+			else
			
 
				+				m_bodyCount[bodyIndexB] = -1;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (1)
			
 
				+	{
			
 
				+		int j;
			
 
				+		for (j = 0; j < numConstraints; j++)
			
 
				+		{
			
 
				+			b3TypedConstraint* constraint = constraints[j];
			
 
				+
			
 
				+			constraint->internalSetAppliedImpulse(0.0f);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	//b3RigidBody* rb0=0,*rb1=0;
			
 
				+	//if (1)
			
 
				+	{
			
 
				+		{
			
 
				+			int totalNumRows = 0;
			
 
				+			int i;
			
 
				+
			
 
				+			m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
			
 
				+			//calculate the total number of constraint rows
			
 
				+			for (i = 0; i < numConstraints; i++)
			
 
				+			{
			
 
				+				b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
			
 
				+				b3JointFeedback* fb = constraints[i]->getJointFeedback();
			
 
				+				if (fb)  // clear the feedback of constraints that have one
			
 
				+				{
			
 
				+					fb->m_appliedForceBodyA.setZero();
			
 
				+					fb->m_appliedTorqueBodyA.setZero();
			
 
				+					fb->m_appliedForceBodyB.setZero();
			
 
				+					fb->m_appliedTorqueBodyB.setZero();
			
 
				+				}
			
 
				+
			
 
				+				if (constraints[i]->isEnabled())  // NOTE(review): empty body; this check duplicates the one right after it
			
 
				+				{
			
 
				+				}
			
 
				+				if (constraints[i]->isEnabled())
			
 
				+				{
			
 
				+					constraints[i]->getInfo1(&info1, bodies);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					info1.m_numConstraintRows = 0;  // disabled constraints contribute no rows
			
 
				+					info1.nub = 0;
			
 
				+				}
			
 
				+				totalNumRows += info1.m_numConstraintRows;
			
 
				+			}
			
 
				+			m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
			
 
				+
			
 
				+#ifndef DISABLE_JOINTS
			
 
				+			///setup the b3SolverConstraints
			
 
				+			int currentRow = 0;
			
 
				+
			
 
				+			for (i = 0; i < numConstraints; i++)
			
 
				+			{
			
 
				+				const b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
			
 
				+
			
 
				+				if (info1.m_numConstraintRows)
			
 
				+				{
			
 
				+					b3Assert(currentRow < totalNumRows);
			
 
				+
			
 
				+					b3SolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];  // first of this constraint's consecutive rows
			
 
				+					b3TypedConstraint* constraint = constraints[i];
			
 
				+
			
 
				+					b3RigidBodyData& rbA = bodies[constraint->getRigidBodyA()];
			
 
				+					//b3RigidBody& rbA = constraint->getRigidBodyA();
			
 
				+					//				b3RigidBody& rbB = constraint->getRigidBodyB();
			
 
				+					b3RigidBodyData& rbB = bodies[constraint->getRigidBodyB()];
			
 
				+
			
 
				+					int solverBodyIdA = getOrInitSolverBody(constraint->getRigidBodyA(), bodies, inertias);
			
 
				+					int solverBodyIdB = getOrInitSolverBody(constraint->getRigidBodyB(), bodies, inertias);
			
 
				+
			
 
				+					b3SolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
			
 
				+					b3SolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
			
 
				+
			
 
				+					int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;  // a positive per-constraint override wins over the global count
			
 
				+					if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations)
			
 
				+						m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
			
 
				+
			
 
				+					int j;
			
 
				+					for (j = 0; j < info1.m_numConstraintRows; j++)  // zero each row, then set its defaults
			
 
				+					{
			
 
				+						memset(&currentConstraintRow[j], 0, sizeof(b3SolverConstraint));
			
 
				+						currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
			
 
				+						currentConstraintRow[j].m_upperLimit = B3_INFINITY;
			
 
				+						currentConstraintRow[j].m_appliedImpulse = 0.f;
			
 
				+						currentConstraintRow[j].m_appliedPushImpulse = 0.f;
			
 
				+						currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
			
 
				+						currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
			
 
				+						currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
			
 
				+					}
			
 
				+
			
 
				+					bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+					bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);
			
 
				+
			
 
				+					b3TypedConstraint::b3ConstraintInfo2 info2;
			
 
				+					info2.fps = 1.f / infoGlobal.m_timeStep;  // NOTE(review): no guard against m_timeStep == 0 is visible here
			
 
				+					info2.erp = infoGlobal.m_erp;
			
 
				+					info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
			
 
				+					info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
			
 
				+					info2.m_J2linearAxis = 0;  // set to 0, unlike the other three axis fields, which are taken from the row
			
 
				+					info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
			
 
				+					info2.rowskip = sizeof(b3SolverConstraint) / sizeof(b3Scalar);  //check this
			
 
				+																					///the size of b3SolverConstraint needs to be a multiple of b3Scalar
			
 
				+					b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3SolverConstraint));
			
 
				+					info2.m_constraintError = &currentConstraintRow->m_rhs;
			
 
				+					currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
			
 
				+					info2.m_damping = infoGlobal.m_damping;
			
 
				+					info2.cfm = &currentConstraintRow->m_cfm;
			
 
				+					info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
			
 
				+					info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
			
 
				+					info2.m_numIterations = infoGlobal.m_numIterations;
			
 
				+					constraints[i]->getInfo2(&info2, bodies);
			
 
				+
			
 
				+					///finalize the constraint setup
			
 
				+					for (j = 0; j < info1.m_numConstraintRows; j++)
			
 
				+					{
			
 
				+						b3SolverConstraint& solverConstraint = currentConstraintRow[j];
			
 
				+
			
 
				+						if (solverConstraint.m_upperLimit >= constraints[i]->getBreakingImpulseThreshold())  // clamp both limits to +/- the breaking impulse threshold
			
 
				+						{
			
 
				+							solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
			
 
				+						}
			
 
				+
			
 
				+						if (solverConstraint.m_lowerLimit <= -constraints[i]->getBreakingImpulseThreshold())
			
 
				+						{
			
 
				+							solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
			
 
				+						}
			
 
				+
			
 
				+						solverConstraint.m_originalContactPoint = constraint;  // for joint rows this field holds the owning constraint
			
 
				+
			
 
				+						b3Matrix3x3& invInertiaWorldA = inertias[constraint->getRigidBodyA()].m_invInertiaWorld;
			
 
				+						{
			
 
				+							//b3Vector3 angularFactorA(1,1,1);
			
 
				+							const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
			
 
				+							solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1;  //*angularFactorA;
			
 
				+						}
			
 
				+
			
 
				+						b3Matrix3x3& invInertiaWorldB = inertias[constraint->getRigidBodyB()].m_invInertiaWorld;
			
 
				+						{
			
 
				+							const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
			
 
				+							solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2;  //*constraint->getRigidBodyB().getAngularFactor();
			
 
				+						}
			
 
				+
			
 
				+						{
			
 
				+							//it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal
			
 
				+							//because it gets multiplied iMJlB
			
 
				+							b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass;
			
 
				+							b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal;
			
 
				+							b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass;  //sign of normal?
			
 
				+							b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal;
			
 
				+
			
 
				+							b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
			
 
				+							sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
			
 
				+							sum += iMJlB.dot(solverConstraint.m_contactNormal);
			
 
				+							sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
			
 
				+							b3Scalar fsum = b3Fabs(sum);
			
 
				+							b3Assert(fsum > B3_EPSILON);
			
 
				+							solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f;  // 1/sum, or 0 when |sum| is not above B3_EPSILON
			
 
				+						}
			
 
				+
			
 
				+						///fix rhs
			
 
				+						///todo: add force/torque accelerators
			
 
				+						{
			
 
				+							b3Scalar rel_vel;
			
 
				+							b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel);
			
 
				+							b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel);
			
 
				+
			
 
				+							rel_vel = vel1Dotn + vel2Dotn;
			
 
				+
			
 
				+							b3Scalar restitution = 0.f;
			
 
				+							b3Scalar positionalError = solverConstraint.m_rhs;  //already filled in by getConstraintInfo2
			
 
				+							b3Scalar velocityError = restitution - rel_vel * info2.m_damping;
			
 
				+							b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv;
			
 
				+							b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv;
			
 
				+							solverConstraint.m_rhs = penetrationImpulse + velocityImpulse;  // positional and velocity terms are combined into a single rhs
			
 
				+							solverConstraint.m_appliedImpulse = 0.f;
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+				currentRow += m_tmpConstraintSizesPool[i].m_numConstraintRows;
			
 
				+			}
			
 
				+#endif  //DISABLE_JOINTS
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			int i;
			
 
				+
			
 
				+			for (i = 0; i < numManifolds; i++)  // contact rows are built after all joint rows
			
 
				+			{
			
 
				+				b3Contact4& manifold = manifoldPtr[i];
			
 
				+				convertContact(bodies, inertias, &manifold, infoGlobal);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	//	b3ContactSolverInfo info = infoGlobal;
			
 
				+
			
 
				+	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
			
 
				+	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
			
 
				+	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
			
 
				+
			
 
				+	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
			
 
				+	m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool);
			
 
				+	if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
			
 
				+		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool * 2);  // twice the contact row count; only the first numConstraintPool entries are filled below
			
 
				+	else
			
 
				+		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool);
			
 
				+
			
 
				+	m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool);
			
 
				+	{
			
 
				+		int i;
			
 
				+		for (i = 0; i < numNonContactPool; i++)  // every order table starts out as the identity 0, 1, 2, ...
			
 
				+		{
			
 
				+			m_orderNonContactConstraintPool[i] = i;
			
 
				+		}
			
 
				+		for (i = 0; i < numConstraintPool; i++)
			
 
				+		{
			
 
				+			m_orderTmpConstraintPool[i] = i;
			
 
				+		}
			
 
				+		for (i = 0; i < numFrictionPool; i++)
			
 
				+		{
			
 
				+			m_orderFrictionConstraintPool[i] = i;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return 0.f;
			
 
				+}
			
 
				+
			
 
				+b3Scalar b3PgsJacobiSolver::solveSingleIteration(int iteration, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
			
 
				+{
			
 
				+	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
			
 
				+	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
			
 
				+	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
			
 
				+
			
 
				+	if (infoGlobal.m_solverMode & B3_SOLVER_RANDMIZE_ORDER)
			
 
				+	{
			
 
				+		if (1)  // uncomment this for a bit less random ((iteration & 7) == 0)
			
 
				+		{
			
 
				+			for (int j = 0; j < numNonContactPool; ++j)
			
 
				+			{
			
 
				+				int tmp = m_orderNonContactConstraintPool[j];
			
 
				+				int swapi = b3RandInt2(j + 1);
			
 
				+				m_orderNonContactConstraintPool[j] = m_orderNonContactConstraintPool[swapi];
			
 
				+				m_orderNonContactConstraintPool[swapi] = tmp;
			
 
				+			}
			
 
				+
			
 
				+			//contact/friction constraints are not solved more than
			
 
				+			if (iteration < infoGlobal.m_numIterations)
			
 
				+			{
			
 
				+				for (int j = 0; j < numConstraintPool; ++j)
			
 
				+				{
			
 
				+					int tmp = m_orderTmpConstraintPool[j];
			
 
				+					int swapi = b3RandInt2(j + 1);
			
 
				+					m_orderTmpConstraintPool[j] = m_orderTmpConstraintPool[swapi];
			
 
				+					m_orderTmpConstraintPool[swapi] = tmp;
			
 
				+				}
			
 
				+
			
 
				+				for (int j = 0; j < numFrictionPool; ++j)
			
 
				+				{
			
 
				+					int tmp = m_orderFrictionConstraintPool[j];
			
 
				+					int swapi = b3RandInt2(j + 1);
			
 
				+					m_orderFrictionConstraintPool[j] = m_orderFrictionConstraintPool[swapi];
			
 
				+					m_orderFrictionConstraintPool[swapi] = tmp;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (infoGlobal.m_solverMode & B3_SOLVER_SIMD)
			
 
				+	{
			
 
				+		///solve all joint constraints, using SIMD, if available
			
 
				+		for (int j = 0; j < m_tmpSolverNonContactConstraintPool.size(); j++)
			
 
				+		{
			
 
				+			b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
			
 
				+			if (iteration < constraint.m_overrideNumSolverIterations)
			
 
				+				resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA], m_tmpSolverBodyPool[constraint.m_solverBodyIdB], constraint);
			
 
				+		}
			
 
				+
			
 
				+		if (iteration < infoGlobal.m_numIterations)
			
 
				+		{
			
 
				+			///solve all contact constraints using SIMD, if available
			
 
				+			if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS)
			
 
				+			{
			
 
				+				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+				int multiplier = (infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) ? 2 : 1;
			
 
				+
			
 
				+				for (int c = 0; c < numPoolConstraints; c++)
			
 
				+				{
			
 
				+					b3Scalar totalImpulse = 0;
			
 
				+
			
 
				+					{
			
 
				+						const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[c]];
			
 
				+						resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+						totalImpulse = solveManifold.m_appliedImpulse;
			
 
				+					}
			
 
				+					bool applyFriction = true;
			
 
				+					if (applyFriction)
			
 
				+					{
			
 
				+						{
			
 
				+							b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c * multiplier]];
			
 
				+
			
 
				+							if (totalImpulse > b3Scalar(0))
			
 
				+							{
			
 
				+								solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse);
			
 
				+								solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse;
			
 
				+
			
 
				+								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+							}
			
 
				+						}
			
 
				+
			
 
				+						if (infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS)
			
 
				+						{
			
 
				+							b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c * multiplier + 1]];
			
 
				+
			
 
				+							if (totalImpulse > b3Scalar(0))
			
 
				+							{
			
 
				+								solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse);
			
 
				+								solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse;
			
 
				+
			
 
				+								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+							}
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+			else  //B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS
			
 
				+			{
			
 
				+				//solve the friction constraints after all contact constraints, don't interleave them
			
 
				+				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+				int j;
			
 
				+
			
 
				+				for (j = 0; j < numPoolConstraints; j++)
			
 
				+				{
			
 
				+					const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
			
 
				+					resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+				}
			
 
				+
			
 
				+				if (!m_usePgs)
			
 
				+					averageVelocities();
			
 
				+
			
 
				+				///solve all friction constraints, using SIMD, if available
			
 
				+
			
 
				+				int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
			
 
				+				for (j = 0; j < numFrictionPoolConstraints; j++)
			
 
				+				{
			
 
				+					b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
			
 
				+					b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
			
 
				+
			
 
				+					if (totalImpulse > b3Scalar(0))
			
 
				+					{
			
 
				+						solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse);
			
 
				+						solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse;
			
 
				+
			
 
				+						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+					}
			
 
				+				}
			
 
				+
			
 
				+				int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
			
 
				+				for (j = 0; j < numRollingFrictionPoolConstraints; j++)
			
 
				+				{
			
 
				+					b3SolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
			
 
				+					b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
			
 
				+					if (totalImpulse > b3Scalar(0))
			
 
				+					{
			
 
				+						b3Scalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction * totalImpulse;
			
 
				+						if (rollingFrictionMagnitude > rollingFrictionConstraint.m_friction)
			
 
				+							rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
			
 
				+
			
 
				+						rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
			
 
				+						rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
			
 
				+
			
 
				+						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA], m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB], rollingFrictionConstraint);
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		//non-SIMD version
			
 
				+		///solve all joint constraints
			
 
				+		for (int j = 0; j < m_tmpSolverNonContactConstraintPool.size(); j++)
			
 
				+		{
			
 
				+			b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
			
 
				+			if (iteration < constraint.m_overrideNumSolverIterations)
			
 
				+				resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA], m_tmpSolverBodyPool[constraint.m_solverBodyIdB], constraint);
			
 
				+		}
			
 
				+
			
 
				+		if (iteration < infoGlobal.m_numIterations)
			
 
				+		{
			
 
				+			///solve all contact constraints
			
 
				+			int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+			for (int j = 0; j < numPoolConstraints; j++)
			
 
				+			{
			
 
				+				const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
			
 
				+				resolveSingleConstraintRowLowerLimit(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+			}
			
 
				+			///solve all friction constraints
			
 
				+			int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
			
 
				+			for (int j = 0; j < numFrictionPoolConstraints; j++)
			
 
				+			{
			
 
				+				b3SolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
			
 
				+				b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
			
 
				+
			
 
				+				if (totalImpulse > b3Scalar(0))
			
 
				+				{
			
 
				+					solveManifold.m_lowerLimit = -(solveManifold.m_friction * totalImpulse);
			
 
				+					solveManifold.m_upperLimit = solveManifold.m_friction * totalImpulse;
			
 
				+
			
 
				+					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
			
 
				+			for (int j = 0; j < numRollingFrictionPoolConstraints; j++)
			
 
				+			{
			
 
				+				b3SolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
			
 
				+				b3Scalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
			
 
				+				if (totalImpulse > b3Scalar(0))
			
 
				+				{
			
 
				+					b3Scalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction * totalImpulse;
			
 
				+					if (rollingFrictionMagnitude > rollingFrictionConstraint.m_friction)
			
 
				+						rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
			
 
				+
			
 
				+					rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
			
 
				+					rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
			
 
				+
			
 
				+					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA], m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB], rollingFrictionConstraint);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return 0.f;
			
 
				+}
			
 
				+
			
 
				+void b3PgsJacobiSolver::solveGroupCacheFriendlySplitImpulseIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
			
 
				+{  // Split-impulse pass: when infoGlobal.m_splitImpulse is set, sweeps all contact constraints infoGlobal.m_numIterations times. constraints / numConstraints are not read here.
			
 
				+	int iteration;
			
 
				+	if (infoGlobal.m_splitImpulse)  // nothing is done when split impulse is disabled
			
 
				+	{
			
 
				+		if (infoGlobal.m_solverMode & B3_SOLVER_SIMD)  // the two branches differ only in the resolve routine that is called
			
 
				+		{
			
 
				+			for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++)
			
 
				+			{
			
 
				+				{
			
 
				+					int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+					int j;
			
 
				+					for (j = 0; j < numPoolConstraints; j++)
			
 
				+					{
			
 
				+						const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];  // contacts are visited through the index table m_orderTmpConstraintPool
			
 
				+
			
 
				+						resolveSplitPenetrationSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++)
			
 
				+			{
			
 
				+				{
			
 
				+					int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+					int j;
			
 
				+					for (j = 0; j < numPoolConstraints; j++)
			
 
				+					{
			
 
				+						const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];  // same visiting order as the SIMD branch
			
 
				+
			
 
				+						resolveSplitPenetrationImpulseCacheFriendly(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold);
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
			
 
				+{  // Runs the split-impulse pass once, then calls solveSingleIteration for each iteration index in [0, maxIterations). Always returns 0.
			
 
				+	B3_PROFILE("solveGroupCacheFriendlyIterations");
			
 
				+
			
 
				+	{
			
 
				+		///this is a special step to resolve penetrations (just for contacts)
			
 
				+		solveGroupCacheFriendlySplitImpulseIterations(constraints, numConstraints, infoGlobal);
			
 
				+
			
 
				+		int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations;  // the larger of the two iteration counts
			
 
				+
			
 
				+		for (int iteration = 0; iteration < maxIterations; iteration++)
			
 
				+		//for ( int iteration = maxIterations-1  ; iteration >= 0;iteration--)
			
 
				+		{
			
 
				+			solveSingleIteration(iteration, constraints, numConstraints, infoGlobal);  // its return value is discarded
			
 
				+
			
 
				+			if (!m_usePgs)  // when m_usePgs is false the velocity deltas are averaged after every iteration
			
 
				+			{
			
 
				+				averageVelocities();
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return 0.f;  // constant; no residual is computed in this function
			
 
				+}
			
 
				+
			
 
				+void b3PgsJacobiSolver::averageVelocities()
			
 
				+{  // For solver bodies with non-zero m_invMass: sums the velocity deltas per original body index, then overwrites each solver body's deltas with sum / m_bodyCount[index].
			
 
				+	B3_PROFILE("averaging");
			
 
				+	//average the velocities
			
 
				+	int numBodies = m_bodyCount.size();
			
 
				+
			
 
				+	m_deltaLinearVelocities.resize(0);  // NOTE(review): presumably emptied first so the resize below fills every slot with the zero vector - confirm b3AlignedObjectArray::resize semantics
			
 
				+	m_deltaLinearVelocities.resize(numBodies, b3MakeVector3(0, 0, 0));
			
 
				+	m_deltaAngularVelocities.resize(0);
			
 
				+	m_deltaAngularVelocities.resize(numBodies, b3MakeVector3(0, 0, 0));
			
 
				+
			
 
				+	for (int i = 0; i < m_tmpSolverBodyPool.size(); i++)  // pass 1: accumulate deltas per original body
			
 
				+	{
			
 
				+		if (!m_tmpSolverBodyPool[i].m_invMass.isZero())
			
 
				+		{
			
 
				+			int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
			
 
				+			m_deltaLinearVelocities[orgBodyIndex] += m_tmpSolverBodyPool[i].getDeltaLinearVelocity();
			
 
				+			m_deltaAngularVelocities[orgBodyIndex] += m_tmpSolverBodyPool[i].getDeltaAngularVelocity();
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	for (int i = 0; i < m_tmpSolverBodyPool.size(); i++)  // pass 2: write the scaled sums back into every contributing solver body
			
 
				+	{
			
 
				+		int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
			
 
				+
			
 
				+		if (!m_tmpSolverBodyPool[i].m_invMass.isZero())
			
 
				+		{
			
 
				+			b3Assert(m_bodyCount[orgBodyIndex] == m_bodyCountCheck[orgBodyIndex]);
			
 
				+
			
 
				+			b3Scalar factor = 1.f / b3Scalar(m_bodyCount[orgBodyIndex]);  // reciprocal of the m_bodyCount entry for this original body
			
 
				+
			
 
				+			m_tmpSolverBodyPool[i].m_deltaLinearVelocity = m_deltaLinearVelocities[orgBodyIndex] * factor;
			
 
				+			m_tmpSolverBodyPool[i].m_deltaAngularVelocity = m_deltaAngularVelocities[orgBodyIndex] * factor;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+b3Scalar b3PgsJacobiSolver::solveGroupCacheFriendlyFinish(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, const b3ContactSolverInfo& infoGlobal)
			
 
				+{  // Writes solver results back: contact impulses (warm starting only), joint feedback / applied impulse / breaking, body velocities (plus pose with split impulse); then empties the temporary pools. inertias and numBodies are not read. Always returns 0.
			
 
				+	B3_PROFILE("solveGroupCacheFriendlyFinish");
			
 
				+	int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
			
 
				+	int i, j;
			
 
				+
			
 
				+	if (infoGlobal.m_solverMode & B3_SOLVER_USE_WARMSTARTING)  // impulses are stored on the contact points only when warm starting is enabled
			
 
				+	{
			
 
				+		for (j = 0; j < numPoolConstraints; j++)
			
 
				+		{
			
 
				+			const b3SolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
			
 
				+			b3ContactPoint* pt = (b3ContactPoint*)solveManifold.m_originalContactPoint;
			
 
				+			b3Assert(pt);
			
 
				+			pt->m_appliedImpulse = solveManifold.m_appliedImpulse;
			
 
				+			//	float f = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
			
 
				+			//	printf("pt->m_appliedImpulseLateral1 = %f\n", f);
			
 
				+			pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
			
 
				+			//printf("pt->m_appliedImpulseLateral1 = %f\n", pt->m_appliedImpulseLateral1);
			
 
				+			if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
			
 
				+			{
			
 
				+				pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex + 1].m_appliedImpulse;  // second friction row is read from the slot right after the first
			
 
				+			}
			
 
				+			//do a callback here?
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	numPoolConstraints = m_tmpSolverNonContactConstraintPool.size();  // variable reused: now counts the non-contact (joint) rows
			
 
				+	for (j = 0; j < numPoolConstraints; j++)
			
 
				+	{
			
 
				+		const b3SolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
			
 
				+		b3TypedConstraint* constr = (b3TypedConstraint*)solverConstr.m_originalContactPoint;  // for these rows the pointer field holds the owning b3TypedConstraint
			
 
				+		b3JointFeedback* fb = constr->getJointFeedback();
			
 
				+		if (fb)  // feedback is accumulated only for constraints that provide a feedback object
			
 
				+		{
			
 
				+			b3SolverBody* bodyA = &m_tmpSolverBodyPool[solverConstr.m_solverBodyIdA];
			
 
				+			b3SolverBody* bodyB = &m_tmpSolverBodyPool[solverConstr.m_solverBodyIdB];
			
 
				+
			
 
				+			fb->m_appliedForceBodyA += solverConstr.m_contactNormal * solverConstr.m_appliedImpulse * bodyA->m_linearFactor / infoGlobal.m_timeStep;  // impulse divided by the time step
			
 
				+			fb->m_appliedForceBodyB += -solverConstr.m_contactNormal * solverConstr.m_appliedImpulse * bodyB->m_linearFactor / infoGlobal.m_timeStep;
			
 
				+			fb->m_appliedTorqueBodyA += solverConstr.m_relpos1CrossNormal * bodyA->m_angularFactor * solverConstr.m_appliedImpulse / infoGlobal.m_timeStep;
			
 
				+			fb->m_appliedTorqueBodyB += -solverConstr.m_relpos1CrossNormal * bodyB->m_angularFactor * solverConstr.m_appliedImpulse / infoGlobal.m_timeStep;  // NOTE(review): body B torque reuses the negated body A term (m_relpos1CrossNormal) - confirm a B-side term is not intended
			
 
				+		}
			
 
				+
			
 
				+		constr->internalSetAppliedImpulse(solverConstr.m_appliedImpulse);
			
 
				+		if (b3Fabs(solverConstr.m_appliedImpulse) >= constr->getBreakingImpulseThreshold())  // disable the constraint once |applied impulse| reaches its breaking threshold
			
 
				+		{
			
 
				+			constr->setEnabled(false);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		B3_PROFILE("write back velocities and transforms");
			
 
				+		for (i = 0; i < m_tmpSolverBodyPool.size(); i++)
			
 
				+		{
			
 
				+			int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
			
 
				+			//b3Assert(i==bodyIndex);
			
 
				+
			
 
				+			b3RigidBodyData* body = &bodies[bodyIndex];
			
 
				+			if (body->m_invMass)  // bodies whose inverse mass is zero are left untouched
			
 
				+			{
			
 
				+				if (infoGlobal.m_splitImpulse)
			
 
				+					m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp);
			
 
				+				else
			
 
				+					m_tmpSolverBodyPool[i].writebackVelocity();
			
 
				+
			
 
				+				if (m_usePgs)  // m_usePgs: copy the solver body's velocities directly
			
 
				+				{
			
 
				+					body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity;
			
 
				+					body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity;
			
 
				+				}
			
 
				+				else  // otherwise add the per-body delta sums scaled by 1 / m_bodyCount[bodyIndex]
			
 
				+				{
			
 
				+					b3Scalar factor = 1.f / b3Scalar(m_bodyCount[bodyIndex]);
			
 
				+
			
 
				+					b3Vector3 deltaLinVel = m_deltaLinearVelocities[bodyIndex] * factor;
			
 
				+					b3Vector3 deltaAngVel = m_deltaAngularVelocities[bodyIndex] * factor;
			
 
				+					//printf("body %d\n",bodyIndex);
			
 
				+					//printf("deltaLinVel = %f,%f,%f\n",deltaLinVel.getX(),deltaLinVel.getY(),deltaLinVel.getZ());
			
 
				+					//printf("deltaAngVel = %f,%f,%f\n",deltaAngVel.getX(),deltaAngVel.getY(),deltaAngVel.getZ());
			
 
				+
			
 
				+					body->m_linVel += deltaLinVel;
			
 
				+					body->m_angVel += deltaAngVel;
			
 
				+				}
			
 
				+
			
 
				+				if (infoGlobal.m_splitImpulse)  // position and orientation are copied back only in split-impulse mode
			
 
				+				{
			
 
				+					body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin();
			
 
				+					b3Quaternion orn;
			
 
				+					orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation();
			
 
				+					body->m_quat = orn;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	m_tmpSolverContactConstraintPool.resizeNoInitialize(0);  // set the size of every temporary pool back to zero
			
 
				+	m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);
			
 
				+	m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0);
			
 
				+	m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0);
			
 
				+
			
 
				+	m_tmpSolverBodyPool.resizeNoInitialize(0);
			
 
				+	return 0.f;
			
 
				+}
			
 
				+
			
 
				+void b3PgsJacobiSolver::reset()
			
 
				+{  // Sets the random seed m_btSeed2 back to 0; no other member is modified here.
			
 
				+	m_btSeed2 = 0;
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h
@@ -0,0 +1,133 @@
 
				+#ifndef B3_PGS_JACOBI_SOLVER
			
 
				+#define B3_PGS_JACOBI_SOLVER
			
 
				+
			
 
				+struct b3Contact4;
			
 
				+struct b3ContactPoint;
			
 
				+
			
 
				+class b3Dispatcher;
			
 
				+
			
 
				+#include "b3TypedConstraint.h"
			
 
				+#include "b3ContactSolverInfo.h"
			
 
				+#include "b3SolverBody.h"
			
 
				+#include "b3SolverConstraint.h"
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+struct b3InertiaData;
			
 
				+
			
 
				+class b3PgsJacobiSolver
			
 
				+{  // Constraint solver working on b3RigidBodyData / b3InertiaData arrays, b3Contact4 contacts and b3TypedConstraint joints; the m_usePgs flag selects between two code paths in the implementation.
			
 
				+protected:
			
 
				+	b3AlignedObjectArray<b3SolverBody> m_tmpSolverBodyPool;  // temporary pools below are filled per solve and emptied again in solveGroupCacheFriendlyFinish
			
 
				+	b3ConstraintArray m_tmpSolverContactConstraintPool;
			
 
				+	b3ConstraintArray m_tmpSolverNonContactConstraintPool;
			
 
				+	b3ConstraintArray m_tmpSolverContactFrictionConstraintPool;
			
 
				+	b3ConstraintArray m_tmpSolverContactRollingFrictionConstraintPool;
			
 
				+
			
 
				+	b3AlignedObjectArray<int> m_orderTmpConstraintPool;  // index tables through which the contact / non-contact / friction pools are visited
			
 
				+	b3AlignedObjectArray<int> m_orderNonContactConstraintPool;
			
 
				+	b3AlignedObjectArray<int> m_orderFrictionConstraintPool;
			
 
				+	b3AlignedObjectArray<b3TypedConstraint::b3ConstraintInfo1> m_tmpConstraintSizesPool;
			
 
				+
			
 
				+	b3AlignedObjectArray<int> m_bodyCount;  // indexed by original body index; used as the divisor when velocity deltas are averaged
			
 
				+	b3AlignedObjectArray<int> m_bodyCountCheck;  // asserted equal to m_bodyCount in averageVelocities
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocities;  // per-original-body sums of solver-body velocity deltas (see averageVelocities)
			
 
				+	b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocities;
			
 
				+
			
 
				+	bool m_usePgs;
			
 
				+	void averageVelocities();
			
 
				+
			
 
				+	int m_maxOverrideNumSolverIterations;
			
 
				+
			
 
				+	int m_numSplitImpulseRecoveries;
			
 
				+
			
 
				+	b3Scalar getContactProcessingThreshold(b3Contact4* contact)  // the contact argument is ignored; a constant is returned
			
 
				+	{
			
 
				+		return 0.02f;
			
 
				+	}
			
 
				+	void setupFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB,
			
 
				+								 b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
			
 
				+								 b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation,
			
 
				+								 b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.);
			
 
				+
			
 
				+	void setupRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB,
			
 
				+										b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
			
 
				+										b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation,
			
 
				+										b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.);
			
 
				+
			
 
				+	b3SolverConstraint& addFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity = 0., b3Scalar cfmSlip = 0.);
			
 
				+	b3SolverConstraint& addRollingFrictionConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias, const b3Vector3& normalAxis, int solverBodyIdA, int solverBodyIdB, int frictionIndex, b3ContactPoint& cp, const b3Vector3& rel_pos1, const b3Vector3& rel_pos2, b3RigidBodyData* colObj0, b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity = 0, b3Scalar cfmSlip = 0.f);
			
 
				+
			
 
				+	void setupContactConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias,
			
 
				+								b3SolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, b3ContactPoint& cp,
			
 
				+								const b3ContactSolverInfo& infoGlobal, b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation,
			
 
				+								b3Vector3& rel_pos1, b3Vector3& rel_pos2);
			
 
				+
			
 
				+	void setFrictionConstraintImpulse(b3RigidBodyData* bodies, b3InertiaData* inertias, b3SolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB,
			
 
				+									  b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
			
 
				+	unsigned long m_btSeed2;
			
 
				+
			
 
				+	b3Scalar restitutionCurve(b3Scalar rel_vel, b3Scalar restitution);
			
 
				+
			
 
				+	void convertContact(b3RigidBodyData* bodies, b3InertiaData* inertias, b3Contact4* manifold, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	void resolveSplitPenetrationSIMD(
			
 
				+		b3SolverBody& bodyA, b3SolverBody& bodyB,
			
 
				+		const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void resolveSplitPenetrationImpulseCacheFriendly(
			
 
				+		b3SolverBody& bodyA, b3SolverBody& bodyB,
			
 
				+		const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	//internal method
			
 
				+	int getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies, b3InertiaData* inertias);
			
 
				+	void initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* collisionObject);
			
 
				+
			
 
				+	void resolveSingleConstraintRowGeneric(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void resolveSingleConstraintRowGenericSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void resolveSingleConstraintRowLowerLimit(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+protected:
			
 
				+	virtual b3Scalar solveGroupCacheFriendlySetup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal);
			
 
				+	virtual void solveGroupCacheFriendlySplitImpulseIterations(b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal);
			
 
				+	b3Scalar solveSingleIteration(int iteration, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal);  // unlike the neighbouring solveGroup* stages, this one is not virtual
			
 
				+
			
 
				+	virtual b3Scalar solveGroupCacheFriendlyFinish(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	b3PgsJacobiSolver(bool usePgs);  // NOTE(review): usePgs presumably initialises m_usePgs - the constructor body is not in this header
			
 
				+	virtual ~b3PgsJacobiSolver();
			
 
				+
			
 
				+	//	void	solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts);
			
 
				+	void solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts, int numConstraints, b3TypedConstraint** constraints);
			
 
				+
			
 
				+	b3Scalar solveGroup(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, b3TypedConstraint** constraints, int numConstraints, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	///clear internal cached data and reset random seed
			
 
				+	virtual void reset();
			
 
				+
			
 
				+	unsigned long b3Rand2();
			
 
				+
			
 
				+	int b3RandInt2(int n);
			
 
				+
			
 
				+	void setRandSeed(unsigned long seed)  // plain setter for m_btSeed2
			
 
				+	{
			
 
				+		m_btSeed2 = seed;
			
 
				+	}
			
 
				+	unsigned long getRandSeed() const  // plain getter for m_btSeed2
			
 
				+	{
			
 
				+		return m_btSeed2;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_PGS_JACOBI_SOLVER
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp
@@ -0,0 +1,190 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include "b3Point2PointConstraint.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+#include <new>
			
 
				+
			
 
				+b3Point2PointConstraint::b3Point2PointConstraint(int rbA, int rbB, const b3Vector3& pivotInA, const b3Vector3& pivotInB)
			
 
				+	: b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE, rbA, rbB), m_pivotInA(pivotInA), m_pivotInB(pivotInB), m_flags(0)
			
 
				+{  // Stores the two pivots; m_flags starts at 0, so neither the ERP nor the CFM override bit (set by setParam) is active.
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+b3Point2PointConstraint::b3Point2PointConstraint(int  rbA,const b3Vector3& pivotInA)
			
 
				+:b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE,rbA),m_pivotInA(pivotInA),m_pivotInB(rbA.getCenterOfMassTransform()(pivotInA)),
			
 
				+m_flags(0),
			
 
				+m_useSolveConstraintObsolete(false)
			
 
				+{
			
 
				+	
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+void b3Point2PointConstraint::getInfo1(b3ConstraintInfo1* info, const b3RigidBodyData* bodies)
			
 
				+{  // Forwards both arguments unchanged to getInfo1NonVirtual.
			
 
				+	getInfo1NonVirtual(info, bodies);
			
 
				+}
			
 
				+
			
 
				+void b3Point2PointConstraint::getInfo1NonVirtual(b3ConstraintInfo1* info, const b3RigidBodyData* bodies)
			
 
				+{  // Reports the size of this constraint: 3 rows. bodies is not read.
			
 
				+	info->m_numConstraintRows = 3;
			
 
				+	info->nub = 3;  // set to the same value as the row count
			
 
				+}
			
 
				+
			
 
				+void b3Point2PointConstraint::getInfo2(b3ConstraintInfo2* info, const b3RigidBodyData* bodies)
			
 
				+{  // Builds a transform for each body from bodies[m_rbA] / bodies[m_rbB] (m_pos, m_quat) and delegates to getInfo2NonVirtual.
			
 
				+	b3Transform trA;
			
 
				+	trA.setIdentity();
			
 
				+	trA.setOrigin(bodies[m_rbA].m_pos);
			
 
				+	trA.setRotation(bodies[m_rbA].m_quat);
			
 
				+
			
 
				+	b3Transform trB;
			
 
				+	trB.setIdentity();
			
 
				+	trB.setOrigin(bodies[m_rbB].m_pos);
			
 
				+	trB.setRotation(bodies[m_rbB].m_quat);
			
 
				+
			
 
				+	getInfo2NonVirtual(info, trA, trB);
			
 
				+}
			
 
				+
			
 
				+void b3Point2PointConstraint::getInfo2NonVirtual(b3ConstraintInfo2* info, const b3Transform& body0_trans, const b3Transform& body1_trans)
			
 
				+{  // Fills 3 rows of info: linear and angular Jacobian entries for both bodies, the ERP-scaled pivot separation as constraint error, optional CFM and impulse limits, and damping.
			
 
				+	//retrieve matrices
			
 
				+
			
 
				+	// anchor points in global coordinates with respect to body PORs.
			
 
				+
			
 
				+	// set jacobian
			
 
				+	info->m_J1linearAxis[0] = 1;  // consecutive rows are info->rowskip elements apart; these three writes set the diagonal entries to 1
			
 
				+	info->m_J1linearAxis[info->rowskip + 1] = 1;
			
 
				+	info->m_J1linearAxis[2 * info->rowskip + 2] = 1;
			
 
				+
			
 
				+	b3Vector3 a1 = body0_trans.getBasis() * getPivotInA();  // pivot A multiplied by body 0's basis only (origin not added)
			
 
				+	//b3Vector3 a1a = b3QuatRotate(body0_trans.getRotation(),getPivotInA());
			
 
				+
			
 
				+	{
			
 
				+		b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis);  // the three angular rows are addressed as b3Vector3 through casts
			
 
				+		b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip);
			
 
				+		b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip);
			
 
				+		b3Vector3 a1neg = -a1;
			
 
				+		a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2);  // body A rows come from the skew-symmetric matrix of -a1
			
 
				+	}
			
 
				+
			
 
				+	if (info->m_J2linearAxis)  // the body B linear block is written only when the caller supplied the array
			
 
				+	{
			
 
				+		info->m_J2linearAxis[0] = -1;
			
 
				+		info->m_J2linearAxis[info->rowskip + 1] = -1;
			
 
				+		info->m_J2linearAxis[2 * info->rowskip + 2] = -1;
			
 
				+	}
			
 
				+
			
 
				+	b3Vector3 a2 = body1_trans.getBasis() * getPivotInB();  // pivot B multiplied by body 1's basis only (origin not added)
			
 
				+
			
 
				+	{
			
 
				+		//	b3Vector3 a2n = -a2;
			
 
				+		b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis);
			
 
				+		b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip);
			
 
				+		b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip);
			
 
				+		a2.getSkewSymmetricMatrix(angular0, angular1, angular2);  // body B rows use +a2 (no negation, unlike body A)
			
 
				+	}
			
 
				+
			
 
				+	// set right hand side
			
 
				+	b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;  // m_erp when the ERP flag is set (see setParam), otherwise the value carried in info
			
 
				+	b3Scalar k = info->fps * currERP;
			
 
				+	int j;
			
 
				+	for (j = 0; j < 3; j++)
			
 
				+	{
			
 
				+		info->m_constraintError[j * info->rowskip] = k * (a2[j] + body1_trans.getOrigin()[j] - a1[j] - body0_trans.getOrigin()[j]);  // k * ((a2 + origin1) - (a1 + origin0)) along axis j
			
 
				+		//printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]);
			
 
				+	}
			
 
				+	if (m_flags & B3_P2P_FLAGS_CFM)  // cfm entries are written only when the CFM flag is set (see setParam)
			
 
				+	{
			
 
				+		for (j = 0; j < 3; j++)
			
 
				+		{
			
 
				+			info->cfm[j * info->rowskip] = m_cfm;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar impulseClamp = m_setting.m_impulseClamp;  // local copy used for the symmetric limits below
			
 
				+	for (j = 0; j < 3; j++)
			
 
				+	{
			
 
				+		if (m_setting.m_impulseClamp > 0)  // limits are written only for a positive clamp; otherwise the entries in info are left as they were
			
 
				+		{
			
 
				+			info->m_lowerLimit[j * info->rowskip] = -impulseClamp;
			
 
				+			info->m_upperLimit[j * info->rowskip] = impulseClamp;
			
 
				+		}
			
 
				+	}
			
 
				+	info->m_damping = m_setting.m_damping;
			
 
				+}
			
 
				+
			
 
				+void b3Point2PointConstraint::updateRHS(b3Scalar timeStep)
			
 
				+{  // No-op: the body only references timeStep through a void cast.
			
 
				+	(void)timeStep;
			
 
				+}
			
 
				+
			
 
				+///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
			
 
				+///If no axis is provided, it uses the default axis for this constraint.
			
 
				+void b3Point2PointConstraint::setParam(int num, b3Scalar value, int axis)
			
 
				+{  // Stores value in m_erp or m_cfm according to num and sets the matching bit in m_flags; only axis == -1 is handled.
			
 
				+	if (axis != -1)  // any explicit axis goes to b3AssertConstrParams(0) and nothing is stored
			
 
				+	{
			
 
				+		b3AssertConstrParams(0);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		switch (num)
			
 
				+		{
			
 
				+			case B3_CONSTRAINT_ERP:
			
 
				+			case B3_CONSTRAINT_STOP_ERP:  // both ERP selectors write the same member
			
 
				+				m_erp = value;
			
 
				+				m_flags |= B3_P2P_FLAGS_ERP;
			
 
				+				break;
			
 
				+			case B3_CONSTRAINT_CFM:
			
 
				+			case B3_CONSTRAINT_STOP_CFM:  // both CFM selectors write the same member
			
 
				+				m_cfm = value;
			
 
				+				m_flags |= B3_P2P_FLAGS_CFM;
			
 
				+				break;
			
 
				+			default:  // every other selector is rejected
			
 
				+				b3AssertConstrParams(0);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+///return the local value of parameter
			
 
				+b3Scalar b3Point2PointConstraint::getParam(int num, int axis) const
			
 
				+{  // Returns m_erp or m_cfm according to num when axis == -1; the matching flag bit is checked with b3AssertConstrParams first.
			
 
				+	b3Scalar retVal(B3_INFINITY);  // value returned when no case below assigns retVal
			
 
				+	if (axis != -1)  // any explicit axis goes to b3AssertConstrParams(0)
			
 
				+	{
			
 
				+		b3AssertConstrParams(0);
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		switch (num)
			
 
				+		{
			
 
				+			case B3_CONSTRAINT_ERP:
			
 
				+			case B3_CONSTRAINT_STOP_ERP:
			
 
				+				b3AssertConstrParams(m_flags & B3_P2P_FLAGS_ERP);  // checks that the ERP flag bit (set by setParam) is present
			
 
				+				retVal = m_erp;
			
 
				+				break;
			
 
				+			case B3_CONSTRAINT_CFM:
			
 
				+			case B3_CONSTRAINT_STOP_CFM:
			
 
				+				b3AssertConstrParams(m_flags & B3_P2P_FLAGS_CFM);  // checks that the CFM flag bit (set by setParam) is present
			
 
				+				retVal = m_cfm;
			
 
				+				break;
			
 
				+			default:
			
 
				+				b3AssertConstrParams(0);
			
 
				+		}
			
 
				+	}
			
 
				+	return retVal;
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h
@@ -0,0 +1,153 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_POINT2POINTCONSTRAINT_H
			
 
				+#define B3_POINT2POINTCONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+//#include "b3JacobianEntry.h"
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+class b3RigidBody;
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3Point2PointConstraintData b3Point2PointConstraintDoubleData
			
 
				+#define b3Point2PointConstraintDataName "b3Point2PointConstraintDoubleData"
			
 
				+#else
			
 
				+#define b3Point2PointConstraintData b3Point2PointConstraintFloatData
			
 
				+#define b3Point2PointConstraintDataName "b3Point2PointConstraintFloatData"
			
 
				+#endif  //B3_USE_DOUBLE_PRECISION
			
 
				+/// Three scalar settings held by b3Point2PointConstraint as m_setting; the constructor supplies the defaults below.
				+struct b3ConstraintSetting
				+{
				+	b3ConstraintSetting() : m_tau(b3Scalar(0.3)),
				+							m_damping(b3Scalar(1.)),
				+							m_impulseClamp(b3Scalar(0.))
				+	{
				+	}
				+	b3Scalar m_tau;  // default 0.3
				+	b3Scalar m_damping;  // default 1
				+	b3Scalar m_impulseClamp;  // default 0 (NOTE(review): presumably 0 means "no clamping" - confirm against the solver)
				+};
			
 
				+/// Bit flags kept in b3Point2PointConstraint::m_flags; setParam() sets a bit when the matching value is overridden.
				+enum b3Point2PointFlags
				+{
				+	B3_P2P_FLAGS_ERP = 1,  // m_erp has been set through setParam()
				+	B3_P2P_FLAGS_CFM = 2  // m_cfm has been set through setParam()
				+};
			
 
				+
			
 
				+/// point to point constraint between two rigidbodies each with a pivotpoint that describes the 'ballsocket' location in local space
				+B3_ATTRIBUTE_ALIGNED16(class)
				+b3Point2PointConstraint : public b3TypedConstraint
				+{
				+#ifdef IN_PARALLELL_SOLVER  // when defined, the data members below become public
				+public:
				+#endif
				+
				+	b3Vector3 m_pivotInA;  // pivot for body A; see setPivotA()/getPivotInA()
				+	b3Vector3 m_pivotInB;  // pivot for body B; see setPivotB()/getPivotInB()
				+
				+	int m_flags;  // combination of b3Point2PointFlags bits
				+	b3Scalar m_erp;  // meaningful once B3_P2P_FLAGS_ERP is set in m_flags
				+	b3Scalar m_cfm;  // meaningful once B3_P2P_FLAGS_CFM is set in m_flags
				+
				+public:
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	b3ConstraintSetting m_setting;  // publicly adjustable tau / damping / impulse clamp
				+
				+	b3Point2PointConstraint(int rbA, int rbB, const b3Vector3& pivotInA, const b3Vector3& pivotInB);  // rbA/rbB are integer body ids, not pointers
				+
				+	//b3Point2PointConstraint(int  rbA,const b3Vector3& pivotInA);
				+
				+	virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies);
				+
				+	void getInfo1NonVirtual(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies);
				+
				+	virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies);
				+
				+	void getInfo2NonVirtual(b3ConstraintInfo2 * info, const b3Transform& body0_trans, const b3Transform& body1_trans);
				+
				+	void updateRHS(b3Scalar timeStep);
				+
				+	void setPivotA(const b3Vector3& pivotA)  // replaces the stored pivot for body A
				+	{
				+		m_pivotInA = pivotA;
				+	}
				+
				+	void setPivotB(const b3Vector3& pivotB)  // replaces the stored pivot for body B
				+	{
				+		m_pivotInB = pivotB;
				+	}
				+
				+	const b3Vector3& getPivotInA() const  // read-only access to the stored pivot for body A
				+	{
				+		return m_pivotInA;
				+	}
				+
				+	const b3Vector3& getPivotInB() const  // read-only access to the stored pivot for body B
				+	{
				+		return m_pivotInB;
				+	}
				+
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
				+	///If no axis is provided (axis == -1), it uses the default axis for this constraint.
				+	virtual void setParam(int num, b3Scalar value, int axis = -1);
				+	///return the local value of parameter
				+	virtual b3Scalar getParam(int num, int axis = -1) const;
				+
				+	//	virtual	int	calculateSerializeBufferSize() const;
				+
				+	///fills the dataBuffer and returns the struct name (and 0 on failure) - currently disabled, see the commented-out declaration below
				+	//	virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;
				+};
			
 
				+
			
 
				+///Single-precision serialization layout (aliased as b3Point2PointConstraintData when B3_USE_DOUBLE_PRECISION is not defined). Do not change these serialization structures: that requires an updated sBulletDNAstr/sBulletDNAstr64.
				+struct b3Point2PointConstraintFloatData
				+{
				+	b3TypedConstraintData m_typeConstraintData;  // data of the b3TypedConstraint base
				+	b3Vector3FloatData m_pivotInA;
				+	b3Vector3FloatData m_pivotInB;
				+};
			
 
				+
			
 
				+///Double-precision serialization layout (aliased as b3Point2PointConstraintData when B3_USE_DOUBLE_PRECISION is defined). Do not change these serialization structures: that requires an updated sBulletDNAstr/sBulletDNAstr64.
				+struct b3Point2PointConstraintDoubleData
				+{
				+	b3TypedConstraintData m_typeConstraintData;  // data of the b3TypedConstraint base
				+	b3Vector3DoubleData m_pivotInA;
				+	b3Vector3DoubleData m_pivotInB;
				+};
			
 
				+
			
 
				+/*
			
 
				+B3_FORCE_INLINE	int	b3Point2PointConstraint::calculateSerializeBufferSize() const
			
 
				+{
			
 
				+	return sizeof(b3Point2PointConstraintData);
			
 
				+
			
 
				+}
			
 
				+
			
 
				+	///fills the dataBuffer and returns the struct name (and 0 on failure)
			
 
				+B3_FORCE_INLINE	const char*	b3Point2PointConstraint::serialize(void* dataBuffer, b3Serializer* serializer) const
			
 
				+{
			
 
				+	b3Point2PointConstraintData* p2pData = (b3Point2PointConstraintData*)dataBuffer;
			
 
				+
			
 
				+	b3TypedConstraint::serialize(&p2pData->m_typeConstraintData,serializer);
			
 
				+	m_pivotInA.serialize(p2pData->m_pivotInA);
			
 
				+	m_pivotInB.serialize(p2pData->m_pivotInB);
			
 
				+
			
 
				+	return b3Point2PointConstraintDataName;
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+#endif  //B3_POINT2POINTCONSTRAINT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
@@ -0,0 +1,281 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SOLVER_BODY_H
			
 
				+#define B3_SOLVER_BODY_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedAllocator.h"
			
 
				+#include "Bullet3Common/b3TransformUtil.h"
			
 
				+
			
 
				+///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
			
 
				+#ifdef B3_USE_SSE
			
 
				+#define USE_SIMD 1
			
 
				+#endif  //B3_USE_SSE
			
 
				+
			
 
				+#ifdef USE_SIMD
			
 
				+/// Wrapper around one __m128 SSE register used as a "scalar"; only compiled when USE_SIMD is defined.
				+struct b3SimdScalar
				+{
				+	B3_FORCE_INLINE b3SimdScalar()  // leaves the register uninitialized
				+	{
				+	}
				+
				+	B3_FORCE_INLINE b3SimdScalar(float fl)
				+		: m_vec128(_mm_set1_ps(fl))  // broadcasts fl into all four lanes
				+	{
				+	}
				+
				+	B3_FORCE_INLINE b3SimdScalar(__m128 v128)
				+		: m_vec128(v128)
				+	{
				+	}
				+	union {  // alternative views of the same storage
				+		__m128 m_vec128;
				+		float m_floats[4];
				+		float x, y, z, w;  // NOTE(review): declared as four separate union members, so all four alias the same first float rather than lanes 0..3
				+		int m_ints[4];
				+		b3Scalar m_unusedPadding;
				+	};
				+	B3_FORCE_INLINE __m128 get128()
				+	{
				+		return m_vec128;
				+	}
				+
				+	B3_FORCE_INLINE const __m128 get128() const
				+	{
				+		return m_vec128;
				+	}
				+
				+	B3_FORCE_INLINE void set128(__m128 v128)
				+	{
				+		m_vec128 = v128;
				+	}
				+
				+	B3_FORCE_INLINE operator __m128()
				+	{
				+		return m_vec128;
				+	}
				+	B3_FORCE_INLINE operator const __m128() const
				+	{
				+		return m_vec128;
				+	}
				+
				+	B3_FORCE_INLINE operator float() const  // converts by reading lane 0 only
				+	{
				+		return m_floats[0];
				+	}
				+};
			
 
				+
			
 
				+///@brief Return the elementwise (per-lane) product of two b3SimdScalar, computed with _mm_mul_ps
				+B3_FORCE_INLINE b3SimdScalar
				+operator*(const b3SimdScalar& v1, const b3SimdScalar& v2)
				+{
				+	return b3SimdScalar(_mm_mul_ps(v1.get128(), v2.get128()));
				+}
			
 
				+
			
 
				+///@brief Return the elementwise (per-lane) sum of two b3SimdScalar, computed with _mm_add_ps
				+B3_FORCE_INLINE b3SimdScalar
				+operator+(const b3SimdScalar& v1, const b3SimdScalar& v2)
				+{
				+	return b3SimdScalar(_mm_add_ps(v1.get128(), v2.get128()));
				+}
			
 
				+
			
 
				+#else
			
 
				+#define b3SimdScalar b3Scalar
			
 
				+#endif
			
 
				+
			
 
				+///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
				+B3_ATTRIBUTE_ALIGNED16(struct)
				+b3SolverBody
				+{
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+	b3Transform m_worldTransform;  // advanced by writebackVelocityAndTransform() using the push/turn velocities
				+	b3Vector3 m_deltaLinearVelocity;  // accumulated by (internal)applyImpulse(); added to m_linearVelocity on writeback
				+	b3Vector3 m_deltaAngularVelocity;  // accumulated by (internal)applyImpulse(); added to m_angularVelocity on writeback
				+	b3Vector3 m_angularFactor;  // multiplied into every angular impulse
				+	b3Vector3 m_linearFactor;  // multiplied into every linear impulse
				+	b3Vector3 m_invMass;
				+	b3Vector3 m_pushVelocity;  // accumulated by internalApplyPushImpulse()
				+	b3Vector3 m_turnVelocity;  // accumulated by internalApplyPushImpulse()
				+	b3Vector3 m_linearVelocity;
				+	b3Vector3 m_angularVelocity;
				+
				+	union {  // pointer and index share storage; NOTE(review): the `if (m_originalBody)` checks below read the pointer view - confirm which member callers actually store
				+		void* m_originalBody;
				+		int m_originalBodyIndex;
				+	};
				+
				+	int padding[3];
				+
				+	void setWorldTransform(const b3Transform& worldTransform)
				+	{
				+		m_worldTransform = worldTransform;
				+	}
				+
				+	const b3Transform& getWorldTransform() const
				+	{
				+		return m_worldTransform;
				+	}
				+
				+	/// Writes the velocity at offset rel_pos (linear + angular.cross(rel_pos), pending deltas included) to velocity; writes zero when m_originalBody is null.
				+	B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const
				+	{
				+		if (m_originalBody)
				+			velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos);
				+		else
				+			velocity.setValue(0, 0, 0);
				+	}
				+
				+	/// Writes the angular velocity including the pending delta to angVel; writes zero when m_originalBody is null.
				+	B3_FORCE_INLINE void getAngularVelocity(b3Vector3 & angVel) const
				+	{
				+		if (m_originalBody)
				+			angVel = m_angularVelocity + m_deltaAngularVelocity;
				+		else
				+			angVel.setValue(0, 0, 0);
				+	}
				+
				+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
				+	B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude)
				+	{
				+		if (m_originalBody)  // no-op when m_originalBody is null
				+		{
				+			m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor;
				+			m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor);
				+		}
				+	}
				+
				+	/// Same accumulation as applyImpulse(), but into the separate push/turn velocities.
				+	B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, b3Scalar impulseMagnitude)
				+	{
				+		if (m_originalBody)  // no-op when m_originalBody is null
				+		{
				+			m_pushVelocity += linearComponent * impulseMagnitude * m_linearFactor;
				+			m_turnVelocity += angularComponent * (impulseMagnitude * m_angularFactor);
				+		}
				+	}
				+
				+	const b3Vector3& getDeltaLinearVelocity() const
				+	{
				+		return m_deltaLinearVelocity;
				+	}
				+
				+	const b3Vector3& getDeltaAngularVelocity() const
				+	{
				+		return m_deltaAngularVelocity;
				+	}
				+
				+	const b3Vector3& getPushVelocity() const
				+	{
				+		return m_pushVelocity;
				+	}
				+
				+	const b3Vector3& getTurnVelocity() const
				+	{
				+		return m_turnVelocity;
				+	}
				+
				+	////////////////////////////////////////////////
				+	///some internal methods, don't use them (several return mutable references or skip the m_originalBody check)
				+
				+	b3Vector3& internalGetDeltaLinearVelocity()
				+	{
				+		return m_deltaLinearVelocity;
				+	}
				+
				+	b3Vector3& internalGetDeltaAngularVelocity()
				+	{
				+		return m_deltaAngularVelocity;
				+	}
				+
				+	const b3Vector3& internalGetAngularFactor() const
				+	{
				+		return m_angularFactor;
				+	}
				+
				+	const b3Vector3& internalGetInvMass() const
				+	{
				+		return m_invMass;
				+	}
				+
				+	void internalSetInvMass(const b3Vector3& invMass)
				+	{
				+		m_invMass = invMass;
				+	}
				+
				+	b3Vector3& internalGetPushVelocity()
				+	{
				+		return m_pushVelocity;
				+	}
				+
				+	b3Vector3& internalGetTurnVelocity()
				+	{
				+		return m_turnVelocity;
				+	}
				+
				+	/// Unchecked variant of getVelocityInLocalPointObsolete(): no m_originalBody test.
				+	B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const
				+	{
				+		velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos);
				+	}
				+
				+	/// Unchecked variant of getAngularVelocity(): no m_originalBody test.
				+	B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3 & angVel) const
				+	{
				+		angVel = m_angularVelocity + m_deltaAngularVelocity;
				+	}
				+
				+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
				+	B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude)
				+	{
				+		//if (m_originalBody)  -- check intentionally disabled here; the impulse is always applied
				+		{
				+			m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor;
				+			m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor);
				+		}
				+	}
				+
				+	/// Adds the accumulated delta velocities to the body velocities (unconditionally; the deltas are not cleared here).
				+	void writebackVelocity()
				+	{
				+		//if (m_originalBody>=0)
				+		{
				+			m_linearVelocity += m_deltaLinearVelocity;
				+			m_angularVelocity += m_deltaAngularVelocity;
				+
				+			//m_originalBody->setCompanionId(-1);
				+		}
				+	}
				+
				+	/// Like writebackVelocity(), but only when m_originalBody is non-null, and additionally integrates m_worldTransform by the push/turn velocities over timeStep.
				+	void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp)
				+	{
				+		(void)timeStep;  // NOTE(review): timeStep is in fact used below, so this cast is redundant
				+		if (m_originalBody)
				+		{
				+			m_linearVelocity += m_deltaLinearVelocity;
				+			m_angularVelocity += m_deltaAngularVelocity;
				+
				+			//correct the position/orientation based on push/turn recovery
				+			b3Transform newTransform;
				+			if (m_pushVelocity[0] != 0.f || m_pushVelocity[1] != 0 || m_pushVelocity[2] != 0 || m_turnVelocity[0] != 0.f || m_turnVelocity[1] != 0 || m_turnVelocity[2] != 0)  // skip integration when both push and turn velocities are exactly zero
				+			{
				+				//	b3Quaternion orn = m_worldTransform.getRotation();
				+				b3TransformUtil::integrateTransform(m_worldTransform, m_pushVelocity, m_turnVelocity * splitImpulseTurnErp, timeStep, newTransform);  // turn velocity is scaled by splitImpulseTurnErp before integrating
				+				m_worldTransform = newTransform;
				+			}
				+			//m_worldTransform.setRotation(orn);
				+			//m_originalBody->setCompanionId(-1);
				+		}
				+	}
				+};
			
 
				+
			
 
				+#endif  //B3_SOLVER_BODY_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h
@@ -0,0 +1,73 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SOLVER_CONSTRAINT_H
			
 
				+#define B3_SOLVER_CONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+//#include "b3JacobianEntry.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+//#define NO_FRICTION_TANGENTIALS 1
			
 
				+#include "b3SolverBody.h"
			
 
				+
			
 
				+///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
				+B3_ATTRIBUTE_ALIGNED16(struct)
				+b3SolverConstraint
				+{
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	b3Vector3 m_relpos1CrossNormal;
				+	b3Vector3 m_contactNormal;
				+
				+	b3Vector3 m_relpos2CrossNormal;
				+	//b3Vector3		m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
				+
				+	b3Vector3 m_angularComponentA;
				+	b3Vector3 m_angularComponentB;
				+
				+	mutable b3SimdScalar m_appliedPushImpulse;  // mutable: may be updated through a const reference
				+	mutable b3SimdScalar m_appliedImpulse;  // mutable: may be updated through a const reference
				+	int m_padding1;
				+	int m_padding2;
				+	b3Scalar m_friction;
				+	b3Scalar m_jacDiagABInv;
				+	b3Scalar m_rhs;
				+	b3Scalar m_cfm;
				+
				+	b3Scalar m_lowerLimit;
				+	b3Scalar m_upperLimit;
				+	b3Scalar m_rhsPenetration;
				+	union {  // the pointer shares storage with a padding scalar
				+		void* m_originalContactPoint;
				+		b3Scalar m_unusedPadding4;
				+	};
				+
				+	int m_overrideNumSolverIterations;
				+	int m_frictionIndex;
				+	int m_solverBodyIdA;  // NOTE(review): presumably an index into the solver-body array - confirm against the solver
				+	int m_solverBodyIdB;  // NOTE(review): presumably an index into the solver-body array - confirm against the solver
				+
				+	enum b3SolverConstraintType
				+	{
				+		B3_SOLVER_CONTACT_1D = 0,
				+		B3_SOLVER_FRICTION_1D  // == 1
				+	};
				+};
			
 
				+/// Shorthand for a b3AlignedObjectArray holding b3SolverConstraint elements.
				+typedef b3AlignedObjectArray<b3SolverConstraint> b3ConstraintArray;
			
 
				+
			
 
				+#endif  //B3_SOLVER_CONSTRAINT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp
@@ -0,0 +1,151 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include "b3TypedConstraint.h"
			
 
				+//#include "Bullet3Common/b3Serializer.h"
			
 
				+
			
 
				+#define B3_DEFAULT_DEBUGDRAW_SIZE b3Scalar(0.3f)
			
 
				+/// Constructs a constraint of the given type between the bodies identified by the integer ids rbA and rbB; all other members get the defaults below.
				+b3TypedConstraint::b3TypedConstraint(b3TypedConstraintType type, int rbA, int rbB)
				+	: b3TypedObject(type),
				+	  m_userConstraintType(-1),
				+	  m_userConstraintPtr((void*)-1),  // sentinel pointer value (not null)
				+	  m_breakingImpulseThreshold(B3_INFINITY),
				+	  m_isEnabled(true),
				+	  m_needsFeedback(false),
				+	  m_overrideNumSolverIterations(-1),  // NOTE(review): presumably -1 means "no override" - confirm against the solver
				+	  m_rbA(rbA),
				+	  m_rbB(rbB),
				+	  m_appliedImpulse(b3Scalar(0.)),
				+	  m_dbgDrawSize(B3_DEFAULT_DEBUGDRAW_SIZE),  // 0.3, see the #define above
				+	  m_jointFeedback(0)  // null until a feedback object is attached
				+{
				+}
			
 
				+/// Returns a factor in [0, 1] for velocity vel at position pos: 1 if lowLim > uppLim, 0 if the limits are equal, vel / timeFact is zero, or pos is already beyond the limit being approached; otherwise 1, ramping linearly towards 0 once that limit is closer than |vel / timeFact|.
				+b3Scalar b3TypedConstraint::getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact)
				+{
				+	if (lowLim > uppLim)  // NOTE(review): presumably inverted limits mean "unlimited" - confirm with callers
				+	{
				+		return b3Scalar(1.0f);
				+	}
				+	else if (lowLim == uppLim)
				+	{
				+		return b3Scalar(0.0f);
				+	}
				+	b3Scalar lim_fact = b3Scalar(1.0f);
				+	b3Scalar delta_max = vel / timeFact;  // NOTE(review): timeFact is not checked for zero here
				+	if (delta_max < b3Scalar(0.0f))  // moving towards the lower limit
				+	{
				+		if ((pos >= lowLim) && (pos < (lowLim - delta_max)))  // within |delta_max| above the lower limit
				+		{
				+			lim_fact = (lowLim - pos) / delta_max;  // both terms are <= 0, so the ratio lies in [0, 1)
				+		}
				+		else if (pos < lowLim)  // already below the lower limit
				+		{
				+			lim_fact = b3Scalar(0.0f);
				+		}
				+		else
				+		{
				+			lim_fact = b3Scalar(1.0f);
				+		}
				+	}
				+	else if (delta_max > b3Scalar(0.0f))  // moving towards the upper limit
				+	{
				+		if ((pos <= uppLim) && (pos > (uppLim - delta_max)))  // within delta_max below the upper limit
				+		{
				+			lim_fact = (uppLim - pos) / delta_max;  // ratio lies in [0, 1)
				+		}
				+		else if (pos > uppLim)  // already above the upper limit
				+		{
				+			lim_fact = b3Scalar(0.0f);
				+		}
				+		else
				+		{
				+			lim_fact = b3Scalar(1.0f);
				+		}
				+	}
				+	else  // delta_max is zero (or not comparable)
				+	{
				+		lim_fact = b3Scalar(0.0f);
				+	}
				+	return lim_fact;
				+}
			
 
				+/// Stores the limit [low, high] as a normalized center angle plus a half range, and copies the three tuning factors.
				+void b3AngularLimit::set(b3Scalar low, b3Scalar high, b3Scalar _softness, b3Scalar _biasFactor, b3Scalar _relaxationFactor)
				+{
				+	m_halfRange = (high - low) / 2.0f;  // negative when low > high
				+	m_center = b3NormalizeAngle(low + m_halfRange);  // midpoint of the range, passed through b3NormalizeAngle
				+	m_softness = _softness;
				+	m_biasFactor = _biasFactor;
				+	m_relaxationFactor = _relaxationFactor;
				+}
			
 
				+/// Checks angle against the stored limit and updates m_solveLimit, m_correction and m_sign; all three are reset first and stay reset when the angle is within range or m_halfRange is negative.
				+void b3AngularLimit::test(const b3Scalar angle)
				+{
				+	m_correction = 0.0f;
				+	m_sign = 0.0f;
				+	m_solveLimit = false;
				+
				+	if (m_halfRange >= 0.0f)  // a negative half range (low > high in set()) skips the test entirely
				+	{
				+		b3Scalar deviation = b3NormalizeAngle(angle - m_center);  // offset from the range center
				+		if (deviation < -m_halfRange)  // below the lower bound
				+		{
				+			m_solveLimit = true;
				+			m_correction = -(deviation + m_halfRange);  // positive distance back to the lower bound
				+			m_sign = +1.0f;
				+		}
				+		else if (deviation > m_halfRange)  // above the upper bound
				+		{
				+			m_solveLimit = true;
				+			m_correction = m_halfRange - deviation;  // negative distance back to the upper bound
				+			m_sign = -1.0f;
				+		}
				+	}
				+}
			
 
				+/// Returns m_correction multiplied by m_sign, both as left by the last call to test().
				+b3Scalar b3AngularLimit::getError() const
				+{
				+	return m_correction * m_sign;
				+}
			
 
				+/// Adjusts angle in place to getHigh() or getLow() depending on the sign of its offset from m_center; does nothing when m_halfRange <= 0.
				+void b3AngularLimit::fit(b3Scalar& angle) const
				+{
				+	if (m_halfRange > 0.0f)
				+	{
				+		b3Scalar relativeAngle = b3NormalizeAngle(angle - m_center);  // offset from the range center
				+		if (!b3Equal(relativeAngle, m_halfRange))  // NOTE(review): as written this also rewrites angles that lie inside the range; confirm b3Equal's semantics before relying on this
				+		{
				+			if (relativeAngle > 0.0f)
				+			{
				+				angle = getHigh();  // positive offset: use the upper bound
				+			}
				+			else
				+			{
				+				angle = getLow();  // zero or negative offset: use the lower bound
				+			}
				+		}
				+	}
				+}
			
 
				+/// Returns the lower bound, m_center - m_halfRange, passed through b3NormalizeAngle.
				+b3Scalar b3AngularLimit::getLow() const
				+{
				+	return b3NormalizeAngle(m_center - m_halfRange);
				+}
			
 
				+/// Returns the upper bound, m_center + m_halfRange, passed through b3NormalizeAngle.
				+b3Scalar b3AngularLimit::getHigh() const
				+{
				+	return b3NormalizeAngle(m_center + m_halfRange);
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h
@@ -0,0 +1,469 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2010 Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_TYPED_CONSTRAINT_H
			
 
				+#define B3_TYPED_CONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+#include "b3SolverConstraint.h"
			
 
				+
			
 
				+class b3Serializer;
			
 
				+
			
 
				+//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
			
 
				+enum b3TypedConstraintType
			
 
				+{
			
 
				+	B3_POINT2POINT_CONSTRAINT_TYPE = 3,
			
 
				+	B3_HINGE_CONSTRAINT_TYPE,
			
 
				+	B3_CONETWIST_CONSTRAINT_TYPE,
			
 
				+	B3_D6_CONSTRAINT_TYPE,
			
 
				+	B3_SLIDER_CONSTRAINT_TYPE,
			
 
				+	B3_CONTACT_CONSTRAINT_TYPE,
			
 
				+	B3_D6_SPRING_CONSTRAINT_TYPE,
			
 
				+	B3_GEAR_CONSTRAINT_TYPE,
			
 
				+	B3_FIXED_CONSTRAINT_TYPE,
			
 
				+	B3_MAX_CONSTRAINT_TYPE
			
 
				+};
			
 
				+
			
 
				+enum b3ConstraintParams
			
 
				+{
			
 
				+	B3_CONSTRAINT_ERP = 1,
			
 
				+	B3_CONSTRAINT_STOP_ERP,
			
 
				+	B3_CONSTRAINT_CFM,
			
 
				+	B3_CONSTRAINT_STOP_CFM
			
 
				+};
			
 
				+
			
 
// Assertion macro for constraint parameter checks.
// With the condition set to 1 it forwards to b3Assert; change it to 0 to
// make the macro expand to nothing.
#if 1
#define b3AssertConstrParams(_par) b3Assert(_par)
#else
#define b3AssertConstrParams(_par)
#endif
			
 
				+
			
 
// Per-constraint feedback record: one force and one torque vector for each of
// the two bodies (A and B). Declared through B3_ATTRIBUTE_ALIGNED16.
// NOTE(review): presumably filled in by the solver when feedback is enabled
// on the owning constraint — confirm against the solver code.
B3_ATTRIBUTE_ALIGNED16(struct)
b3JointFeedback
{
	b3Vector3 m_appliedForceBodyA;
	b3Vector3 m_appliedTorqueBodyA;
	b3Vector3 m_appliedForceBodyB;
	b3Vector3 m_appliedTorqueBodyB;
};
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+
			
 
				+///TypedConstraint is the baseclass for Bullet constraints and vehicles
			
 
				+B3_ATTRIBUTE_ALIGNED16(class)
			
 
				+b3TypedConstraint : public b3TypedObject
			
 
				+{
			
 
				+	int m_userConstraintType;
			
 
				+
			
 
				+	union {
			
 
				+		int m_userConstraintId;
			
 
				+		void* m_userConstraintPtr;
			
 
				+	};
			
 
				+
			
 
				+	b3Scalar m_breakingImpulseThreshold;
			
 
				+	bool m_isEnabled;
			
 
				+	bool m_needsFeedback;
			
 
				+	int m_overrideNumSolverIterations;
			
 
				+
			
 
				+	b3TypedConstraint& operator=(b3TypedConstraint& other)
			
 
				+	{
			
 
				+		b3Assert(0);
			
 
				+		(void)other;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+protected:
			
 
				+	int m_rbA;
			
 
				+	int m_rbB;
			
 
				+	b3Scalar m_appliedImpulse;
			
 
				+	b3Scalar m_dbgDrawSize;
			
 
				+	b3JointFeedback* m_jointFeedback;
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	b3Scalar getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact);
			
 
				+
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	virtual ~b3TypedConstraint(){};
			
 
				+	b3TypedConstraint(b3TypedConstraintType type, int bodyA, int bodyB);
			
 
				+
			
 
				+	struct b3ConstraintInfo1
			
 
				+	{
			
 
				+		int m_numConstraintRows, nub;
			
 
				+	};
			
 
				+
			
 
				+	struct b3ConstraintInfo2
			
 
				+	{
			
 
				+		// integrator parameters: frames per second (1/stepsize), default error
			
 
				+		// reduction parameter (0..1).
			
 
				+		b3Scalar fps, erp;
			
 
				+
			
 
				+		// for the first and second body, pointers to two (linear and angular)
			
 
				+		// n*3 jacobian sub matrices, stored by rows. these matrices will have
			
 
				+		// been initialized to 0 on entry. if the second body is zero then the
			
 
				+		// J2xx pointers may be 0.
			
 
				+		b3Scalar *m_J1linearAxis, *m_J1angularAxis, *m_J2linearAxis, *m_J2angularAxis;
			
 
				+
			
 
				+		// elements to jump from one row to the next in J's
			
 
				+		int rowskip;
			
 
				+
			
 
				+		// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
			
 
				+		// "constraint force mixing" vector. c is set to zero on entry, cfm is
			
 
				+		// set to a constant value (typically very small or zero) value on entry.
			
 
				+		b3Scalar *m_constraintError, *cfm;
			
 
				+
			
 
				+		// lo and hi limits for variables (set to -/+ infinity on entry).
			
 
				+		b3Scalar *m_lowerLimit, *m_upperLimit;
			
 
				+
			
 
				+		// findex vector for variables. see the LCP solver interface for a
			
 
				+		// description of what this does. this is set to -1 on entry.
			
 
				+		// note that the returned indexes are relative to the first index of
			
 
				+		// the constraint.
			
 
				+		int* findex;
			
 
				+		// number of solver iterations
			
 
				+		int m_numIterations;
			
 
				+
			
 
				+		//damping of the velocity
			
 
				+		b3Scalar m_damping;
			
 
				+	};
			
 
				+
			
 
				+	int getOverrideNumSolverIterations() const
			
 
				+	{
			
 
				+		return m_overrideNumSolverIterations;
			
 
				+	}
			
 
				+
			
 
				+	///override the number of constraint solver iterations used to solve this constraint
			
 
				+	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
			
 
				+	void setOverrideNumSolverIterations(int overideNumIterations)
			
 
				+	{
			
 
				+		m_overrideNumSolverIterations = overideNumIterations;
			
 
				+	}
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	virtual void setupSolverConstraint(b3ConstraintArray & ca, int solverBodyA, int solverBodyB, b3Scalar timeStep)
			
 
				+	{
			
 
				+		(void)ca;
			
 
				+		(void)solverBodyA;
			
 
				+		(void)solverBodyB;
			
 
				+		(void)timeStep;
			
 
				+	}
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	virtual void getInfo1(b3ConstraintInfo1 * info, const b3RigidBodyData* bodies) = 0;
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	virtual void getInfo2(b3ConstraintInfo2 * info, const b3RigidBodyData* bodies) = 0;
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	void internalSetAppliedImpulse(b3Scalar appliedImpulse)
			
 
				+	{
			
 
				+		m_appliedImpulse = appliedImpulse;
			
 
				+	}
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	b3Scalar internalGetAppliedImpulse()
			
 
				+	{
			
 
				+		return m_appliedImpulse;
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar getBreakingImpulseThreshold() const
			
 
				+	{
			
 
				+		return m_breakingImpulseThreshold;
			
 
				+	}
			
 
				+
			
 
				+	void setBreakingImpulseThreshold(b3Scalar threshold)
			
 
				+	{
			
 
				+		m_breakingImpulseThreshold = threshold;
			
 
				+	}
			
 
				+
			
 
				+	bool isEnabled() const
			
 
				+	{
			
 
				+		return m_isEnabled;
			
 
				+	}
			
 
				+
			
 
				+	void setEnabled(bool enabled)
			
 
				+	{
			
 
				+		m_isEnabled = enabled;
			
 
				+	}
			
 
				+
			
 
				+	///internal method used by the constraint solver, don't use them directly
			
 
				+	virtual void solveConstraintObsolete(b3SolverBody& /*bodyA*/, b3SolverBody& /*bodyB*/, b3Scalar /*timeStep*/){};
			
 
				+
			
 
				+	int getRigidBodyA() const
			
 
				+	{
			
 
				+		return m_rbA;
			
 
				+	}
			
 
				+	int getRigidBodyB() const
			
 
				+	{
			
 
				+		return m_rbB;
			
 
				+	}
			
 
				+
			
 
				+	int getRigidBodyA()
			
 
				+	{
			
 
				+		return m_rbA;
			
 
				+	}
			
 
				+	int getRigidBodyB()
			
 
				+	{
			
 
				+		return m_rbB;
			
 
				+	}
			
 
				+
			
 
				+	int getUserConstraintType() const
			
 
				+	{
			
 
				+		return m_userConstraintType;
			
 
				+	}
			
 
				+
			
 
				+	void setUserConstraintType(int userConstraintType)
			
 
				+	{
			
 
				+		m_userConstraintType = userConstraintType;
			
 
				+	};
			
 
				+
			
 
				+	void setUserConstraintId(int uid)
			
 
				+	{
			
 
				+		m_userConstraintId = uid;
			
 
				+	}
			
 
				+
			
 
				+	int getUserConstraintId() const
			
 
				+	{
			
 
				+		return m_userConstraintId;
			
 
				+	}
			
 
				+
			
 
				+	void setUserConstraintPtr(void* ptr)
			
 
				+	{
			
 
				+		m_userConstraintPtr = ptr;
			
 
				+	}
			
 
				+
			
 
				+	void* getUserConstraintPtr()
			
 
				+	{
			
 
				+		return m_userConstraintPtr;
			
 
				+	}
			
 
				+
			
 
				+	void setJointFeedback(b3JointFeedback * jointFeedback)
			
 
				+	{
			
 
				+		m_jointFeedback = jointFeedback;
			
 
				+	}
			
 
				+
			
 
				+	const b3JointFeedback* getJointFeedback() const
			
 
				+	{
			
 
				+		return m_jointFeedback;
			
 
				+	}
			
 
				+
			
 
				+	b3JointFeedback* getJointFeedback()
			
 
				+	{
			
 
				+		return m_jointFeedback;
			
 
				+	}
			
 
				+
			
 
				+	int getUid() const
			
 
				+	{
			
 
				+		return m_userConstraintId;
			
 
				+	}
			
 
				+
			
 
				+	bool needsFeedback() const
			
 
				+	{
			
 
				+		return m_needsFeedback;
			
 
				+	}
			
 
				+
			
 
				+	///enableFeedback will allow to read the applied linear and angular impulse
			
 
				+	///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
			
 
				+	void enableFeedback(bool needsFeedback)
			
 
				+	{
			
 
				+		m_needsFeedback = needsFeedback;
			
 
				+	}
			
 
				+
			
 
				+	///getAppliedImpulse is an estimated total applied impulse.
			
 
				+	///This feedback could be used to determine breaking constraints or playing sounds.
			
 
				+	b3Scalar getAppliedImpulse() const
			
 
				+	{
			
 
				+		b3Assert(m_needsFeedback);
			
 
				+		return m_appliedImpulse;
			
 
				+	}
			
 
				+
			
 
				+	b3TypedConstraintType getConstraintType() const
			
 
				+	{
			
 
				+		return b3TypedConstraintType(m_objectType);
			
 
				+	}
			
 
				+
			
 
				+	void setDbgDrawSize(b3Scalar dbgDrawSize)
			
 
				+	{
			
 
				+		m_dbgDrawSize = dbgDrawSize;
			
 
				+	}
			
 
				+	b3Scalar getDbgDrawSize()
			
 
				+	{
			
 
				+		return m_dbgDrawSize;
			
 
				+	}
			
 
				+
			
 
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
			
 
				+	///If no axis is provided, it uses the default axis for this constraint.
			
 
				+	virtual void setParam(int num, b3Scalar value, int axis = -1) = 0;
			
 
				+
			
 
				+	///return the local value of parameter
			
 
				+	virtual b3Scalar getParam(int num, int axis = -1) const = 0;
			
 
				+
			
 
				+	//	virtual	int	calculateSerializeBufferSize() const;
			
 
				+
			
 
				+	///fills the dataBuffer and returns the struct name (and 0 on failure)
			
 
				+	//virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;
			
 
				+};
			
 
				+
			
 
				+// returns angle in range [-B3_2_PI, B3_2_PI], closest to one of the limits
			
 
				+// all arguments should be normalized angles (i.e. in range [-B3_PI, B3_PI])
			
 
				+B3_FORCE_INLINE b3Scalar b3AdjustAngleToLimits(b3Scalar angleInRadians, b3Scalar angleLowerLimitInRadians, b3Scalar angleUpperLimitInRadians)
			
 
				+{
			
 
				+	if (angleLowerLimitInRadians >= angleUpperLimitInRadians)
			
 
				+	{
			
 
				+		return angleInRadians;
			
 
				+	}
			
 
				+	else if (angleInRadians < angleLowerLimitInRadians)
			
 
				+	{
			
 
				+		b3Scalar diffLo = b3Fabs(b3NormalizeAngle(angleLowerLimitInRadians - angleInRadians));
			
 
				+		b3Scalar diffHi = b3Fabs(b3NormalizeAngle(angleUpperLimitInRadians - angleInRadians));
			
 
				+		return (diffLo < diffHi) ? angleInRadians : (angleInRadians + B3_2_PI);
			
 
				+	}
			
 
				+	else if (angleInRadians > angleUpperLimitInRadians)
			
 
				+	{
			
 
				+		b3Scalar diffHi = b3Fabs(b3NormalizeAngle(angleInRadians - angleUpperLimitInRadians));
			
 
				+		b3Scalar diffLo = b3Fabs(b3NormalizeAngle(angleInRadians - angleLowerLimitInRadians));
			
 
				+		return (diffLo < diffHi) ? (angleInRadians - B3_2_PI) : angleInRadians;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		return angleInRadians;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// clang-format off
			
 
///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
///Plain-data record for a typed constraint. Booleans are stored as int and
///scalars as float; field order and types are part of the serialized layout.
struct	b3TypedConstraintData
{
	// Indices of the two constrained bodies.
	int		m_bodyA;
	int		m_bodyB;
	char	*m_name;

	int	m_objectType;
	int	m_userConstraintType;
	int	m_userConstraintId;
	int	m_needsFeedback;

	float	m_appliedImpulse;
	float	m_dbgDrawSize;

	int	m_disableCollisionsBetweenLinkedBodies;
	int	m_overrideNumSolverIterations;

	float	m_breakingImpulseThreshold;
	int		m_isEnabled;
	
};
			
 
				+
			
 
				+// clang-format on
			
 
				+
			
 
				+/*B3_FORCE_INLINE	int	b3TypedConstraint::calculateSerializeBufferSize() const
			
 
				+{
			
 
				+	return sizeof(b3TypedConstraintData);
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+class b3AngularLimit
			
 
				+{
			
 
				+private:
			
 
				+	b3Scalar
			
 
				+		m_center,
			
 
				+		m_halfRange,
			
 
				+		m_softness,
			
 
				+		m_biasFactor,
			
 
				+		m_relaxationFactor,
			
 
				+		m_correction,
			
 
				+		m_sign;
			
 
				+
			
 
				+	bool
			
 
				+		m_solveLimit;
			
 
				+
			
 
				+public:
			
 
				+	/// Default constructor initializes limit as inactive, allowing free constraint movement
			
 
				+	b3AngularLimit()
			
 
				+		: m_center(0.0f),
			
 
				+		  m_halfRange(-1.0f),
			
 
				+		  m_softness(0.9f),
			
 
				+		  m_biasFactor(0.3f),
			
 
				+		  m_relaxationFactor(1.0f),
			
 
				+		  m_correction(0.0f),
			
 
				+		  m_sign(0.0f),
			
 
				+		  m_solveLimit(false)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	/// Sets all limit's parameters.
			
 
				+	/// When low > high limit becomes inactive.
			
 
				+	/// When high - low > 2PI limit is ineffective too becouse no angle can exceed the limit
			
 
				+	void set(b3Scalar low, b3Scalar high, b3Scalar _softness = 0.9f, b3Scalar _biasFactor = 0.3f, b3Scalar _relaxationFactor = 1.0f);
			
 
				+
			
 
				+	/// Checks conastaint angle against limit. If limit is active and the angle violates the limit
			
 
				+	/// correction is calculated.
			
 
				+	void test(const b3Scalar angle);
			
 
				+
			
 
				+	/// Returns limit's softness
			
 
				+	inline b3Scalar getSoftness() const
			
 
				+	{
			
 
				+		return m_softness;
			
 
				+	}
			
 
				+
			
 
				+	/// Returns limit's bias factor
			
 
				+	inline b3Scalar getBiasFactor() const
			
 
				+	{
			
 
				+		return m_biasFactor;
			
 
				+	}
			
 
				+
			
 
				+	/// Returns limit's relaxation factor
			
 
				+	inline b3Scalar getRelaxationFactor() const
			
 
				+	{
			
 
				+		return m_relaxationFactor;
			
 
				+	}
			
 
				+
			
 
				+	/// Returns correction value evaluated when test() was invoked
			
 
				+	inline b3Scalar getCorrection() const
			
 
				+	{
			
 
				+		return m_correction;
			
 
				+	}
			
 
				+
			
 
				+	/// Returns sign value evaluated when test() was invoked
			
 
				+	inline b3Scalar getSign() const
			
 
				+	{
			
 
				+		return m_sign;
			
 
				+	}
			
 
				+
			
 
				+	/// Gives half of the distance between min and max limit angle
			
 
				+	inline b3Scalar getHalfRange() const
			
 
				+	{
			
 
				+		return m_halfRange;
			
 
				+	}
			
 
				+
			
 
				+	/// Returns true when the last test() invocation recognized limit violation
			
 
				+	inline bool isLimit() const
			
 
				+	{
			
 
				+		return m_solveLimit;
			
 
				+	}
			
 
				+
			
 
				+	/// Checks given angle against limit. If limit is active and angle doesn't fit it, the angle
			
 
				+	/// returned is modified so it equals to the limit closest to given angle.
			
 
				+	void fit(b3Scalar& angle) const;
			
 
				+
			
 
				+	/// Returns correction value multiplied by sign value
			
 
				+	b3Scalar getError() const;
			
 
				+
			
 
				+	b3Scalar getLow() const;
			
 
				+
			
 
				+	b3Scalar getHigh() const;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_TYPED_CONSTRAINT_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp
@@ -0,0 +1,447 @@
 
				+#include "b3CpuRigidBodyPipeline.h"
			
 
				+
			
 
				+#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
			
 
				+#include "Bullet3Dynamics/shared/b3Inertia.h"
			
 
				+
			
 
// Private state of b3CpuRigidBodyPipeline.
struct b3CpuRigidBodyPipelineInternalData
{
	// Rigid bodies, their inertia data and their world-space AABBs.
	// NOTE(review): the pipeline indexes the AABB array with the body index;
	// the inertia array looks like it follows the same indexing — confirm.
	b3AlignedObjectArray<b3RigidBodyData> m_rigidBodies;
	b3AlignedObjectArray<b3Inertia> m_inertias;
	b3AlignedObjectArray<b3Aabb> m_aabbWorldSpace;

	// Broadphase and narrowphase supplied to the pipeline constructor (raw pointers).
	b3DynamicBvhBroadphase* m_bp;
	b3CpuNarrowPhase* m_np;
	// Copy of the configuration passed to the constructor.
	b3Config m_config;
};
			
 
				+
			
 
				+b3CpuRigidBodyPipeline::b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
			
 
				+{
			
 
				+	m_data = new b3CpuRigidBodyPipelineInternalData;
			
 
				+	m_data->m_np = narrowphase;
			
 
				+	m_data->m_bp = broadphaseDbvt;
			
 
				+	m_data->m_config = config;
			
 
				+}
			
 
				+
			
 
// Releases the internal data block allocated by the constructor.
// Only m_data itself is deleted here.
b3CpuRigidBodyPipeline::~b3CpuRigidBodyPipeline()
{
	delete m_data;
}
			
 
				+
			
 
				+void b3CpuRigidBodyPipeline::updateAabbWorldSpace()
			
 
				+{
			
 
				+	for (int i = 0; i < this->getNumBodies(); i++)
			
 
				+	{
			
 
				+		b3RigidBodyData* body = &m_data->m_rigidBodies[i];
			
 
				+		b3Float4 position = body->m_pos;
			
 
				+		b3Quat orientation = body->m_quat;
			
 
				+
			
 
				+		int collidableIndex = body->m_collidableIdx;
			
 
				+		b3Collidable& collidable = m_data->m_np->getCollidableCpu(collidableIndex);
			
 
				+		int shapeIndex = collidable.m_shapeIndex;
			
 
				+
			
 
				+		if (shapeIndex >= 0)
			
 
				+		{
			
 
				+			b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(shapeIndex);
			
 
				+			b3Aabb& worldAabb = m_data->m_aabbWorldSpace[i];
			
 
				+			float margin = 0.f;
			
 
				+			b3TransformAabb2(localAabb.m_minVec, localAabb.m_maxVec, margin, position, orientation, &worldAabb.m_minVec, &worldAabb.m_maxVec);
			
 
				+			m_data->m_bp->setAabb(i, worldAabb.m_minVec, worldAabb.m_maxVec, 0);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void b3CpuRigidBodyPipeline::computeOverlappingPairs()
			
 
				+{
			
 
				+	int numPairs = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs();
			
 
				+	m_data->m_bp->calculateOverlappingPairs();
			
 
				+	numPairs = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs();
			
 
				+	printf("numPairs=%d\n", numPairs);
			
 
				+}
			
 
				+
			
 
				+void b3CpuRigidBodyPipeline::computeContactPoints()
			
 
				+{
			
 
				+	b3AlignedObjectArray<b3Int4>& pairs = m_data->m_bp->getOverlappingPairCache()->getOverlappingPairArray();
			
 
				+
			
 
				+	m_data->m_np->computeContacts(pairs, m_data->m_aabbWorldSpace, m_data->m_rigidBodies);
			
 
				+}
			
 
// Advances the pipeline by one step: refreshes the world-space AABBs,
// recomputes the broadphase pairs, computes contacts and then integrates
// with the given deltaTime.
void b3CpuRigidBodyPipeline::stepSimulation(float deltaTime)
{
	//update world space aabb's
	updateAabbWorldSpace();

	//compute overlapping pairs
	computeOverlappingPairs();

	//compute contacts
	computeContactPoints();

	//solve contacts
	// NOTE(review): no solver call is made at this point, so the contacts
	// computed above are not resolved in this step — confirm this is intended.

	//update transforms
	integrate(deltaTime);
}
			
 
				+
			
 
				+static inline float b3CalcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
			
 
				+								 const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
			
 
				+{
			
 
				+	return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
			
 
				+}
			
 
				+
			
 
// Builds the Jacobian terms for direction n at lever arms r0 and r1:
//   linear   = -n
//   angular0 = -(r0 x n)
//   angular1 =   r1 x n
// The outputs are written in this order through references, so the order
// matters if an output aliases one of the inputs.
static inline void b3SetLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
										 b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
{
	linear = -n;
	angular0 = -b3Cross(r0, n);
	angular1 = b3Cross(r1, n);
}
			
 
				+
			
 
				+static inline void b3SolveContact(b3ContactConstraint4& cs,
			
 
				+								  const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
			
 
				+								  const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
			
 
				+								  float maxRambdaDt[4], float minRambdaDt[4])
			
 
				+{
			
 
				+	b3Vector3 dLinVelA;
			
 
				+	dLinVelA.setZero();
			
 
				+	b3Vector3 dAngVelA;
			
 
				+	dAngVelA.setZero();
			
 
				+	b3Vector3 dLinVelB;
			
 
				+	dLinVelB.setZero();
			
 
				+	b3Vector3 dAngVelB;
			
 
				+	dAngVelB.setZero();
			
 
				+
			
 
				+	for (int ic = 0; ic < 4; ic++)
			
 
				+	{
			
 
				+		//	dont necessary because this makes change to 0
			
 
				+		if (cs.m_jacCoeffInv[ic] == 0.f) continue;
			
 
				+
			
 
				+		{
			
 
				+			b3Vector3 angular0, angular1, linear;
			
 
				+			b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
			
 
				+			b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
			
 
				+			b3SetLinearAndAngular((const b3Vector3&)-cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1);
			
 
				+
			
 
				+			float rambdaDt = b3CalcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1,
			
 
				+										  linVelA, angVelA, linVelB, angVelB) +
			
 
				+							 cs.m_b[ic];
			
 
				+			rambdaDt *= cs.m_jacCoeffInv[ic];
			
 
				+
			
 
				+			{
			
 
				+				float prevSum = cs.m_appliedRambdaDt[ic];
			
 
				+				float updated = prevSum;
			
 
				+				updated += rambdaDt;
			
 
				+				updated = b3Max(updated, minRambdaDt[ic]);
			
 
				+				updated = b3Min(updated, maxRambdaDt[ic]);
			
 
				+				rambdaDt = updated - prevSum;
			
 
				+				cs.m_appliedRambdaDt[ic] = updated;
			
 
				+			}
			
 
				+
			
 
				+			b3Vector3 linImp0 = invMassA * linear * rambdaDt;
			
 
				+			b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
			
 
				+			b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
			
 
				+			b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
			
 
				+#ifdef _WIN32
			
 
				+			b3Assert(_finite(linImp0.getX()));
			
 
				+			b3Assert(_finite(linImp1.getX()));
			
 
				+#endif
			
 
				+			{
			
 
				+				linVelA += linImp0;
			
 
				+				angVelA += angImp0;
			
 
				+				linVelB += linImp1;
			
 
				+				angVelB += angImp1;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static inline void b3SolveFriction(b3ContactConstraint4& cs,
			
 
				+								   const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
			
 
				+								   const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
			
 
				+								   float maxRambdaDt[4], float minRambdaDt[4])
			
 
				+{
			
 
				+	if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return;
			
 
				+	const b3Vector3& center = (const b3Vector3&)cs.m_center;
			
 
				+
			
 
				+	b3Vector3 n = -(const b3Vector3&)cs.m_linear;
			
 
				+
			
 
				+	b3Vector3 tangent[2];
			
 
				+
			
 
				+	b3PlaneSpace1(n, tangent[0], tangent[1]);
			
 
				+
			
 
				+	b3Vector3 angular0, angular1, linear;
			
 
				+	b3Vector3 r0 = center - posA;
			
 
				+	b3Vector3 r1 = center - posB;
			
 
				+	for (int i = 0; i < 2; i++)
			
 
				+	{
			
 
				+		b3SetLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1);
			
 
				+		float rambdaDt = b3CalcRelVel(linear, -linear, angular0, angular1,
			
 
				+									  linVelA, angVelA, linVelB, angVelB);
			
 
				+		rambdaDt *= cs.m_fJacCoeffInv[i];
			
 
				+
			
 
				+		{
			
 
				+			float prevSum = cs.m_fAppliedRambdaDt[i];
			
 
				+			float updated = prevSum;
			
 
				+			updated += rambdaDt;
			
 
				+			updated = b3Max(updated, minRambdaDt[i]);
			
 
				+			updated = b3Min(updated, maxRambdaDt[i]);
			
 
				+			rambdaDt = updated - prevSum;
			
 
				+			cs.m_fAppliedRambdaDt[i] = updated;
			
 
				+		}
			
 
				+
			
 
				+		b3Vector3 linImp0 = invMassA * linear * rambdaDt;
			
 
				+		b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
			
 
				+		b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
			
 
				+		b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
			
 
				+#ifdef _WIN32
			
 
				+		b3Assert(_finite(linImp0.getX()));
			
 
				+		b3Assert(_finite(linImp1.getX()));
			
 
				+#endif
			
 
				+		linVelA += linImp0;
			
 
				+		angVelA += angImp0;
			
 
				+		linVelB += linImp1;
			
 
				+		angVelB += angImp1;
			
 
				+	}
			
 
				+
			
 
				+	{  //	angular damping for point constraint
			
 
				+		b3Vector3 ab = (posB - posA).normalized();
			
 
				+		b3Vector3 ac = (center - posA).normalized();
			
 
				+		if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
			
 
				+		{
			
 
				+			float angNA = b3Dot(n, angVelA);
			
 
				+			float angNB = b3Dot(n, angVelB);
			
 
				+
			
 
				+			angVelA -= (angNA * 0.1f) * n;
			
 
				+			angVelB -= (angNB * 0.1f) * n;
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+struct b3SolveTask  // : public ThreadPool::Task
			
 
				+{
			
 
				+	b3SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies,
			
 
				+				b3AlignedObjectArray<b3Inertia>& shapes,
			
 
				+				b3AlignedObjectArray<b3ContactConstraint4>& constraints,
			
 
				+				int start, int nConstraints,
			
 
				+				int maxNumBatches,
			
 
				+				b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx)
			
 
				+		: m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_wgUsedBodies(wgUsedBodies), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	unsigned short int getType() { return 0; }
			
 
				+
			
 
				+	void run(int tIdx)
			
 
				+	{
			
 
				+		b3AlignedObjectArray<int> usedBodies;
			
 
				+		//printf("run..............\n");
			
 
				+
			
 
				+		for (int bb = 0; bb < m_maxNumBatches; bb++)
			
 
				+		{
			
 
				+			usedBodies.resize(0);
			
 
				+			for (int ic = m_nConstraints - 1; ic >= 0; ic--)
			
 
				+			//for(int ic=0; ic<m_nConstraints; ic++)
			
 
				+			{
			
 
				+				int i = m_start + ic;
			
 
				+				if (m_constraints[i].m_batchIdx != bb)
			
 
				+					continue;
			
 
				+
			
 
				+				float frictionCoeff = b3GetFrictionCoeff(&m_constraints[i]);
			
 
				+				int aIdx = (int)m_constraints[i].m_bodyA;
			
 
				+				int bIdx = (int)m_constraints[i].m_bodyB;
			
 
				+				//int localBatch = m_constraints[i].m_batchIdx;
			
 
				+				b3RigidBodyData& bodyA = m_bodies[aIdx];
			
 
				+				b3RigidBodyData& bodyB = m_bodies[bIdx];
			
 
				+
			
 
				+#if 0
			
 
				+				if ((bodyA.m_invMass) && (bodyB.m_invMass))
			
 
				+				{
			
 
				+				//	printf("aIdx=%d, bIdx=%d\n", aIdx,bIdx);
			
 
				+				}
			
 
				+				if (bIdx==10)
			
 
				+				{
			
 
				+					//printf("ic(b)=%d, localBatch=%d\n",ic,localBatch);
			
 
				+				}
			
 
				+#endif
			
 
				+				if (aIdx == 10)
			
 
				+				{
			
 
				+					//printf("ic(a)=%d, localBatch=%d\n",ic,localBatch);
			
 
				+				}
			
 
				+				if (usedBodies.size() < (aIdx + 1))
			
 
				+				{
			
 
				+					usedBodies.resize(aIdx + 1, 0);
			
 
				+				}
			
 
				+
			
 
				+				if (usedBodies.size() < (bIdx + 1))
			
 
				+				{
			
 
				+					usedBodies.resize(bIdx + 1, 0);
			
 
				+				}
			
 
				+
			
 
				+				if (bodyA.m_invMass)
			
 
				+				{
			
 
				+					b3Assert(usedBodies[aIdx] == 0);
			
 
				+					usedBodies[aIdx]++;
			
 
				+				}
			
 
				+
			
 
				+				if (bodyB.m_invMass)
			
 
				+				{
			
 
				+					b3Assert(usedBodies[bIdx] == 0);
			
 
				+					usedBodies[bIdx]++;
			
 
				+				}
			
 
				+
			
 
				+				if (!m_solveFriction)
			
 
				+				{
			
 
				+					float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
			
 
				+					float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
			
 
				+
			
 
				+					b3SolveContact(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld,
			
 
				+								   (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld,
			
 
				+								   maxRambdaDt, minRambdaDt);
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
			
 
				+					float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
			
 
				+
			
 
				+					float sum = 0;
			
 
				+					for (int j = 0; j < 4; j++)
			
 
				+					{
			
 
				+						sum += m_constraints[i].m_appliedRambdaDt[j];
			
 
				+					}
			
 
				+					frictionCoeff = 0.7f;
			
 
				+					for (int j = 0; j < 4; j++)
			
 
				+					{
			
 
				+						maxRambdaDt[j] = frictionCoeff * sum;
			
 
				+						minRambdaDt[j] = -maxRambdaDt[j];
			
 
				+					}
			
 
				+
			
 
				+					b3SolveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld,
			
 
				+									(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld,
			
 
				+									maxRambdaDt, minRambdaDt);
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			if (m_wgUsedBodies)
			
 
				+			{
			
 
				+				if (m_wgUsedBodies[m_curWgidx].size() < usedBodies.size())
			
 
				+				{
			
 
				+					m_wgUsedBodies[m_curWgidx].resize(usedBodies.size());
			
 
				+				}
			
 
				+				for (int i = 0; i < usedBodies.size(); i++)
			
 
				+				{
			
 
				+					if (usedBodies[i])
			
 
				+					{
			
 
				+						//printf("cell %d uses body %d\n", m_curWgidx,i);
			
 
				+						m_wgUsedBodies[m_curWgidx][i] = 1;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	b3AlignedObjectArray<b3RigidBodyData>& m_bodies;
			
 
				+	b3AlignedObjectArray<b3Inertia>& m_shapes;
			
 
				+	b3AlignedObjectArray<b3ContactConstraint4>& m_constraints;
			
 
				+	b3AlignedObjectArray<int>* m_wgUsedBodies;
			
 
				+	int m_curWgidx;
			
 
				+	int m_start;
			
 
				+	int m_nConstraints;
			
 
				+	bool m_solveFriction;
			
 
				+	int m_maxNumBatches;
			
 
				+};
			
 
				+
			
 
				+void b3CpuRigidBodyPipeline::solveContactConstraints()
			
 
				+{
			
 
				+	// Fixed solver settings; both are plain locals.
			
 
				+	const int numIterations = 4;
			
 
				+	const int maxNumBatches = 250;
			
 
				+
			
 
				+	// Nothing converts contacts into constraints here, so this array stays
			
 
				+	// empty and every task below is handed a constraint count of zero.
			
 
				+	b3AlignedObjectArray<b3ContactConstraint4> contactConstraints;
			
 
				+	const int numConstraints = contactConstraints.size();
			
 
				+
			
 
				+	// Pass 0 runs the tasks with m_solveFriction == false, pass 1 with true.
			
 
				+	for (int pass = 0; pass < 2; pass++)
			
 
				+	{
			
 
				+		for (int iter = 0; iter < numIterations; iter++)
			
 
				+		{
			
 
				+			b3SolveTask task(m_data->m_rigidBodies, m_data->m_inertias, contactConstraints, 0, numConstraints, maxNumBatches, 0, 0);
			
 
				+			task.m_solveFriction = (pass == 1);
			
 
				+			task.run(0);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void b3CpuRigidBodyPipeline::integrate(float deltaTime)
			
 
				+{
			
 
				+	// b3IntegrateTransform multiplies each body's angular velocity by this
			
 
				+	// factor every step, so 1 means "no damping". The former value of 0
			
 
				+	// erased the angular velocity of every dynamic body on each call.
			
 
				+	const float angDamping = 1.f;
			
 
				+	// Gravity is hard-coded in this function.
			
 
				+	b3Vector3 gravityAcceleration = b3MakeVector3(0, -9, 0);
			
 
				+
			
 
				+	//integrate transforms (external forces/gravity should be moved into constraint solver)
			
 
				+	const int numBodies = m_data->m_rigidBodies.size();
			
 
				+	for (int i = 0; i < numBodies; i++)
			
 
				+	{
			
 
				+		b3IntegrateTransform(&m_data->m_rigidBodies[i], deltaTime, angDamping, gravityAcceleration);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+int b3CpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userData)
			
 
				+{
			
 
				+	// Reads position[0..2] and orientation[0..3]; userData is not used here.
			
 
				+	b3RigidBodyData newBody;
			
 
				+	// The body is appended, so its index equals the current body count.
			
 
				+	int bodyIndex = m_data->m_rigidBodies.size();
			
 
				+	// A mass of zero is stored as an inverse mass of zero.
			
 
				+	if (mass)
			
 
				+	{
			
 
				+		newBody.m_invMass = 1.f / mass;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		newBody.m_invMass = 0.f;
			
 
				+	}
			
 
				+	newBody.m_angVel.setValue(0, 0, 0);
			
 
				+	newBody.m_collidableIdx = collidableIndex;
			
 
				+	newBody.m_frictionCoeff = 0.3f;
			
 
				+	newBody.m_linVel.setValue(0, 0, 0);
			
 
				+	newBody.m_pos.setValue(position[0], position[1], position[2]);
			
 
				+	newBody.m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]);
			
 
				+	newBody.m_restituitionCoeff = 0.f;
			
 
				+
			
 
				+	m_data->m_rigidBodies.push_back(newBody);
			
 
				+
			
 
				+	// A negative index is reported, but the body stays registered; only the
			
 
				+	// world-space AABB and the broadphase proxy are skipped.
			
 
				+	if (collidableIndex < 0)
			
 
				+	{
			
 
				+		b3Error("registerPhysicsInstance using invalid collidableIndex\n");
			
 
				+		return bodyIndex;
			
 
				+	}
			
 
				+
			
 
				+	b3Aabb& worldAabb = m_data->m_aabbWorldSpace.expand();
			
 
				+
			
 
				+	b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(collidableIndex);
			
 
				+	b3Vector3 localMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]);
			
 
				+	b3Vector3 localMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]);
			
 
				+
			
 
				+	b3Scalar margin = 0.01f;
			
 
				+	b3Transform bodyTransform;
			
 
				+	bodyTransform.setIdentity();
			
 
				+	bodyTransform.setOrigin(b3MakeVector3(position[0], position[1], position[2]));
			
 
				+	bodyTransform.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3]));
			
 
				+	b3TransformAabb(localMin, localMax, margin, bodyTransform, worldAabb.m_minVec, worldAabb.m_maxVec);
			
 
				+
			
 
				+	m_data->m_bp->createProxy(worldAabb.m_minVec, worldAabb.m_maxVec, bodyIndex, 0, 1, 1);
			
 
				+	return bodyIndex;
			
 
				+}
			
 
				+
			
 
				+const struct b3RigidBodyData* b3CpuRigidBodyPipeline::getBodyBuffer() const
			
 
				+{
			
 
				+	// With no bodies stored there is no first element to point at.
			
 
				+	if (m_data->m_rigidBodies.size() == 0)
			
 
				+	{
			
 
				+		return 0;
			
 
				+	}
			
 
				+	return &m_data->m_rigidBodies[0];
			
 
				+}
			
 
				+
			
 
				+int b3CpuRigidBodyPipeline::getNumBodies() const
			
 
				+{
			
 
				+	// One entry is stored per registered body.
			
 
				+	const int bodyCount = m_data->m_rigidBodies.size();
			
 
				+	return bodyCount;
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/b3CpuRigidBodyPipeline.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/b3CpuRigidBodyPipeline.h
@@ -0,0 +1,62 @@
 
				+/*
			
 
				+Copyright (c) 2013 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#ifndef B3_CPU_RIGIDBODY_PIPELINE_H
			
 
				+#define B3_CPU_RIGIDBODY_PIPELINE_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
			
 
				+
			
 
				+class b3CpuRigidBodyPipeline  // Rigid body pipeline interface; several members are only declared here.
			
 
				+{
			
 
				+protected:
			
 
				+	struct b3CpuRigidBodyPipelineInternalData* m_data;  // opaque internal state (struct is only forward-declared here)
			
 
				+
			
 
				+	int allocateCollidable();
			
 
				+
			
 
				+public:
			
 
				+	b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const struct b3Config& config);
			
 
				+	virtual ~b3CpuRigidBodyPipeline();
			
 
				+	// Simulation stages. All are virtual.
			
 
				+	virtual void stepSimulation(float deltaTime);
			
 
				+	virtual void integrate(float timeStep);  // calls b3IntegrateTransform on every stored body (the definition names this parameter deltaTime)
			
 
				+	virtual void updateAabbWorldSpace();
			
 
				+	virtual void computeOverlappingPairs();
			
 
				+	virtual void computeContactPoints();
			
 
				+	virtual void solveContactConstraints();  // runs the non-friction solver iterations, then the friction iterations
			
 
				+
			
 
				+	int registerConvexPolyhedron(class b3ConvexUtility* convex);
			
 
				+	// registerPhysicsInstance appends a body and returns its index. It reads position[0..2] and orientation[0..3]; mass 0 gives inverse mass 0. The definition names the 4th parameter collidableIndex.
			
 
				+	int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData);
			
 
				+	void writeAllInstancesToGpu();
			
 
				+	void copyConstraintsToHost();
			
 
				+	void setGravity(const float* grav);
			
 
				+	void reset();
			
 
				+
			
 
				+	int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold);
			
 
				+	int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold);
			
 
				+	void removeConstraintByUid(int uid);
			
 
				+
			
 
				+	void addConstraint(class b3TypedConstraint* constraint);
			
 
				+	void removeConstraint(b3TypedConstraint* constraint);
			
 
				+
			
 
				+	void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults);
			
 
				+	// Returns a pointer to the first stored body, or null when no body has been registered.
			
 
				+	const struct b3RigidBodyData* getBodyBuffer() const;
			
 
				+	// Returns the number of stored bodies.
			
 
				+	int getNumBodies() const;
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_CPU_RIGIDBODY_PIPELINE_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/premake4.lua
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/premake4.lua
@@ -0,0 +1,18 @@
 
				+	project "Bullet3Dynamics"
			
 
				+
			
 
				+	language "C++"
			
 
				+				
			
 
				+	kind "StaticLib"
			
 
				+
			
 
				+	includedirs {
			
 
				+		".."
			
 
				+	}		
			
 
				+	
			
 
				+    if os.is("Linux") then
			
 
				+        buildoptions{"-fPIC"}
			
 
				+    end
			
 
				+
			
 
				+	files {
			
 
				+		"**.cpp",
			
 
				+		"**.h"
			
 
				+	}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3ContactConstraint4.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3ContactConstraint4.h
@@ -0,0 +1,31 @@
 
				+#ifndef B3_CONTACT_CONSTRAINT5_H
			
 
				+#define B3_CONTACT_CONSTRAINT5_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+typedef struct b3ContactConstraint4 b3ContactConstraint4_t;
			
 
				+
			
 
				+struct b3ContactConstraint4  // Solver data for one contact with up to 4 points; filled by setConstraint4.
			
 
				+{
			
 
				+	b3Float4 m_linear;  // xyz: contact normal copied from the contact's m_worldNormalOnB; w: friction coefficient (read by b3GetFrictionCoeff)
			
 
				+	b3Float4 m_worldPos[4];  // contact points copied from the contact; unused slots are zeroed
			
 
				+	b3Float4 m_center;  // mean of the used contact points; the friction rows are built around it
			
 
				+	float m_jacCoeffInv[4];  // per-point result of calcJacCoeff; 0 for unused slots
			
 
				+	float m_b[4];  // per-point bias: restitution term plus position-correction term
			
 
				+	float m_appliedRambdaDt[4];  // per-point accumulator, reset to 0 by setConstraint4 ("Rambda" presumably means lambda)
			
 
				+	float m_fJacCoeffInv[2];      // friction: calcJacCoeff result for each of the two tangent directions
			
 
				+	float m_fAppliedRambdaDt[2];  // friction: per-tangent accumulator, reset to 0 when the contact has points
			
 
				+
			
 
				+	unsigned int m_bodyA;  // absolute value of the contact's m_bodyAPtrAndSignBit
			
 
				+	unsigned int m_bodyB;  // absolute value of the contact's m_bodyBPtrAndSignBit
			
 
				+	int m_batchIdx;
			
 
				+	unsigned int m_paddings;
			
 
				+};
			
 
				+
			
 
				+//inline	void setFrictionCoeff(float value) { m_linear[3] = value; }
			
 
				+inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint)
			
 
				+{
			
 
				+	// The friction coefficient is stored in the w component of m_linear.
			
 
				+	const float frictionCoeff = constraint->m_linear.w;
			
 
				+	return frictionCoeff;
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_CONTACT_CONSTRAINT5_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3ConvertConstraint4.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3ConvertConstraint4.h
@@ -0,0 +1,148 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+void b3PlaneSpace1(b3Float4ConstArg n, b3Float4* p, b3Float4* q);
			
 
				+void b3PlaneSpace1(b3Float4ConstArg n, b3Float4* p, b3Float4* q)
			
 
				+{  // Writes two vectors perpendicular to n into p[0] and q[0]. Only x, y and z are written; w is left untouched.
			
 
				+	if (b3Fabs(n.z) > 0.70710678f)  // 0.70710678 is 1/sqrt(2): pick the branch whose normalisation factor stays away from zero
			
 
				+	{
			
 
				+		// choose p in y-z plane
			
 
				+		float a = n.y * n.y + n.z * n.z;  // at least 0.5 here, since n.z * n.z alone exceeds 0.5
			
 
				+		float k = 1.f / sqrt(a);
			
 
				+		p[0].x = 0;
			
 
				+		p[0].y = -n.z * k;
			
 
				+		p[0].z = n.y * k;
			
 
				+		// set q = n x p
			
 
				+		q[0].x = a * k;
			
 
				+		q[0].y = -n.x * p[0].z;
			
 
				+		q[0].z = n.x * p[0].y;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		// choose p in x-y plane
			
 
				+		float a = n.x * n.x + n.y * n.y;  // at least 0.5 if n is unit length; NOTE(review): a non-unit n along z would make this 0 and k infinite
			
 
				+		float k = 1.f / sqrt(a);
			
 
				+		p[0].x = -n.y * k;
			
 
				+		p[0].y = n.x * k;
			
 
				+		p[0].z = 0;
			
 
				+		// set q = n x p
			
 
				+		q[0].x = -n.z * p[0].y;
			
 
				+		q[0].y = n.z * p[0].x;
			
 
				+		q[0].z = a * k;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void setLinearAndAngular(b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)
			
 
				+{
			
 
				+	// Linear part: the direction n with its w component cleared.
			
 
				+	linear[0] = b3MakeFloat4(n.x, n.y, n.z, 0.f);
			
 
				+	// Angular parts: each offset crossed with n; the second one is negated.
			
 
				+	angular0[0] = b3Cross3(r0, n);
			
 
				+	angular1[0] = -b3Cross3(r1, n);
			
 
				+}
			
 
				+
			
 
				+float calcRelVel(b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,
			
 
				+				 b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1)
			
 
				+{
			
 
				+	// Accumulate the four dot products in the same left-to-right order.
			
 
				+	float relVel = b3Dot3F4(l0, linVel0);
			
 
				+	relVel = relVel + b3Dot3F4(a0, angVel0);
			
 
				+	relVel = relVel + b3Dot3F4(l1, linVel1);
			
 
				+	relVel = relVel + b3Dot3F4(a1, angVel1);
			
 
				+	return relVel;
			
 
				+}
			
 
				+
			
 
				+float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,
			
 
				+				   float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)
			
 
				+{
			
 
				+	// linear0 and linear1 are taken to be normalized, so each linear term is
			
 
				+	// just the inverse mass and the two vectors are not read.
			
 
				+	const float angularTerm0 = b3Dot3F4(mtMul3(angular0, *invInertia0), angular0);
			
 
				+	const float angularTerm1 = b3Dot3F4(mtMul3(angular1, *invInertia1), angular1);
			
 
				+
			
 
				+	// Same summation order as before: mass 0, angular 0, mass 1, angular 1.
			
 
				+	float denominator = invMass0;
			
 
				+	denominator = denominator + angularTerm0;
			
 
				+	denominator = denominator + invMass1;
			
 
				+	denominator = denominator + angularTerm1;
			
 
				+	return -1.f / denominator;
			
 
				+}
			
 
				+
			
 
				+void setConstraint4(b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,
			
 
				+					b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB,
			
 
				+					__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
			
 
				+					b3ContactConstraint4_t* dstC)
			
 
				+{
			
 
				+	// Fills *dstC from the contact *src and the state of the two bodies.
			
 
				+	// src->m_worldNormalOnB.w holds the number of contact points and
			
 
				+	// src->m_worldPosB[i].w the penetration of point i.
			
 
				+	dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
			
 
				+	dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
			
 
				+
			
 
				+	float dtInv = 1.f / dt;
			
 
				+
			
 
				+	// Give every per-point and friction field a defined value first, so a
			
 
				+	// contact with fewer than 4 points (or none) leaves nothing uninitialised.
			
 
				+	for (int ic = 0; ic < 4; ic++)
			
 
				+	{
			
 
				+		dstC->m_appliedRambdaDt[ic] = 0.f;
			
 
				+		dstC->m_jacCoeffInv[ic] = 0.f;
			
 
				+		dstC->m_b[ic] = 0.f;
			
 
				+	}
			
 
				+	dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
			
 
				+	dstC->m_fAppliedRambdaDt[0] = dstC->m_fAppliedRambdaDt[1] = 0.f;
			
 
				+	dstC->m_center = b3MakeFloat4(0.f, 0.f, 0.f, 0.f);
			
 
				+
			
 
				+	dstC->m_linear = src->m_worldNormalOnB;
			
 
				+	dstC->m_linear.w = 0.7f;  //src->getFrictionCoeff() );
			
 
				+	for (int ic = 0; ic < 4; ic++)
			
 
				+	{
			
 
				+		if (ic >= src->m_worldNormalOnB.w)  //npoints
			
 
				+		{
			
 
				+			// Unused slot: keep the zeroed defaults.
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		// Offsets from each body position to the contact point.
			
 
				+		b3Float4 r0 = src->m_worldPosB[ic] - posA;
			
 
				+		b3Float4 r1 = src->m_worldPosB[ic] - posB;
			
 
				+
			
 
				+		b3Float4 linear, angular0, angular1;
			
 
				+		setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
			
 
				+
			
 
				+		dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
			
 
				+											   invMassA, &invInertiaA, invMassB, &invInertiaB);
			
 
				+
			
 
				+		float relVelN = calcRelVel(linear, -linear, angular0, angular1,
			
 
				+								   linVelA, angVelA, linVelB, angVelB);
			
 
				+
			
 
				+		// Restitution is fixed at 0 here, so the first term contributes nothing for finite velocities.
			
 
				+		float e = 0.f;  //src->getRestituitionCoeff();
			
 
				+		if (relVelN * relVelN < 0.004f) e = 0.f;
			
 
				+
			
 
				+		dstC->m_b[ic] = e * relVelN;
			
 
				+		// Position correction: (penetration + drift) scaled by the coefficient and 1/dt.
			
 
				+		dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift) * positionConstraintCoeff * dtInv;
			
 
				+	}
			
 
				+
			
 
				+	if (src->m_worldNormalOnB.w > 0)  //npoints
			
 
				+	{                                 //	prepare friction
			
 
				+		// Friction acts at the mean of the used contact points.
			
 
				+		b3Float4 center = b3MakeFloat4(0.f, 0.f, 0.f, 0.f);
			
 
				+		for (int i = 0; i < src->m_worldNormalOnB.w; i++)
			
 
				+			center += src->m_worldPosB[i];
			
 
				+		center /= (float)src->m_worldNormalOnB.w;
			
 
				+
			
 
				+		// Two tangent directions perpendicular to the contact normal.
			
 
				+		b3Float4 tangent[2];
			
 
				+		b3PlaneSpace1(src->m_worldNormalOnB, &tangent[0], &tangent[1]);
			
 
				+
			
 
				+		b3Float4 r[2];
			
 
				+		r[0] = center - posA;
			
 
				+		r[1] = center - posB;
			
 
				+
			
 
				+		for (int i = 0; i < 2; i++)
			
 
				+		{
			
 
				+			b3Float4 linear, angular0, angular1;
			
 
				+			setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
			
 
				+
			
 
				+			dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
			
 
				+												   invMassA, &invInertiaA, invMassB, &invInertiaB);
			
 
				+			dstC->m_fAppliedRambdaDt[i] = 0.f;
			
 
				+		}
			
 
				+		dstC->m_center = center;
			
 
				+	}
			
 
				+
			
 
				+	// Copy the used contact points; zero the remaining slots.
			
 
				+	for (int i = 0; i < 4; i++)
			
 
				+	{
			
 
				+		if (i < src->m_worldNormalOnB.w)
			
 
				+		{
			
 
				+			dstC->m_worldPos[i] = src->m_worldPosB[i];
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			dstC->m_worldPos[i] = b3MakeFloat4(0.f, 0.f, 0.f, 0.f);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3Inertia.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3Inertia.h
@@ -0,0 +1,14 @@
 
				+
			
 
				+
			
 
				+#ifndef B3_INERTIA_H
			
 
				+#define B3_INERTIA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+struct b3Inertia  // Pair of 3x3 inverse inertia matrices.
			
 
				+{
			
 
				+	b3Mat3x3 m_invInertiaWorld;  // passed by b3SolveTask to b3SolveContact / b3SolveFriction as each body's inverse inertia
			
 
				+	b3Mat3x3 m_initInvInertia;  // presumably the initial, untransformed inverse inertia; TODO confirm against the code that updates it
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_INERTIA_H
			
--- a/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3IntegrateTransforms.h
+++ b/Dependencies/include/bullet3/Bullet3Dynamics/shared/b3IntegrateTransforms.h
@@ -0,0 +1,106 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+inline void integrateSingleTransform(__global b3RigidBodyData_t* bodies, int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
			
 
				+{  // Advances bodies[nodeID] by timeStep: orientation first, then position, then gravity. Bodies with m_invMass == 0 are left untouched.
			
 
				+	if (bodies[nodeID].m_invMass != 0.f)
			
 
				+	{
			
 
				+		float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);  // largest rotation angle allowed per step, about a quarter of pi radians
			
 
				+
			
 
				+		//angular velocity
			
 
				+		{
			
 
				+			b3Float4 axis;
			
 
				+			// angularDamping is a plain multiplier: 1 keeps the angular velocity, 0 removes it entirely
			
 
				+			bodies[nodeID].m_angVel.x *= angularDamping;
			
 
				+			bodies[nodeID].m_angVel.y *= angularDamping;
			
 
				+			bodies[nodeID].m_angVel.z *= angularDamping;
			
 
				+
			
 
				+			b3Float4 angvel = bodies[nodeID].m_angVel;
			
 
				+
			
 
				+			float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));  // angular speed
			
 
				+
			
 
				+			//limit the angular motion
			
 
				+			if (fAngle * timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
			
 
				+			{
			
 
				+				fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
			
 
				+			}
			
 
				+			if (fAngle < 0.001f)
			
 
				+			{
			
 
				+				// use the Taylor expansion of the sinc-style factor below to avoid dividing by a tiny fAngle (0.0208333... is 1/48)
			
 
				+				axis = angvel * (0.5f * timeStep - (timeStep * timeStep * timeStep) * 0.020833333333f * fAngle * fAngle);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// scale factor is sin(0.5 * fAngle * timeStep) / fAngle
			
 
				+				axis = angvel * (b3Sin(0.5f * fAngle * timeStep) / fAngle);
			
 
				+			}
			
 
				+
			
 
				+			b3Quat dorn;  // rotation increment for this step
			
 
				+			dorn.x = axis.x;
			
 
				+			dorn.y = axis.y;
			
 
				+			dorn.z = axis.z;
			
 
				+			dorn.w = b3Cos(fAngle * timeStep * 0.5f);
			
 
				+			b3Quat orn0 = bodies[nodeID].m_quat;
			
 
				+			b3Quat predictedOrn = b3QuatMul(dorn, orn0);
			
 
				+			predictedOrn = b3QuatNormalized(predictedOrn);
			
 
				+			bodies[nodeID].m_quat = predictedOrn;
			
 
				+		}
			
 
				+		// move the position using the velocity from before this step's gravity update (b3IntegrateTransform uses the opposite order)
			
 
				+		bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;
			
 
				+
			
 
				+		//apply gravity
			
 
				+		bodies[nodeID].m_linVel += gravityAcceleration * timeStep;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+inline void b3IntegrateTransform(__global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
			
 
				+{  // Advances *body by timeStep: orientation first, then gravity, then position. A body with m_invMass == 0 is left untouched.
			
 
				+	float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);  // largest rotation angle allowed per step, about a quarter of pi radians
			
 
				+
			
 
				+	if ((body->m_invMass != 0.f))
			
 
				+	{
			
 
				+		//angular velocity
			
 
				+		{
			
 
				+			b3Float4 axis;
			
 
				+			// angularDamping is a plain multiplier: 1 keeps the angular velocity, 0 removes it entirely
			
 
				+			body->m_angVel.x *= angularDamping;
			
 
				+			body->m_angVel.y *= angularDamping;
			
 
				+			body->m_angVel.z *= angularDamping;
			
 
				+
			
 
				+			b3Float4 angvel = body->m_angVel;
			
 
				+			float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));  // angular speed
			
 
				+			//limit the angular motion
			
 
				+			if (fAngle * timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)
			
 
				+			{
			
 
				+				fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;
			
 
				+			}
			
 
				+			if (fAngle < 0.001f)
			
 
				+			{
			
 
				+				// use the Taylor expansion of the sinc-style factor below to avoid dividing by a tiny fAngle (0.0208333... is 1/48)
			
 
				+				axis = angvel * (0.5f * timeStep - (timeStep * timeStep * timeStep) * 0.020833333333f * fAngle * fAngle);
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// scale factor is sin(0.5 * fAngle * timeStep) / fAngle
			
 
				+				axis = angvel * (b3Sin(0.5f * fAngle * timeStep) / fAngle);
			
 
				+			}
			
 
				+			b3Quat dorn;  // rotation increment for this step
			
 
				+			dorn.x = axis.x;
			
 
				+			dorn.y = axis.y;
			
 
				+			dorn.z = axis.z;
			
 
				+			dorn.w = b3Cos(fAngle * timeStep * 0.5f);
			
 
				+			b3Quat orn0 = body->m_quat;
			
 
				+
			
 
				+			b3Quat predictedOrn = b3QuatMul(dorn, orn0);
			
 
				+			predictedOrn = b3QuatNormalized(predictedOrn);
			
 
				+			body->m_quat = predictedOrn;
			
 
				+		}
			
 
				+
			
 
				+		//apply gravity
			
 
				+		body->m_linVel += gravityAcceleration * timeStep;
			
 
				+
			
 
				+		// move the position using the velocity that already includes this step's gravity (integrateSingleTransform uses the opposite order)
			
 
				+		body->m_pos += body->m_linVel * timeStep;
			
 
				+	}
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/CMakeLists.txt
+++ b/Dependencies/include/bullet3/Bullet3Geometry/CMakeLists.txt
@@ -0,0 +1,47 @@
 
				+
			
 
				+INCLUDE_DIRECTORIES(
			
 
				+	${BULLET_PHYSICS_SOURCE_DIR}/src
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Geometry_SRCS
			
 
				+	b3ConvexHullComputer.cpp
			
 
				+	b3GeometryUtil.cpp
			
 
				+)
			
 
				+
			
 
				+SET(Bullet3Geometry_HDRS
			
 
				+	b3AabbUtil.h
			
 
				+	b3ConvexHullComputer.h
			
 
				+	b3GeometryUtil.h
			
 
				+	b3GrahamScan2dConvexHull.h
			
 
				+)
			
 
				+
			
 
				+ADD_LIBRARY(Bullet3Geometry ${Bullet3Geometry_SRCS} ${Bullet3Geometry_HDRS})
			
 
				+if (BUILD_SHARED_LIBS)
			
 
				+  target_link_libraries(Bullet3Geometry Bullet3Common)
			
 
				+endif()
			
 
				+SET_TARGET_PROPERTIES(Bullet3Geometry PROPERTIES VERSION ${BULLET_VERSION})
			
 
				+SET_TARGET_PROPERTIES(Bullet3Geometry PROPERTIES SOVERSION ${BULLET_VERSION})
			
 
				+
			
 
				+IF (INSTALL_LIBS)
			
 
				+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
			
 
				+		#FILES_MATCHING requires CMake 2.6
			
 
				+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			
 
				+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+				INSTALL(TARGETS Bullet3Geometry DESTINATION .)
			
 
				+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+				INSTALL(TARGETS Bullet3Geometry
			
 
				+					RUNTIME DESTINATION bin
			
 
				+					LIBRARY DESTINATION lib${LIB_SUFFIX}
			
 
				+					ARCHIVE DESTINATION lib${LIB_SUFFIX})
			
 
				+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
			
 
				+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
			
 
				+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
			
 
				+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
			
 
				+
			
 
				+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+			SET_TARGET_PROPERTIES(Bullet3Geometry PROPERTIES FRAMEWORK true)
			
 
				+			SET_TARGET_PROPERTIES(Bullet3Geometry PROPERTIES PUBLIC_HEADER "${Bullet3Geometry_HDRS}")
			
 
				+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
			
 
				+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
			
 
				+ENDIF (INSTALL_LIBS)
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3AabbUtil.h
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3AabbUtil.h
@@ -0,0 +1,217 @@
 
				+/*
			
 
				+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_AABB_UTIL2
			
 
				+#define B3_AABB_UTIL2
			
 
				+
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3MinMax.h"
			
 
				+
			
 
				+B3_FORCE_INLINE void b3AabbExpand(b3Vector3& aabbMin,
			
 
				+								  b3Vector3& aabbMax,
			
 
				+								  const b3Vector3& expansionMin,
			
 
				+								  const b3Vector3& expansionMax)
			
 
				+{
			
 
				+	// Both corners are shifted by adding their expansion vector; nothing is
			
 
				+	// negated, so the caller chooses the sign of expansionMin.
			
 
				+	const b3Vector3 movedMin = aabbMin + expansionMin;
			
 
				+	aabbMin = movedMin;
			
 
				+	const b3Vector3 movedMax = aabbMax + expansionMax;
			
 
				+	aabbMax = movedMax;
			
 
				+}
			
 
				+
			
 
				+/// test whether a point lies inside an aabb (points on the boundary count as inside)
			
 
				+B3_FORCE_INLINE bool b3TestPointAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1,
			
 
				+											 const b3Vector3& point)
			
 
				+{
			
 
				+	// The point is outside as soon as it lies beyond the box on any axis.
			
 
				+	const bool outsideX = (aabbMin1.getX() > point.getX() || aabbMax1.getX() < point.getX());
			
 
				+	const bool outsideZ = (aabbMin1.getZ() > point.getZ() || aabbMax1.getZ() < point.getZ());
			
 
				+	const bool outsideY = (aabbMin1.getY() > point.getY() || aabbMax1.getY() < point.getY());
			
 
				+	return !(outsideX || outsideZ || outsideY);
			
 
				+}
			
 
				+
			
 
				+/// conservative test for overlap between two aabbs
			
 
				+B3_FORCE_INLINE bool b3TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1,
			
 
				+											const b3Vector3& aabbMin2, const b3Vector3& aabbMax2)
			
 
				+{
			
 
				+	// The boxes are disjoint as soon as their intervals are separated on any
			
 
				+	// axis; touching intervals still count as overlapping.
			
 
				+	const bool separatedX = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX());
			
 
				+	const bool separatedZ = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ());
			
 
				+	const bool separatedY = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY());
			
 
				+	return !(separatedX || separatedZ || separatedY);
			
 
				+}
			
 
				+
			
 
				+/// conservative test for overlap between triangle and aabb
			
 
				+B3_FORCE_INLINE bool b3TestTriangleAgainstAabb2(const b3Vector3* vertices,
			
 
				+												const b3Vector3& aabbMin, const b3Vector3& aabbMax)
			
 
				+{
			
 
				+	// Compares the triangle's per-axis range (from vertices[0..2]) with the
			
 
				+	// box. Axes are visited in X, Z, Y order.
			
 
				+	const int axisOrder[3] = {0, 2, 1};
			
 
				+	for (int k = 0; k < 3; k++)
			
 
				+	{
			
 
				+		const int axis = axisOrder[k];
			
 
				+		if (b3Min(b3Min(vertices[0][axis], vertices[1][axis]), vertices[2][axis]) > aabbMax[axis])
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+		if (b3Max(b3Max(vertices[0][axis], vertices[1][axis]), vertices[2][axis]) < aabbMin[axis])
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE int b3Outcode(const b3Vector3& p, const b3Vector3& halfExtent)
			
 
				+{
			
 
				+	// Bits 0x01 / 0x02 / 0x04 flag a coordinate below -halfExtent on X / Y / Z;
			
 
				+	// bits 0x08 / 0x10 / 0x20 flag a coordinate above +halfExtent on X / Y / Z.
			
 
				+	int outcode = 0x0;
			
 
				+	if (p.getX() < -halfExtent.getX()) outcode |= 0x01;
			
 
				+	if (p.getX() > halfExtent.getX()) outcode |= 0x08;
			
 
				+	if (p.getY() < -halfExtent.getY()) outcode |= 0x02;
			
 
				+	if (p.getY() > halfExtent.getY()) outcode |= 0x10;
			
 
				+	if (p.getZ() < -halfExtent.getZ()) outcode |= 0x4;
			
 
				+	if (p.getZ() > halfExtent.getZ()) outcode |= 0x20;
			
 
				+	return outcode;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE bool b3RayAabb2(const b3Vector3& rayFrom,
			
 
				+								const b3Vector3& rayInvDirection,
			
 
				+								const unsigned int raySign[3],
			
 
				+								const b3Vector3 bounds[2],
			
 
				+								b3Scalar& tmin,
			
 
				+								b3Scalar lambda_min,
			
 
				+								b3Scalar lambda_max)
			
 
				+{  // Slab test of a ray against the box bounds[0]..bounds[1]. tmin is written even when false is returned.
			
 
				+	b3Scalar tmax, tymin, tymax, tzmin, tzmax;
			
 
				+	tmin = (bounds[raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();  // raySign[axis] picks which of the two bounds is hit first on that axis
			
 
				+	tmax = (bounds[1 - raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
			
 
				+	tymin = (bounds[raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
			
 
				+	tymax = (bounds[1 - raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
			
 
				+	// the X and Y parameter intervals must intersect
			
 
				+	if ((tmin > tymax) || (tymin > tmax))
			
 
				+		return false;
			
 
				+	// narrow [tmin, tmax] to the intersection with the Y interval
			
 
				+	if (tymin > tmin)
			
 
				+		tmin = tymin;
			
 
				+
			
 
				+	if (tymax < tmax)
			
 
				+		tmax = tymax;
			
 
				+
			
 
				+	tzmin = (bounds[raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
			
 
				+	tzmax = (bounds[1 - raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
			
 
				+	// same check and narrowing for the Z interval
			
 
				+	if ((tmin > tzmax) || (tzmin > tmax))
			
 
				+		return false;
			
 
				+	if (tzmin > tmin)
			
 
				+		tmin = tzmin;
			
 
				+	if (tzmax < tmax)
			
 
				+		tmax = tzmax;
			
 
				+	return ((tmin < lambda_max) && (tmax > lambda_min));  // hit only if the final interval overlaps (lambda_min, lambda_max)
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE bool b3RayAabb(const b3Vector3& rayFrom,
			
 
				+							   const b3Vector3& rayTo,
			
 
				+							   const b3Vector3& aabbMin,
			
 
				+							   const b3Vector3& aabbMax,
			
 
				+							   b3Scalar& param, b3Vector3& normal)
			
 
				+{  // Segment-vs-box test. param is in/out: it caps the exit parameter on entry and receives the entry parameter on a hit; normal is written only on a hit.
			
 
				+	b3Vector3 aabbHalfExtent = (aabbMax - aabbMin) * b3Scalar(0.5);
			
 
				+	b3Vector3 aabbCenter = (aabbMax + aabbMin) * b3Scalar(0.5);
			
 
				+	b3Vector3 source = rayFrom - aabbCenter;  // both end points are expressed relative to the box centre
			
 
				+	b3Vector3 target = rayTo - aabbCenter;
			
 
				+	int sourceOutcode = b3Outcode(source, aabbHalfExtent);
			
 
				+	int targetOutcode = b3Outcode(target, aabbHalfExtent);
			
 
				+	if ((sourceOutcode & targetOutcode) == 0x0)  // a shared bit means both end points are beyond the same face: no hit possible
			
 
				+	{
			
 
				+		b3Scalar lambda_enter = b3Scalar(0.0);
			
 
				+		b3Scalar lambda_exit = param;
			
 
				+		b3Vector3 r = target - source;
			
 
				+		int i;
			
 
				+		b3Scalar normSign = 1;
			
 
				+		b3Vector3 hitNormal = b3MakeVector3(0, 0, 0);
			
 
				+		int bit = 1;
			
 
				+		// j == 0 walks outcode bits 0x01..0x04 with normSign +1, j == 1 walks bits 0x08..0x20 with normSign -1
			
 
				+		for (int j = 0; j < 2; j++)
			
 
				+		{
			
 
				+			for (i = 0; i != 3; ++i)
			
 
				+			{
			
 
				+				if (sourceOutcode & bit)
			
 
				+				{
			
 
				+					b3Scalar lambda = (-source[i] - aabbHalfExtent[i] * normSign) / r[i];  // parameter at which the segment reaches this face's plane
			
 
				+					if (lambda_enter <= lambda)
			
 
				+					{
			
 
				+						lambda_enter = lambda;
			
 
				+						hitNormal.setValue(0, 0, 0);
			
 
				+						hitNormal[i] = normSign;
			
 
				+					}
			
 
				+				}
			
 
				+				else if (targetOutcode & bit)
			
 
				+				{
			
 
				+					b3Scalar lambda = (-source[i] - aabbHalfExtent[i] * normSign) / r[i];
			
 
				+					b3SetMin(lambda_exit, lambda);
			
 
				+				}
			
 
				+				bit <<= 1;
			
 
				+			}
			
 
				+			normSign = b3Scalar(-1.);
			
 
				+		}
			
 
				+		if (lambda_enter <= lambda_exit)  // the segment enters the box before it leaves it
			
 
				+		{
			
 
				+			param = lambda_enter;
			
 
				+			normal = hitNormal;
			
 
				+			return true;
			
 
				+		}
			
 
				+	}
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& halfExtents, b3Scalar margin, const b3Transform& t, b3Vector3& aabbMinOut, b3Vector3& aabbMaxOut)
			
 
				+{
			
 
				+	// Pad the half extents by the margin on every axis.
			
 
				+	b3Vector3 paddedHalfExtents = halfExtents + b3MakeVector3(margin, margin, margin);
			
 
				+	// Dotting the padded extents with the rows of the absolute basis gives
			
 
				+	// the half size of the transformed box along each output axis.
			
 
				+	b3Matrix3x3 absBasis = t.getBasis().absolute();
			
 
				+	b3Vector3 boxCenter = t.getOrigin();
			
 
				+	b3Vector3 boxExtent = paddedHalfExtents.dot3(absBasis[0], absBasis[1], absBasis[2]);
			
 
				+	aabbMinOut = boxCenter - boxExtent;
			
 
				+	aabbMaxOut = boxCenter + boxExtent;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& localAabbMin, const b3Vector3& localAabbMax, b3Scalar margin, const b3Transform& trans, b3Vector3& aabbMinOut, b3Vector3& aabbMaxOut)
			
 
				+{
			
 
				+	// The input corners are not checked for min <= max.
			
 
				+	// Half size of the local box, padded by the margin on every axis.
			
 
				+	b3Vector3 paddedHalfExtents = b3Scalar(0.5) * (localAabbMax - localAabbMin);
			
 
				+	paddedHalfExtents += b3MakeVector3(margin, margin, margin);
			
 
				+
			
 
				+	// The local box centre is carried through the full transform.
			
 
				+	b3Vector3 localMidpoint = b3Scalar(0.5) * (localAabbMax + localAabbMin);
			
 
				+	b3Matrix3x3 absBasis = trans.getBasis().absolute();
			
 
				+	b3Vector3 boxCenter = trans(localMidpoint);
			
 
				+	b3Vector3 boxExtent = paddedHalfExtents.dot3(absBasis[0], absBasis[1], absBasis[2]);
			
 
				+	aabbMinOut = boxCenter - boxExtent;
			
 
				+	aabbMaxOut = boxCenter + boxExtent;
			
 
				+}
			
 
				+
			
 
				+#define B3_USE_BANCHLESS 1
			
 
				+#ifdef B3_USE_BANCHLESS
			
 
				+//This block replaces the block below and uses no branches, and replaces the 8 bit return with a 32 bit return for improved performance (~3x on XBox 360)
			
 
				+B3_FORCE_INLINE unsigned b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1, const unsigned short int* aabbMax1, const unsigned short int* aabbMin2, const unsigned short int* aabbMax2)
			
 
				+{
			
 
				+	// The six interval comparisons (components 0, 2, 1) are combined with
			
 
				+	// bitwise AND, so there is no short-circuit branching.
			
 
				+	const unsigned overlapMask = (unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) &
			
 
				+											(aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) &
			
 
				+											(aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1]));
			
 
				+	return static_cast<unsigned int>(b3Select(overlapMask, 1, 0));
			
 
				+}
			
 
				+#else
			
 
				+B3_FORCE_INLINE bool b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1, const unsigned short int* aabbMax1, const unsigned short int* aabbMin2, const unsigned short int* aabbMax2)
			
 
				+{
			
 
				+	// Branching variant: the boxes are disjoint as soon as one component
			
 
				+	// (0, 2 or 1) has separated intervals.
			
 
				+	const bool separated0 = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]);
			
 
				+	const bool separated2 = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]);
			
 
				+	const bool separated1 = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]);
			
 
				+	return !(separated0 || separated2 || separated1);
			
 
				+}
			
 
				+#endif  //B3_USE_BANCHLESS
			
 
				+
			
 
				+#endif  //B3_AABB_UTIL2
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3ConvexHullComputer.cpp
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3ConvexHullComputer.cpp
@@ -0,0 +1,2745 @@
 
				+/*
			
 
				+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include <string.h>
			
 
				+
			
 
				+#include "b3ConvexHullComputer.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3MinMax.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+
			
 
				+#ifdef __GNUC__
			
 
				+#include <stdint.h>
			
 
				+typedef int32_t btInt32_t;
			
 
				+typedef int64_t btInt64_t;
			
 
				+typedef uint32_t btUint32_t;
			
 
				+typedef uint64_t btUint64_t;
			
 
				+#elif defined(_MSC_VER)
			
 
				+typedef __int32 btInt32_t;
			
 
				+typedef __int64 btInt64_t;
			
 
				+typedef unsigned __int32 btUint32_t;
			
 
				+typedef unsigned __int64 btUint64_t;
			
 
				+#else
			
 
				+typedef int btInt32_t;
			
 
				+typedef long long int btInt64_t;
			
 
				+typedef unsigned int btUint32_t;
			
 
				+typedef unsigned long long int btUint64_t;
			
 
				+#endif
			
 
				+
			
 
				+//The definition of USE_X86_64_ASM is moved into the build system. You can enable it manually by uncommenting the following lines
			
 
				+//#if (defined(__GNUC__) && defined(__x86_64__) && !defined(__ICL))  // || (defined(__ICL) && defined(_M_X64))   bug in Intel compiler, disable inline assembly
			
 
				+//	#define USE_X86_64_ASM
			
 
				+//#endif
			
 
				+
			
 
				+//#define DEBUG_CONVEX_HULL
			
 
				+//#define SHOW_ITERATIONS
			
 
				+
			
 
				+#if defined(DEBUG_CONVEX_HULL) || defined(SHOW_ITERATIONS)
			
 
				+#include <stdio.h>
			
 
				+#endif
			
 
				+
			
 
				+// Convex hull implementation based on Preparata and Hong
			
 
				+// Ole Kniemeyer, MAXON Computer GmbH
			
 
				+class b3ConvexHullInternal
			
 
				+{
			
 
				+public:
			
 
				+	// Three-component integer point/vector with 64-bit coordinates.
				+	class Point64
				+	{
				+	public:
				+		btInt64_t x;
				+		btInt64_t y;
				+		btInt64_t z;
				+
				+		Point64(btInt64_t px, btInt64_t py, btInt64_t pz) : x(px), y(py), z(pz)
				+		{
				+		}
				+
				+		// True when every coordinate is zero.
				+		bool isZero()
				+		{
				+			return !((x != 0) || (y != 0) || (z != 0));
				+		}
				+
				+		// Dot product with another 64-bit point, accumulated in 64-bit arithmetic.
				+		btInt64_t dot(const Point64& b) const
				+		{
				+			btInt64_t sum = x * b.x + y * b.y;
				+			sum = sum + z * b.z;
				+			return sum;
				+		}
				+	};
			
 
				+
			
 
				+	// Three-component integer point/vector with 32-bit coordinates plus an index tag.
				+	// The coordinate constructor sets index to -1; the default constructor leaves every member uninitialized.
				+	class Point32
				+	{
				+	public:
				+		btInt32_t x;
				+		btInt32_t y;
				+		btInt32_t z;
				+		int index;
				+
				+		Point32()
				+		{
				+		}
				+
				+		Point32(btInt32_t x, btInt32_t y, btInt32_t z) : x(x), y(y), z(z), index(-1)
				+		{
				+		}
				+
				+		// Equality compares coordinates only; index is ignored.
				+		bool operator==(const Point32& b) const
				+		{
				+			return (x == b.x) && (y == b.y) && (z == b.z);
				+		}
				+
				+		bool operator!=(const Point32& b) const
				+		{
				+			return (x != b.x) || (y != b.y) || (z != b.z);
				+		}
				+
				+		// True when every coordinate is zero.
				+		bool isZero()
				+		{
				+			return (x == 0) && (y == 0) && (z == 0);
				+		}
				+
				+		// Cross product with another 32-bit point, returned with 64-bit components.
				+		Point64 cross(const Point32& b) const
				+		{
				+			// Widen this point's coordinates first so every product is evaluated in 64-bit arithmetic;
				+			// multiplying two 32-bit operands would overflow before the result is converted.
				+			const btInt64_t ax = x;
				+			const btInt64_t ay = y;
				+			const btInt64_t az = z;
				+			return Point64(ay * b.z - az * b.y, az * b.x - ax * b.z, ax * b.y - ay * b.x);
				+		}
				+
				+		// Cross product with a 64-bit point; the 64-bit operand already forces 64-bit arithmetic.
				+		Point64 cross(const Point64& b) const
				+		{
				+			return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x);
				+		}
				+
				+		// Dot product with another 32-bit point, returned as a 64-bit value.
				+		btInt64_t dot(const Point32& b) const
				+		{
				+			// Widen before multiplying so the products and their sum cannot overflow 32 bits.
				+			const btInt64_t ax = x;
				+			const btInt64_t ay = y;
				+			const btInt64_t az = z;
				+			return ax * b.x + ay * b.y + az * b.z;
				+		}
				+
				+		// Dot product with a 64-bit point; the 64-bit operand already forces 64-bit arithmetic.
				+		btInt64_t dot(const Point64& b) const
				+		{
				+			return x * b.x + y * b.y + z * b.z;
				+		}
				+
				+		// Component-wise sum; the result's index is -1.
				+		Point32 operator+(const Point32& b) const
				+		{
				+			return Point32(x + b.x, y + b.y, z + b.z);
				+		}
				+
				+		// Component-wise difference; the result's index is -1.
				+		Point32 operator-(const Point32& b) const
				+		{
				+			return Point32(x - b.x, y - b.y, z - b.z);
				+		}
				+	};
			
 
				+
			
 
				+	// 128-bit integer stored as two unsigned 64-bit halves.
				+	// Negative values are represented in two's complement across both halves (see the sign-extending constructor and operator-).
				+	class Int128
				+	{
				+	public:
				+		btUint64_t low;   // least significant 64 bits
				+		btUint64_t high;  // most significant 64 bits
				+
				+		// Leaves both halves uninitialized.
				+		Int128()
				+		{
				+		}
				+
				+		Int128(btUint64_t low, btUint64_t high) : low(low), high(high)
				+		{
				+		}
				+
				+		// Zero-extends an unsigned 64-bit value.
				+		Int128(btUint64_t low) : low(low), high(0)
				+		{
				+		}
				+
				+		// Sign-extends a signed 64-bit value: the high half becomes all ones for negative input.
				+		Int128(btInt64_t value) : low(value), high((value >= 0) ? 0 : (btUint64_t)-1LL)
				+		{
				+		}
				+
				+		static Int128 mul(btInt64_t a, btInt64_t b);
				+
				+		static Int128 mul(btUint64_t a, btUint64_t b);
				+
				+		// Two's complement negation.
				+		Int128 operator-() const
				+		{
				+			// Negate the low half; inverting the high half needs the +1 carry only when the low half is zero.
				+			const btUint64_t negatedLow = (btUint64_t) - (btInt64_t)low;
				+			const btUint64_t negatedHigh = ~high + (low == 0);
				+			return Int128(negatedLow, negatedHigh);
				+		}
				+
				+		// 128-bit addition with carry from the low half into the high half.
				+		Int128 operator+(const Int128& b) const
				+		{
				+#ifdef USE_X86_64_ASM
				+			Int128 result;
				+			__asm__(
				+				"addq %[bl], %[rl]\n\t"
				+				"adcq %[bh], %[rh]\n\t"
				+				: [rl] "=r"(result.low), [rh] "=r"(result.high)
				+				: "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
				+				: "cc");
				+			return result;
				+#else
				+			const btUint64_t sumLow = low + b.low;
				+			// Unsigned wrap-around of the low half signals a carry.
				+			const btUint64_t carry = (sumLow < low);
				+			return Int128(sumLow, high + b.high + carry);
				+#endif
				+		}
				+
				+		// 128-bit subtraction.
				+		Int128 operator-(const Int128& b) const
				+		{
				+#ifdef USE_X86_64_ASM
				+			Int128 result;
				+			__asm__(
				+				"subq %[bl], %[rl]\n\t"
				+				"sbbq %[bh], %[rh]\n\t"
				+				: [rl] "=r"(result.low), [rh] "=r"(result.high)
				+				: "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
				+				: "cc");
				+			return result;
				+#else
				+			return *this + -b;
				+#endif
				+		}
				+
				+		// In-place 128-bit addition.
				+		Int128& operator+=(const Int128& b)
				+		{
				+#ifdef USE_X86_64_ASM
				+			__asm__(
				+				"addq %[bl], %[rl]\n\t"
				+				"adcq %[bh], %[rh]\n\t"
				+				: [rl] "=r"(low), [rh] "=r"(high)
				+				: "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
				+				: "cc");
				+#else
				+			// Capture b.high before touching this object: when b aliases *this (x += x),
				+			// incrementing high for the carry would otherwise also change the addend.
				+			const btUint64_t addendHigh = b.high;
				+			const btUint64_t sumLow = low + b.low;
				+			if (sumLow < low)
				+			{
				+				++high;
				+			}
				+			low = sumLow;
				+			high += addendHigh;
				+#endif
				+			return *this;
				+		}
				+
				+		// Pre-increment; carries into the high half when the low half wraps to zero.
				+		Int128& operator++()
				+		{
				+			if (++low == 0)
				+			{
				+				++high;
				+			}
				+			return *this;
				+		}
				+
				+		Int128 operator*(btInt64_t b) const;
				+
				+		// Converts to b3Scalar; negative values are converted through their negation.
				+		b3Scalar toScalar() const
				+		{
				+			if ((btInt64_t)high < 0)
				+			{
				+				return -(-*this).toScalar();
				+			}
				+			// high * 2^64 + low, with 2^64 built as 2^32 * 2^32.
				+			return b3Scalar(high) * (b3Scalar(0x100000000LL) * b3Scalar(0x100000000LL)) + b3Scalar(low);
				+		}
				+
				+		// Returns -1, 0 or 1 according to the sign of the value.
				+		int getSign() const
				+		{
				+			if ((btInt64_t)high < 0)
				+			{
				+				return -1;
				+			}
				+			return (high || low) ? 1 : 0;
				+		}
				+
				+		// Compares both halves as unsigned quantities, high half first.
				+		bool operator<(const Int128& b) const
				+		{
				+			if (high != b.high)
				+			{
				+				return high < b.high;
				+			}
				+			return low < b.low;
				+		}
				+
				+		// Unsigned three-way comparison: -1, 0 or 1.
				+		int ucmp(const Int128& b) const
				+		{
				+			if (high != b.high)
				+			{
				+				return (high < b.high) ? -1 : 1;
				+			}
				+			if (low != b.low)
				+			{
				+				return (low < b.low) ? -1 : 1;
				+			}
				+			return 0;
				+		}
				+	};
			
 
				+
			
 
				+	// Fraction stored as a sign (-1, 0, 1) and unsigned 64-bit magnitudes of numerator and denominator.
				+	// A zero denominator is kept as-is; isNegativeInfinity() and isNaN() interpret it.
				+	class Rational64
				+	{
				+	private:
				+		btUint64_t m_numerator;
				+		btUint64_t m_denominator;
				+		int sign;
				+
				+	public:
				+		Rational64(btInt64_t numerator, btInt64_t denominator)
				+		{
				+			// Sign and magnitude of the numerator.
				+			sign = (numerator > 0) ? 1 : ((numerator < 0) ? -1 : 0);
				+			m_numerator = (numerator < 0) ? (btUint64_t)-numerator : (btUint64_t)numerator;
				+
				+			// A negative denominator flips the overall sign; its magnitude is stored.
				+			if (denominator < 0)
				+			{
				+				sign = -sign;
				+				m_denominator = (btUint64_t)-denominator;
				+			}
				+			else
				+			{
				+				m_denominator = (btUint64_t)denominator;
				+			}
				+		}
				+
				+		// Negative sign combined with a zero denominator.
				+		bool isNegativeInfinity() const
				+		{
				+			return (m_denominator == 0) && (sign < 0);
				+		}
				+
				+		// Zero sign combined with a zero denominator.
				+		bool isNaN() const
				+		{
				+			return (m_denominator == 0) && (sign == 0);
				+		}
				+
				+		int compare(const Rational64& b) const;
				+
				+		// Converts to b3Scalar; a zero denominator maps to sign * B3_INFINITY.
				+		b3Scalar toScalar() const
				+		{
				+			return sign * ((m_denominator == 0) ? B3_INFINITY : (b3Scalar)m_numerator / m_denominator);
				+		}
				+	};
			
 
				+
			
 
				+	// Fraction stored as a sign (-1, 0, 1) and Int128 magnitudes of numerator and denominator.
				+	// isInt64 records that the value was built from a plain 64-bit integer (denominator 1).
				+	class Rational128
				+	{
				+	private:
				+		Int128 numerator;
				+		Int128 denominator;
				+		int sign;
				+		bool isInt64;
				+
				+	public:
				+		// Builds the integer value/1.
				+		Rational128(btInt64_t value)
				+		{
				+			sign = (value > 0) ? 1 : ((value < 0) ? -1 : 0);
				+			// Store the magnitude; the sign is tracked separately.
				+			const btInt64_t magnitude = (value < 0) ? -value : value;
				+			this->numerator = magnitude;
				+			this->denominator = (btUint64_t)1;
				+			isInt64 = true;
				+		}
				+
				+		// Builds numerator/denominator from signed 128-bit values.
				+		Rational128(const Int128& numerator, const Int128& denominator)
				+		{
				+			sign = numerator.getSign();
				+			this->numerator = (sign >= 0) ? numerator : -numerator;
				+
				+			// A negative denominator flips the overall sign; its magnitude is stored.
				+			const int denominatorSign = denominator.getSign();
				+			if (denominatorSign < 0)
				+			{
				+				sign = -sign;
				+				this->denominator = -denominator;
				+			}
				+			else
				+			{
				+				this->denominator = denominator;
				+			}
				+			isInt64 = false;
				+		}
				+
				+		int compare(const Rational128& b) const;
				+
				+		int compare(btInt64_t b) const;
				+
				+		// Converts to b3Scalar; a zero denominator maps to sign * B3_INFINITY.
				+		b3Scalar toScalar() const
				+		{
				+			return sign * ((denominator.getSign() == 0) ? B3_INFINITY : numerator.toScalar() / denominator.toScalar());
				+		}
				+	};
			
 
				+
			
 
				+	// Point whose three coordinates are Int128 numerators over one shared Int128 denominator.
				+	class PointR128
				+	{
				+	public:
				+		Int128 x;
				+		Int128 y;
				+		Int128 z;
				+		Int128 denominator;
				+
				+		PointR128()
				+		{
				+		}
				+
				+		PointR128(Int128 px, Int128 py, Int128 pz, Int128 commonDenominator) : x(px), y(py), z(pz), denominator(commonDenominator)
				+		{
				+		}
				+
				+		// x / denominator as a scalar.
				+		b3Scalar xvalue() const
				+		{
				+			const b3Scalar numeratorValue = x.toScalar();
				+			return numeratorValue / denominator.toScalar();
				+		}
				+
				+		// y / denominator as a scalar.
				+		b3Scalar yvalue() const
				+		{
				+			const b3Scalar numeratorValue = y.toScalar();
				+			return numeratorValue / denominator.toScalar();
				+		}
				+
				+		// z / denominator as a scalar.
				+		b3Scalar zvalue() const
				+		{
				+			const b3Scalar numeratorValue = z.toScalar();
				+			return numeratorValue / denominator.toScalar();
				+		}
				+	};
			
 
				+
			
 
				+	class Edge;
			
 
				+	class Face;
			
 
				+
			
 
				+	// Hull vertex: node of a doubly linked vertex list that also owns an edge ring and a list of "nearby" faces.
				+	// When point.index is non-negative the 32-bit 'point' is used; otherwise the rational 'point128' is used.
				+	class Vertex
				+	{
				+	public:
				+		Vertex* next;
				+		Vertex* prev;
				+		Edge* edges;
				+		Face* firstNearbyFace;
				+		Face* lastNearbyFace;
				+		PointR128 point128;
				+		Point32 point;
				+		int copy;
				+
				+		Vertex() : next(NULL), prev(NULL), edges(NULL), firstNearbyFace(NULL), lastNearbyFace(NULL), copy(-1)
				+		{
				+		}
				+
				+#ifdef DEBUG_CONVEX_HULL
				+		void print()
				+		{
				+			b3Printf("V%d (%d, %d, %d)", point.index, point.x, point.y, point.z);
				+		}
				+
				+		void printGraph();
				+#endif
				+
				+		// Difference of the 32-bit points of two vertices.
				+		Point32 operator-(const Vertex& b) const
				+		{
				+			return point - b.point;
				+		}
				+
				+		// Dot product of this vertex's position with b, as an exact rational.
				+		Rational128 dot(const Point64& b) const
				+		{
				+			if (point.index >= 0)
				+			{
				+				return Rational128(point.dot(b));
				+			}
				+			const Int128 numerator = point128.x * b.x + point128.y * b.y + point128.z * b.z;
				+			return Rational128(numerator, point128.denominator);
				+		}
				+
				+		b3Scalar xvalue() const
				+		{
				+			if (point.index >= 0)
				+			{
				+				return b3Scalar(point.x);
				+			}
				+			return point128.xvalue();
				+		}
				+
				+		b3Scalar yvalue() const
				+		{
				+			if (point.index >= 0)
				+			{
				+				return b3Scalar(point.y);
				+			}
				+			return point128.yvalue();
				+		}
				+
				+		b3Scalar zvalue() const
				+		{
				+			if (point.index >= 0)
				+			{
				+				return b3Scalar(point.z);
				+			}
				+			return point128.zvalue();
				+		}
				+
				+		// Appends src's nearby-face list to this vertex's list, re-targets those faces to this vertex
				+		// and leaves src with an empty list.
				+		void receiveNearbyFaces(Vertex* src)
				+		{
				+			Face* const incomingHead = src->firstNearbyFace;
				+			Face* const incomingTail = src->lastNearbyFace;
				+
				+			// Splice the incoming list after the current tail, or adopt it as the whole list.
				+			if (lastNearbyFace)
				+			{
				+				lastNearbyFace->nextWithSameNearbyVertex = incomingHead;
				+			}
				+			else
				+			{
				+				firstNearbyFace = incomingHead;
				+			}
				+			if (incomingTail)
				+			{
				+				lastNearbyFace = incomingTail;
				+			}
				+
				+			Face* f = incomingHead;
				+			while (f)
				+			{
				+				b3Assert(f->nearbyVertex == src);
				+				f->nearbyVertex = this;
				+				f = f->nextWithSameNearbyVertex;
				+			}
				+
				+			src->firstNearbyFace = NULL;
				+			src->lastNearbyFace = NULL;
				+		}
				+	};
			
 
				+
			
 
				+	// Directed half-edge: member of a doubly linked ring (next/prev), paired with its opposite half-edge (reverse),
				+	// pointing at a target vertex and optionally associated with a face.
				+	class Edge
				+	{
				+	public:
				+		Edge* next;
				+		Edge* prev;
				+		Edge* reverse;
				+		Vertex* target;
				+		Face* face;
				+		int copy;
				+
				+		// Clears all pointer members; 'copy' is left untouched.
				+		~Edge()
				+		{
				+			next = prev = reverse = NULL;
				+			target = NULL;
				+			face = NULL;
				+		}
				+
				+		// Makes n the successor of this edge in the ring. Both edges must start at the same vertex.
				+		void link(Edge* n)
				+		{
				+			b3Assert(reverse->target == n->reverse->target);
				+			n->prev = this;
				+			next = n;
				+		}
				+
				+#ifdef DEBUG_CONVEX_HULL
				+		void print()
				+		{
				+			b3Printf("E%p : %d -> %d,  n=%p p=%p   (0 %d\t%d\t%d) -> (%d %d %d)", this, reverse->target->point.index, target->point.index, next, prev,
				+					 reverse->target->point.x, reverse->target->point.y, reverse->target->point.z, target->point.x, target->point.y, target->point.z);
				+		}
				+#endif
				+	};
			
 
				+
			
 
				+	// Face described by an origin point and two direction vectors; linked into a face list and into the
				+	// nearby-face list of one vertex.
				+	class Face
				+	{
				+	public:
				+		Face* next;
				+		Vertex* nearbyVertex;
				+		Face* nextWithSameNearbyVertex;
				+		Point32 origin;
				+		Point32 dir0;
				+		Point32 dir1;
				+
				+		Face() : next(NULL), nearbyVertex(NULL), nextWithSameNearbyVertex(NULL)
				+		{
				+		}
				+
				+		// Defines the face from vertices a, b, c (origin a, directions b-a and c-a) and appends it
				+		// to a's nearby-face list.
				+		void init(Vertex* a, Vertex* b, Vertex* c)
				+		{
				+			nearbyVertex = a;
				+			origin = a->point;
				+			dir0 = *b - *a;
				+			dir1 = *c - *a;
				+
				+			Face* const previousTail = a->lastNearbyFace;
				+			if (previousTail)
				+			{
				+				previousTail->nextWithSameNearbyVertex = this;
				+			}
				+			else
				+			{
				+				a->firstNearbyFace = this;
				+			}
				+			a->lastNearbyFace = this;
				+		}
				+
				+		// Cross product of the two direction vectors.
				+		Point64 getNormal()
				+		{
				+			const Point64 normal = dir0.cross(dir1);
				+			return normal;
				+		}
				+	};
			
 
				+
			
 
				+	// Double-width unsigned multiplication built from half-word products.
				+	// The private overloads supply the half-word operations for UWord = btUint64_t and UWord = Int128.
				+	template <typename UWord, typename UHWord>
				+	class DMul
				+	{
				+	private:
				+		// --- helpers for 64-bit words split into 32-bit halves ---
				+		static btUint32_t high(btUint64_t value)
				+		{
				+			return (btUint32_t)(value >> 32);
				+		}
				+
				+		static btUint32_t low(btUint64_t value)
				+		{
				+			return (btUint32_t)value;
				+		}
				+
				+		static btUint64_t mul(btUint32_t a, btUint32_t b)
				+		{
				+			return (btUint64_t)a * (btUint64_t)b;
				+		}
				+
				+		static void shlHalf(btUint64_t& value)
				+		{
				+			value <<= 32;
				+		}
				+
				+		// --- helpers for 128-bit words split into 64-bit halves ---
				+		static btUint64_t high(Int128 value)
				+		{
				+			return value.high;
				+		}
				+
				+		static btUint64_t low(Int128 value)
				+		{
				+			return value.low;
				+		}
				+
				+		static Int128 mul(btUint64_t a, btUint64_t b)
				+		{
				+			return Int128::mul(a, b);
				+		}
				+
				+		static void shlHalf(Int128& value)
				+		{
				+			value.high = value.low;
				+			value.low = 0;
				+		}
				+
				+	public:
				+		// Computes a * b and returns the low and high words of the double-width product.
				+		static void mul(UWord a, UWord b, UWord& resLow, UWord& resHigh)
				+		{
				+			// The four half-word partial products.
				+			UWord lowLow = mul(low(a), low(b));
				+			UWord lowHigh = mul(low(a), high(b));
				+			UWord highLow = mul(high(a), low(b));
				+			UWord highHigh = mul(high(a), high(b));
				+
				+			// Sum of the low halves of the two cross products; its own high half carries into the top word.
				+			UWord crossSum = UWord(low(lowHigh)) + UWord(low(highLow));
				+			highHigh += high(lowHigh);
				+			highHigh += high(highLow);
				+			highHigh += high(crossSum);
				+
				+			// Move the cross sum into the upper half of the low word and add it, propagating the carry.
				+			shlHalf(crossSum);
				+			lowLow += crossSum;
				+			if (lowLow < crossSum)
				+			{
				+				++highHigh;
				+			}
				+
				+			resLow = lowLow;
				+			resHigh = highHigh;
				+		}
				+	};
			
 
				+
			
 
				+private:
			
 
				+	// Holds four vertex pointers (minXy, maxXy, minYx, maxYx) of a partial hull; all start out as NULL.
				+	class IntermediateHull
				+	{
				+	public:
				+		Vertex* minXy;
				+		Vertex* maxXy;
				+		Vertex* minYx;
				+		Vertex* maxYx;
				+
				+		IntermediateHull()
				+			: minXy(NULL),
				+			  maxXy(NULL),
				+			  minYx(NULL),
				+			  maxYx(NULL)
				+		{
				+		}
				+
				+		void print();
				+	};
			
 
				+
			
 
				+	enum Orientation
			
 
				+	{
			
 
				+		NONE,
			
 
				+		CLOCKWISE,
			
 
				+		COUNTER_CLOCKWISE
			
 
				+	};
			
 
				+
			
 
				+	// One raw chunk of storage for 'size' objects of type T, chained to further chunks through 'next'.
				+	// The storage is allocated with b3AlignedAlloc and released with b3AlignedFree; no T constructors run here.
				+	template <typename T>
				+	class PoolArray
				+	{
				+	private:
				+		T* array;
				+		int size;
				+
				+	public:
				+		PoolArray<T>* next;
				+
				+		PoolArray(int count) : size(count), next(NULL)
				+		{
				+			array = (T*)b3AlignedAlloc(sizeof(T) * count, 16);
				+		}
				+
				+		~PoolArray()
				+		{
				+			b3AlignedFree(array);
				+		}
				+
				+		// Threads every slot into a singly linked free list via T::next (last slot gets NULL)
				+		// and returns the first slot.
				+		T* init()
				+		{
				+			for (int i = 0; i < size; i++)
				+			{
				+				T* slot = array + i;
				+				slot->next = (i + 1 < size) ? slot + 1 : NULL;
				+			}
				+			return array;
				+		}
				+	};
			
 
				+
			
 
				+	// Free-list object pool for T backed by a chain of PoolArray chunks.
				+	// Objects are constructed with placement new on hand-out and destroyed explicitly on release.
				+	template <typename T>
				+	class Pool
				+	{
				+	private:
				+		PoolArray<T>* arrays;     // all chunks owned by the pool
				+		PoolArray<T>* nextArray;  // next already-allocated chunk to reuse after reset()
				+		T* freeObjects;           // head of the free list
				+		int arraySize;            // number of objects per newly allocated chunk
				+
				+	public:
				+		Pool() : arrays(NULL), nextArray(NULL), freeObjects(NULL), arraySize(256)
				+		{
				+		}
				+
				+		// Destroys and frees every chunk.
				+		~Pool()
				+		{
				+			for (PoolArray<T>* chunk = arrays; chunk; chunk = arrays)
				+			{
				+				arrays = chunk->next;
				+				chunk->~PoolArray<T>();
				+				b3AlignedFree(chunk);
				+			}
				+		}
				+
				+		// Forgets the free list and marks all existing chunks as reusable; nothing is freed.
				+		void reset()
				+		{
				+			freeObjects = NULL;
				+			nextArray = arrays;
				+		}
				+
				+		void setArraySize(int newArraySize)
				+		{
				+			arraySize = newArraySize;
				+		}
				+
				+		// Returns a default-constructed T taken from the free list, refilling the list from a reusable
				+		// or newly allocated chunk when it is empty.
				+		T* newObject()
				+		{
				+			if (!freeObjects)
				+			{
				+				PoolArray<T>* chunk = nextArray;
				+				if (chunk)
				+				{
				+					nextArray = chunk->next;
				+				}
				+				else
				+				{
				+					chunk = new (b3AlignedAlloc(sizeof(PoolArray<T>), 16)) PoolArray<T>(arraySize);
				+					chunk->next = arrays;
				+					arrays = chunk;
				+				}
				+				freeObjects = chunk->init();
				+			}
				+			T* slot = freeObjects;
				+			freeObjects = slot->next;
				+			return new (slot) T();
				+		}
				+
				+		// Destroys the object and pushes its storage onto the free list.
				+		void freeObject(T* object)
				+		{
				+			object->~T();
				+			object->next = freeObjects;
				+			freeObjects = object;
				+		}
				+	};
			
 
				+
			
 
				+	b3Vector3 scaling;
			
 
				+	b3Vector3 center;
			
 
				+	Pool<Vertex> vertexPool;
			
 
				+	Pool<Edge> edgePool;
			
 
				+	Pool<Face> facePool;
			
 
				+	b3AlignedObjectArray<Vertex*> originalVertices;
			
 
				+	int mergeStamp;
			
 
				+	int minAxis;
			
 
				+	int medAxis;
			
 
				+	int maxAxis;
			
 
				+	int usedEdgePairs;
			
 
				+	int maxUsedEdgePairs;
			
 
				+
			
 
				+	static Orientation getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t);
			
 
				+	Edge* findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot);
			
 
				+	void findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1);
			
 
				+
			
 
				+	Edge* newEdgePair(Vertex* from, Vertex* to);
			
 
				+
			
 
				+	// Unlinks an edge and its reverse from the edge rings of their start vertices, updates each vertex's
				+	// 'edges' pointer (NULL when the ring becomes empty), returns both edges to the pool and decrements usedEdgePairs.
				+	void removeEdgePair(Edge* edge)
				+	{
				+		Edge* twin = edge->reverse;
				+
				+		b3Assert(edge->target && twin->target);
				+
				+		// 'edge' starts at twin->target and 'twin' starts at edge->target.
				+		Vertex* const edgeOrigin = twin->target;
				+		Vertex* const twinOrigin = edge->target;
				+
				+		Edge* successor = edge->next;
				+		if (successor == edge)
				+		{
				+			// 'edge' was the only edge in its ring.
				+			edgeOrigin->edges = NULL;
				+		}
				+		else
				+		{
				+			successor->prev = edge->prev;
				+			edge->prev->next = successor;
				+			edgeOrigin->edges = successor;
				+		}
				+
				+		successor = twin->next;
				+		if (successor == twin)
				+		{
				+			// 'twin' was the only edge in its ring.
				+			twinOrigin->edges = NULL;
				+		}
				+		else
				+		{
				+			successor->prev = twin->prev;
				+			twin->prev->next = successor;
				+			twinOrigin->edges = successor;
				+		}
				+
				+		edgePool.freeObject(edge);
				+		edgePool.freeObject(twin);
				+		usedEdgePairs--;
				+	}
			
 
				+
			
 
				+	void computeInternal(int start, int end, IntermediateHull& result);
			
 
				+
			
 
				+	bool mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1);
			
 
				+
			
 
				+	void merge(IntermediateHull& h0, IntermediateHull& h1);
			
 
				+
			
 
				+	b3Vector3 toBtVector(const Point32& v);
			
 
				+
			
 
				+	b3Vector3 getBtNormal(Face* face);
			
 
				+
			
 
				+	bool shiftFace(Face* face, b3Scalar amount, b3AlignedObjectArray<Vertex*> stack);
			
 
				+
			
 
				+public:
			
 
				+	Vertex* vertexList;
			
 
				+
			
 
				+	void compute(const void* coords, bool doubleCoords, int stride, int count);
			
 
				+
			
 
				+	b3Vector3 getCoordinates(const Vertex* v);
			
 
				+
			
 
				+	b3Scalar shrink(b3Scalar amount, b3Scalar clampAmount);
			
 
				+};
			
 
				+
			
 
				+// Multiplies this signed 128-bit value by a signed 64-bit factor, keeping the low 128 bits of the product.
				+// The multiplication is done on magnitudes and the sign is re-applied at the end.
				+b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::operator*(btInt64_t b) const
				+{
				+	const bool selfNegative = (btInt64_t)high < 0;
				+	const bool factorNegative = b < 0;
				+
				+	const Int128 magnitude = selfNegative ? -*this : *this;
				+	const btUint64_t factor = (btUint64_t)(factorNegative ? -b : b);
				+
				+	// low * factor gives a full 128-bit product; high * factor only contributes to the upper half.
				+	Int128 product = mul(magnitude.low, factor);
				+	product.high += magnitude.high * factor;
				+
				+	return (selfNegative != factorNegative) ? -product : product;
				+}
			
 
				+
			
 
				+// Full 128-bit product of two signed 64-bit integers.
				+b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::mul(btInt64_t a, btInt64_t b)
				+{
				+	Int128 result;
				+
				+#ifdef USE_X86_64_ASM
				+	__asm__("imulq %[b]"
				+			: "=a"(result.low), "=d"(result.high)
				+			: "0"(a), [b] "r"(b)
				+			: "cc");
				+	return result;
				+
				+#else
				+	// Multiply the magnitudes with the unsigned double-width routine, then restore the sign.
				+	const bool negateResult = (a < 0) != (b < 0);
				+	if (a < 0)
				+	{
				+		a = -a;
				+	}
				+	if (b < 0)
				+	{
				+		b = -b;
				+	}
				+	DMul<btUint64_t, btUint32_t>::mul((btUint64_t)a, (btUint64_t)b, result.low, result.high);
				+	return negateResult ? -result : result;
				+#endif
				+}
			
 
				+
			
 
				+// Full 128-bit product of two unsigned 64-bit integers.
				+b3ConvexHullInternal::Int128 b3ConvexHullInternal::Int128::mul(btUint64_t a, btUint64_t b)
				+{
				+	Int128 product;
				+
				+#ifdef USE_X86_64_ASM
				+	__asm__("mulq %[b]"
				+			: "=a"(product.low), "=d"(product.high)
				+			: "0"(a), [b] "r"(b)
				+			: "cc");
				+	return product;
				+
				+#else
				+	DMul<btUint64_t, btUint32_t>::mul(a, b, product.low, product.high);
				+	return product;
				+#endif
				+}
			
 
				+
			
 
				+// Three-way comparison of two rationals: negative, zero or positive when *this is less than,
				+// equal to or greater than b. Values with different signs are ordered by sign alone; otherwise the
				+// magnitudes are compared by cross-multiplication in 128-bit precision.
				+int b3ConvexHullInternal::Rational64::compare(const Rational64& b) const
				+{
				+	if (sign != b.sign)
				+	{
				+		return sign - b.sign;
				+	}
				+	else if (sign == 0)
				+	{
				+		return 0;
				+	}
				+
				+	//	return (numerator * b.denominator > b.numerator * denominator) ? sign : (numerator * b.denominator < b.numerator * denominator) ? -sign : 0;
				+
				+#ifdef USE_X86_64_ASM
				+
				+	// The operands must name the actual data members (m_numerator / m_denominator).
				+	int result;
				+	btInt64_t tmp;
				+	btInt64_t dummy;
				+	__asm__(
				+		"mulq %[bn]\n\t"
				+		"movq %%rax, %[tmp]\n\t"
				+		"movq %%rdx, %%rbx\n\t"
				+		"movq %[tn], %%rax\n\t"
				+		"mulq %[bd]\n\t"
				+		"subq %[tmp], %%rax\n\t"
				+		"sbbq %%rbx, %%rdx\n\t"  // rdx:rax contains 128-bit-difference "numerator*b.denominator - b.numerator*denominator"
				+		"setnsb %%bh\n\t"        // bh=1 if difference is non-negative, bh=0 otherwise
				+		"orq %%rdx, %%rax\n\t"
				+		"setnzb %%bl\n\t"      // bl=1 if difference is non-zero, bl=0 if it is zero
				+		"decb %%bh\n\t"        // now bx=0x0000 if difference is zero, 0xff01 if it is negative, 0x0001 if it is positive (i.e., same sign as difference)
				+		"shll $16, %%ebx\n\t"  // ebx has same sign as difference
				+		: "=&b"(result), [tmp] "=&r"(tmp), "=a"(dummy)
				+		: "a"(m_denominator), [bn] "g"(b.m_numerator), [tn] "g"(m_numerator), [bd] "g"(b.m_denominator)
				+		: "%rdx", "cc");
				+	return result ? result ^ sign  // if sign is +1, only bit 0 of result is inverted, which does not change the sign of result (and cannot result in zero)
				+								   // if sign is -1, all bits of result are inverted, which changes the sign of result (and again cannot result in zero)
				+				  : 0;
				+
				+#else
				+
				+	return sign * Int128::mul(m_numerator, b.m_denominator).ucmp(Int128::mul(m_denominator, b.m_numerator));
				+
				+#endif
				+}
			
 
				+
			
 
				+// Three-way comparison of two rationals: negative, zero or positive when *this is less than,
				+// equal to or greater than b.
				+int b3ConvexHullInternal::Rational128::compare(const Rational128& b) const
				+{
				+	// Different signs decide the order directly; two zeros are equal.
				+	if (sign != b.sign)
				+	{
				+		return sign - b.sign;
				+	}
				+	if (sign == 0)
				+	{
				+		return 0;
				+	}
				+
				+	// A plain integer is compared through the integer overload, with the result mirrored.
				+	if (isInt64)
				+	{
				+		return -b.compare(sign * (btInt64_t)numerator.low);
				+	}
				+
				+	// Cross-multiply the magnitudes into double-width products and compare high words, then low words.
				+	Int128 nbdLow, nbdHigh, dbnLow, dbnHigh;
				+	DMul<Int128, btUint64_t>::mul(numerator, b.denominator, nbdLow, nbdHigh);
				+	DMul<Int128, btUint64_t>::mul(denominator, b.numerator, dbnLow, dbnHigh);
				+
				+	const int highOrder = nbdHigh.ucmp(dbnHigh);
				+	if (highOrder != 0)
				+	{
				+		return highOrder * sign;
				+	}
				+	return nbdLow.ucmp(dbnLow) * sign;
				+}
			
 
				+
			
 
				+// Three-way comparison of this rational against a 64-bit integer: -1, 0 or 1 in the integer case,
				+// otherwise a value whose sign gives the order.
				+int b3ConvexHullInternal::Rational128::compare(btInt64_t b) const
				+{
				+	if (isInt64)
				+	{
				+		const btInt64_t self = sign * (btInt64_t)numerator.low;
				+		if (self > b)
				+		{
				+			return 1;
				+		}
				+		return (self < b) ? -1 : 0;
				+	}
				+
				+	// Comparing against zero only depends on the sign.
				+	if (b == 0)
				+	{
				+		return sign;
				+	}
				+
				+	if (b > 0)
				+	{
				+		// A non-positive value is below any positive integer.
				+		if (sign <= 0)
				+		{
				+			return -1;
				+		}
				+	}
				+	else
				+	{
				+		// A non-negative value is above any negative integer.
				+		if (sign >= 0)
				+		{
				+			return 1;
				+		}
				+		b = -b;
				+	}
				+
				+	// Same sign on both sides: compare magnitudes |numerator| and |denominator| * |b|.
				+	return numerator.ucmp(denominator * b) * sign;
				+}
			
 
				+
			
 
				+// Allocates a pair of mutually reversed half-edges between 'from' and 'to', stamps both with the current
				+// mergeStamp, updates the edge-pair counters and returns the half-edge that targets 'to'.
				+// The next/prev links of the new edges are not set here.
				+b3ConvexHullInternal::Edge* b3ConvexHullInternal::newEdgePair(Vertex* from, Vertex* to)
				+{
				+	b3Assert(from && to);
				+
				+	Edge* forward = edgePool.newObject();
				+	Edge* backward = edgePool.newObject();
				+
				+	forward->reverse = backward;
				+	forward->copy = mergeStamp;
				+	forward->target = to;
				+	forward->face = NULL;
				+
				+	backward->reverse = forward;
				+	backward->copy = mergeStamp;
				+	backward->target = from;
				+	backward->face = NULL;
				+
				+	// Track the high-water mark of simultaneously used edge pairs.
				+	++usedEdgePairs;
				+	if (maxUsedEdgePairs < usedEdgePairs)
				+	{
				+		maxUsedEdgePairs = usedEdgePairs;
				+	}
				+	return forward;
				+}
			
 
				+/*
				+ * Joins the circular next/prev vertex rings of h0 and h1 into one ring
				+ * that is then described by h0. Apart from one assertion on z, only the
				+ * x and y coordinates of the vertices are examined here.
				+ *
				+ * Returns false, leaving both rings as they are, when h0.maxYx and
				+ * h1.minYx have the same x and y and h1's ring consists of that single
				+ * vertex. c0 then receives h0.maxYx and c1 receives h1.minYx or, if that
				+ * vertex has an edge, the target of that edge.
				+ *
				+ * Otherwise returns true after splicing the rings. c0 / c1 receive the
				+ * vertex pair (one from each hull) found by the first of the two passes,
				+ * and h0.minXy, h0.maxXy and h0.maxYx are updated from h1.
				+ * NOTE(review): the two passes look like a search for the two common
				+ * tangents of the hulls' xy outlines - confirm against the algorithm
				+ * description.
				+ */
				+bool b3ConvexHullInternal::mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1)
				+{
				+	Vertex* v0 = h0.maxYx;
				+	Vertex* v1 = h1.minYx;
				+	if ((v0->point.x == v1->point.x) && (v0->point.y == v1->point.y))  // the two vertices coincide in x and y
				+	{
				+		b3Assert(v0->point.z < v1->point.z);
				+		Vertex* v1p = v1->prev;
				+		if (v1p == v1)  // v1 is the only vertex of h1's ring
				+		{
				+			c0 = v0;
				+			if (v1->edges)
				+			{
				+				b3Assert(v1->edges->next == v1->edges);
				+				v1 = v1->edges->target;  // continue with the vertex at the other end of v1's single edge
				+				b3Assert(v1->edges->next == v1->edges);
				+			}
				+			c1 = v1;
				+			return false;
				+		}
				+		Vertex* v1n = v1->next;
				+		v1p->next = v1n;  // unlink v1 from h1's ring
				+		v1n->prev = v1p;
				+		if (v1 == h1.minXy)  // v1 was h1's minXy: pick the replacement among its two former neighbours (smaller x, then smaller y)
				+		{
				+			if ((v1n->point.x < v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y < v1p->point.y)))
				+			{
				+				h1.minXy = v1n;
				+			}
				+			else
				+			{
				+				h1.minXy = v1p;
				+			}
				+		}
				+		if (v1 == h1.maxXy)  // same for maxXy (larger x, then larger y)
				+		{
				+			if ((v1n->point.x > v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y > v1p->point.y)))
				+			{
				+				h1.maxXy = v1n;
				+			}
				+			else
				+			{
				+				h1.maxXy = v1p;
				+			}
				+		}
				+	}
				+
				+	v0 = h0.maxXy;
				+	v1 = h1.maxXy;
				+	Vertex* v00 = NULL;  // result pair of the first pass (side == 0)
				+	Vertex* v10 = NULL;
				+	btInt32_t sign = 1;  // x differences are multiplied by sign so both passes can share the same tests
				+	// pass 0 starts at the maxXy vertices with sign = 1, pass 1 at the minXy vertices with sign = -1
				+	for (int side = 0; side <= 1; side++)
				+	{
				+		btInt32_t dx = (v1->point.x - v0->point.x) * sign;
				+		if (dx > 0)
				+		{
				+			while (true)  // alternately advance v0 and v1 along their rings until neither test below succeeds
				+			{
				+				btInt32_t dy = v1->point.y - v0->point.y;
				+
				+				Vertex* w0 = side ? v0->next : v0->prev;  // candidate neighbour of v0; walking direction depends on the pass
				+				if (w0 != v0)
				+				{
				+					btInt32_t dx0 = (w0->point.x - v0->point.x) * sign;
				+					btInt32_t dy0 = w0->point.y - v0->point.y;
				+					if ((dy0 <= 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx <= dy * dx0))))  // cross-multiplied comparison of (dy0, dx0) with (dy, dx); no division needed
				+					{
				+						v0 = w0;
				+						dx = (v1->point.x - v0->point.x) * sign;
				+						continue;
				+					}
				+				}
				+
				+				Vertex* w1 = side ? v1->next : v1->prev;
				+				if (w1 != v1)
				+				{
				+					btInt32_t dx1 = (w1->point.x - v1->point.x) * sign;
				+					btInt32_t dy1 = w1->point.y - v1->point.y;
				+					btInt32_t dxn = (w1->point.x - v0->point.x) * sign;  // dx as it would be after moving v1 to w1
				+					if ((dxn > 0) && (dy1 < 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx < dy * dx1))))
				+					{
				+						v1 = w1;
				+						dx = dxn;
				+						continue;
				+					}
				+				}
				+
				+				break;
				+			}
				+		}
				+		else if (dx < 0)
				+		{
				+			while (true)  // counterpart of the loop above: v1 is tried first and the walking directions and inequalities are reversed
				+			{
				+				btInt32_t dy = v1->point.y - v0->point.y;
				+
				+				Vertex* w1 = side ? v1->prev : v1->next;
				+				if (w1 != v1)
				+				{
				+					btInt32_t dx1 = (w1->point.x - v1->point.x) * sign;
				+					btInt32_t dy1 = w1->point.y - v1->point.y;
				+					if ((dy1 >= 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx <= dy * dx1))))
				+					{
				+						v1 = w1;
				+						dx = (v1->point.x - v0->point.x) * sign;
				+						continue;
				+					}
				+				}
				+
				+				Vertex* w0 = side ? v0->prev : v0->next;
				+				if (w0 != v0)
				+				{
				+					btInt32_t dx0 = (w0->point.x - v0->point.x) * sign;
				+					btInt32_t dy0 = w0->point.y - v0->point.y;
				+					btInt32_t dxn = (v1->point.x - w0->point.x) * sign;  // dx as it would be after moving v0 to w0
				+					if ((dxn < 0) && (dy0 > 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx < dy * dx0))))
				+					{
				+						v0 = w0;
				+						dx = dxn;
				+						continue;
				+					}
				+				}
				+
				+				break;
				+			}
				+		}
				+		else  // dx == 0: v0 and v1 have the same x
				+		{
				+			btInt32_t x = v0->point.x;
				+			btInt32_t y0 = v0->point.y;
				+			Vertex* w0 = v0;
				+			Vertex* t;
				+			while (((t = side ? w0->next : w0->prev) != v0) && (t->point.x == x) && (t->point.y <= y0))  // follow h0's ring while x stays the same and y does not increase
				+			{
				+				w0 = t;
				+				y0 = t->point.y;
				+			}
				+			v0 = w0;
				+
				+			btInt32_t y1 = v1->point.y;
				+			Vertex* w1 = v1;
				+			while (((t = side ? w1->prev : w1->next) != v1) && (t->point.x == x) && (t->point.y >= y1))  // follow h1's ring while x stays the same and y does not decrease
				+			{
				+				w1 = t;
				+				y1 = t->point.y;
				+			}
				+			v1 = w1;
				+		}
				+
				+		if (side == 0)  // keep the pair found by the first pass and set up the second one
				+		{
				+			v00 = v0;
				+			v10 = v1;
				+
				+			v0 = h0.minXy;
				+			v1 = h1.minXy;
				+			sign = -1;
				+		}
				+	}
				+	// splice the two rings: second-pass pair is linked v1 -> v0, first-pass pair v00 -> v10
				+	v0->prev = v1;
				+	v1->next = v0;
				+
				+	v00->next = v10;
				+	v10->prev = v00;
				+	// h0 now describes the joined ring: take over h1's extremes where they extend further
				+	if (h1.minXy->point.x < h0.minXy->point.x)
				+	{
				+		h0.minXy = h1.minXy;
				+	}
				+	if (h1.maxXy->point.x >= h0.maxXy->point.x)
				+	{
				+		h0.maxXy = h1.maxXy;
				+	}
				+
				+	h0.maxYx = h1.maxYx;  // taken from h1 unconditionally; h0.minYx is left as it was
				+
				+	c0 = v00;
				+	c1 = v10;
				+
				+	return true;
				+}
			
 
				+/*
				+ * Builds the intermediate hull of originalVertices[start .. end-1] and
				+ * stores it in `result`.
				+ *   n == 0: all four extreme pointers of `result` are set to NULL.
				+ *   n == 2: if the two points differ they are connected by one edge pair;
				+ *           if they are equal the case falls through to n == 1.
				+ *   n == 1: a single vertex without edges that forms a ring with itself.
				+ *   else  : the range is split in the middle, both parts are computed
				+ *           recursively and combined with merge(). Vertices at the
				+ *           start of the second part whose point equals the last point
				+ *           of the first part are skipped.
				+ */
				+void b3ConvexHullInternal::computeInternal(int start, int end, IntermediateHull& result)
				+{
				+	int n = end - start;
				+	switch (n)
				+	{
				+		case 0:
				+			result.minXy = NULL;
				+			result.maxXy = NULL;
				+			result.minYx = NULL;
				+			result.maxYx = NULL;
				+			return;
				+		case 2:
				+		{
				+			Vertex* v = originalVertices[start];
				+			Vertex* w = v + 1;  // NOTE(review): pointer arithmetic instead of originalVertices[start + 1]; assumes consecutive vertices are adjacent in memory - confirm against the vertex pool
				+			if (v->point != w->point)
				+			{
				+				btInt32_t dx = v->point.x - w->point.x;
				+				btInt32_t dy = v->point.y - w->point.y;
				+
				+				if ((dx == 0) && (dy == 0))  // same x and y, so the points differ only in z
				+				{
				+					if (v->point.z > w->point.z)  // swap so that v is the one with the smaller z
				+					{
				+						Vertex* t = w;
				+						w = v;
				+						v = t;
				+					}
				+					b3Assert(v->point.z < w->point.z);
				+					v->next = v;  // only v enters the vertex ring; w is reachable through the edge created below
				+					v->prev = v;
				+					result.minXy = v;
				+					result.maxXy = v;
				+					result.minYx = v;
				+					result.maxYx = v;
				+				}
				+				else
				+				{
				+					v->next = w;  // two-vertex ring
				+					v->prev = w;
				+					w->next = v;
				+					w->prev = v;
				+
				+					if ((dx < 0) || ((dx == 0) && (dy < 0)))  // order by x, ties broken by y
				+					{
				+						result.minXy = v;
				+						result.maxXy = w;
				+					}
				+					else
				+					{
				+						result.minXy = w;
				+						result.maxXy = v;
				+					}
				+
				+					if ((dy < 0) || ((dy == 0) && (dx < 0)))  // order by y, ties broken by x
				+					{
				+						result.minYx = v;
				+						result.maxYx = w;
				+					}
				+					else
				+					{
				+						result.minYx = w;
				+						result.maxYx = v;
				+					}
				+				}
				+
				+				Edge* e = newEdgePair(v, w);
				+				e->link(e);  // each edge is linked to itself: it is the only edge at its vertex
				+				v->edges = e;
				+
				+				e = e->reverse;
				+				e->link(e);
				+				w->edges = e;
				+
				+				return;
				+			}
				+		}
				+		// lint -fallthrough (two identical points are handled like a single vertex)
				+		case 1:
				+		{
				+			Vertex* v = originalVertices[start];
				+			v->edges = NULL;
				+			v->next = v;
				+			v->prev = v;
				+
				+			result.minXy = v;
				+			result.maxXy = v;
				+			result.minYx = v;
				+			result.maxYx = v;
				+
				+			return;
				+		}
				+	}
				+	// general case: divide, recurse, merge
				+	int split0 = start + n / 2;
				+	Point32 p = originalVertices[split0 - 1]->point;  // last point of the first part
				+	int split1 = split0;
				+	while ((split1 < end) && (originalVertices[split1]->point == p))  // skip points of the second part that equal p
				+	{
				+		split1++;
				+	}
				+	computeInternal(start, split0, result);
				+	IntermediateHull hull1;
				+	computeInternal(split1, end, hull1);
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("\n\nMerge\n");
				+	result.print();
				+	hull1.print();
				+#endif
				+	merge(result, hull1);
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("\n  Result\n");
				+	result.print();
				+#endif
				+}
			
 
				+/*
				+ * Debug helper, compiled only when DEBUG_CONVEX_HULL is defined.
				+ * Prints every vertex of this hull's ring, starting at minXy and following
				+ * `next` until minXy is reached again. The maxXy / minYx / maxYx vertices
				+ * are tagged, and a vertex whose next->prev does not point back to it is
				+ * flagged as " Inconsistency". Afterwards the edge graph reachable from
				+ * minXy is dumped through printGraph().
				+ */
				+#ifdef DEBUG_CONVEX_HULL
				+void b3ConvexHullInternal::IntermediateHull::print()
				+{
				+	b3Printf("    Hull\n");
				+	for (Vertex* v = minXy; v;)  // an empty hull (minXy == NULL) prints nothing
				+	{
				+		b3Printf("      ");
				+		v->print();
				+		if (v == maxXy)
				+		{
				+			b3Printf(" maxXy");
				+		}
				+		if (v == minYx)
				+		{
				+			b3Printf(" minYx");
				+		}
				+		if (v == maxYx)
				+		{
				+			b3Printf(" maxYx");
				+		}
				+		if (v->next->prev != v)  // ring links must be mutually consistent
				+		{
				+			b3Printf(" Inconsistency");
				+		}
				+		b3Printf("\n");
				+		v = v->next;
				+		if (v == minXy)  // went once around the ring
				+		{
				+			break;
				+		}
				+	}
				+	if (minXy)
				+	{
				+		minXy->copy = (minXy->copy == -1) ? -2 : -1;  // switch the marker between -1 and -2; printGraph() visits vertices whose copy differs from it
				+		minXy->printGraph();
				+	}
				+}
			
 
				+/*
				+ * Debug helper (inside the DEBUG_CONVEX_HULL section): prints this vertex
				+ * and all edges of its edge ring, then recurses into every edge target
				+ * whose `copy` value differs from this vertex's `copy`. A target is given
				+ * this vertex's `copy` value before the recursive call, which prevents it
				+ * from being visited again.
				+ */
				+void b3ConvexHullInternal::Vertex::printGraph()
				+{
				+	print();
				+	b3Printf("\nEdges\n");
				+	Edge* e = edges;
				+	if (e)
				+	{
				+		do  // first loop: print every edge of the ring
				+		{
				+			e->print();
				+			b3Printf("\n");
				+			e = e->next;
				+		} while (e != edges);
				+		do  // second loop: descend into targets that do not carry the marker yet
				+		{
				+			Vertex* v = e->target;
				+			if (v->copy != copy)
				+			{
				+				v->copy = copy;
				+				v->printGraph();
				+			}
				+			e = e->next;
				+		} while (e != edges);
				+	}
				+}
			
 
				+#endif
			
 
				+/*
				+ * Classifies the position of edge `next` relative to edge `prev` in the
				+ * edge ring around their common origin vertex (asserted to be the same
				+ * through reverse->target).
				+ *   - next follows prev only via prev->next:  COUNTER_CLOCKWISE
				+ *   - next follows prev only via prev->prev:  CLOCKWISE
				+ *   - next is both prev->next and prev->prev: the ring links alone cannot
				+ *     decide, so the sign of (t x s) . m is used, where m is the cross
				+ *     product of the two edge directions
				+ *   - next is not adjacent to prev at all:    NONE
				+ */
				+b3ConvexHullInternal::Orientation b3ConvexHullInternal::getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t)
				+{
				+	b3Assert(prev->reverse->target == next->reverse->target);
				+	if (prev->next == next)
				+	{
				+		if (prev->prev == next)  // next is adjacent to prev in both directions
				+		{
				+			Point64 n = t.cross(s);
				+			Point64 m = (*prev->target - *next->reverse->target).cross(*next->target - *next->reverse->target);  // direction of prev crossed with direction of next, both measured from the common origin
				+			b3Assert(!m.isZero());
				+			btInt64_t dot = n.dot(m);
				+			b3Assert(dot != 0);
				+			return (dot > 0) ? COUNTER_CLOCKWISE : CLOCKWISE;
				+		}
				+		return COUNTER_CLOCKWISE;
				+	}
				+	else if (prev->prev == next)
				+	{
				+		return CLOCKWISE;
				+	}
				+	else
				+	{
				+		return NONE;
				+	}
				+}
			
 
				+/*
				+ * Scans the edge ring of `start` and returns the edge with the smallest
				+ * "cot" value, or NULL if no edge qualifies.
				+ * Only edges with copy > mergeStamp are considered. For an edge with
				+ * direction t = target - start the value is the rational
				+ * t.dot(sxrxs) / t.dot(rxs); edges for which it is NaN are skipped (only
				+ * an assertion on t.dot(s) is made for them).
				+ * When two edges compare equal, getOrientation() together with `ccw`
				+ * decides whether the later edge replaces the current choice.
				+ * minCot is an output: it receives the cot value of the returned edge and
				+ * is left untouched when NULL is returned.
				+ * NOTE(review): judging by the names, the smallest cotangent corresponds to
				+ * the largest angle - confirm against the algorithm description.
				+ */
				+b3ConvexHullInternal::Edge* b3ConvexHullInternal::findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot)
				+{
				+	Edge* minEdge = NULL;
				+
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("find max edge for %d\n", start->point.index);
				+#endif
				+	Edge* e = start->edges;
				+	if (e)
				+	{
				+		do
				+		{
				+			if (e->copy > mergeStamp)  // skip edges stamped with the current (or a later) merge stamp
				+			{
				+				Point32 t = *e->target - *start;
				+				Rational64 cot(t.dot(sxrxs), t.dot(rxs));
				+#ifdef DEBUG_CONVEX_HULL
				+				b3Printf("      Angle is %f (%d) for ", (float)b3Atan(cot.toScalar()), (int)cot.isNaN());
				+				e->print();
				+#endif
				+				if (cot.isNaN())
				+				{
				+					b3Assert(ccw ? (t.dot(s) < 0) : (t.dot(s) > 0));
				+				}
				+				else
				+				{
				+					int cmp;  // assigned inside the second condition below before it is read in the third
				+					if (minEdge == NULL)  // first usable edge
				+					{
				+						minCot = cot;
				+						minEdge = e;
				+					}
				+					else if ((cmp = cot.compare(minCot)) < 0)  // strictly smaller: new best
				+					{
				+						minCot = cot;
				+						minEdge = e;
				+					}
				+					else if ((cmp == 0) && (ccw == (getOrientation(minEdge, e, s, t) == COUNTER_CLOCKWISE)))  // tie: decided by the relative orientation of the two edges
				+					{
				+						minEdge = e;
				+					}
				+				}
				+#ifdef DEBUG_CONVEX_HULL
				+				b3Printf("\n");
				+#endif
				+			}
				+			e = e->next;
				+		} while (e != start->edges);
				+	}
				+	return minEdge;
				+}
			
 
				+/*
				+ * Given the vertex pair c0 / c1 and one edge at each of them (e0 at c0,
				+ * e1 at c1), moves e0 and e1 to other edges that lie in the same plane.
				+ * e0 and e1 are in/out parameters. Either may be NULL on entry, but not
				+ * both, because (start0 ? start0 : start1) is dereferenced. The second
				+ * phase may also set e0 or e1 to NULL.
				+ *
				+ * Quantities used throughout:
				+ *   s      = c1 - c0
				+ *   normal = (first available edge direction) x s, with dist = c0 . normal
				+ *   perp   = s x normal; dot products with perp are the "dx" values,
				+ *            dot products with s are the "dy" values
				+ *
				+ * Phase 1 advances e0 (via reverse->prev) and e1 (via reverse->next) while
				+ * the next target stays in the plane, the edge is not stamped with the
				+ * current mergeStamp, and the dot product with perp strictly grows.
				+ * Phase 2 depends on the sign of dx = maxDot1 - maxDot0 and alternately
				+ * advances the two sides with slope-style comparisons on exact rationals.
				+ * An edge whose target equals stop0 / stop1 is never advanced further.
				+ */
				+void b3ConvexHullInternal::findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1)
				+{
				+	Edge* start0 = e0;
				+	Edge* start1 = e1;
				+	Point32 et0 = start0 ? start0->target->point : c0->point;  // current end point on side 0; falls back to c0 itself when there is no edge
				+	Point32 et1 = start1 ? start1->target->point : c1->point;
				+	Point32 s = c1->point - c0->point;
				+	Point64 normal = ((start0 ? start0 : start1)->target->point - c0->point).cross(s);
				+	btInt64_t dist = c0->point.dot(normal);
				+	b3Assert(!start1 || (start1->target->point.dot(normal) == dist));
				+	Point64 perp = s.cross(normal);
				+	b3Assert(!perp.isZero());
				+
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("   Advancing %d %d  (%p %p, %d %d)\n", c0->point.index, c1->point.index, start0, start1, start0 ? start0->target->point.index : -1, start1 ? start1->target->point.index : -1);
				+#endif
				+	// phase 1, side 0
				+	btInt64_t maxDot0 = et0.dot(perp);
				+	if (e0)
				+	{
				+		while (e0->target != stop0)
				+		{
				+			Edge* e = e0->reverse->prev;
				+			if (e->target->point.dot(normal) < dist)  // next target is off the plane: stop
				+			{
				+				break;
				+			}
				+			b3Assert(e->target->point.dot(normal) == dist);
				+			if (e->copy == mergeStamp)  // edge belongs to the current merge: stop
				+			{
				+				break;
				+			}
				+			btInt64_t dot = e->target->point.dot(perp);
				+			if (dot <= maxDot0)  // no further progress along perp: stop
				+			{
				+				break;
				+			}
				+			maxDot0 = dot;
				+			e0 = e;
				+			et0 = e->target->point;
				+		}
				+	}
				+	// phase 1, side 1 (same tests, walking via reverse->next)
				+	btInt64_t maxDot1 = et1.dot(perp);
				+	if (e1)
				+	{
				+		while (e1->target != stop1)
				+		{
				+			Edge* e = e1->reverse->next;
				+			if (e->target->point.dot(normal) < dist)
				+			{
				+				break;
				+			}
				+			b3Assert(e->target->point.dot(normal) == dist);
				+			if (e->copy == mergeStamp)
				+			{
				+				break;
				+			}
				+			btInt64_t dot = e->target->point.dot(perp);
				+			if (dot <= maxDot1)
				+			{
				+				break;
				+			}
				+			maxDot1 = dot;
				+			e1 = e;
				+			et1 = e->target->point;
				+		}
				+	}
				+
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("   Starting at %d %d\n", et0.index, et1.index);
				+#endif
				+	// phase 2: nothing is done when dx == 0
				+	btInt64_t dx = maxDot1 - maxDot0;
				+	if (dx > 0)
				+	{
				+		while (true)
				+		{
				+			btInt64_t dy = (et1 - et0).dot(s);
				+
				+			if (e0 && (e0->target != stop0))
				+			{
				+				Edge* f0 = e0->next->reverse;
				+				if (f0->copy > mergeStamp)
				+				{
				+					btInt64_t dx0 = (f0->target->point - et0).dot(perp);
				+					btInt64_t dy0 = (f0->target->point - et0).dot(s);
				+					if ((dx0 == 0) ? (dy0 < 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) >= 0)))  // compares dy0/dx0 with dy/dx as exact rationals
				+					{
				+						et0 = f0->target->point;
				+						dx = (et1 - et0).dot(perp);
				+						e0 = (e0 == start0) ? NULL : f0;  // stepping off the original start edge leaves side 0 without an edge
				+						continue;
				+					}
				+				}
				+			}
				+
				+			if (e1 && (e1->target != stop1))
				+			{
				+				Edge* f1 = e1->reverse->next;
				+				if (f1->copy > mergeStamp)
				+				{
				+					Point32 d1 = f1->target->point - et1;
				+					if (d1.dot(normal) == 0)  // candidate stays in the plane
				+					{
				+						btInt64_t dx1 = d1.dot(perp);
				+						btInt64_t dy1 = d1.dot(s);
				+						btInt64_t dxn = (f1->target->point - et0).dot(perp);  // dx as it would be after the move
				+						if ((dxn > 0) && ((dx1 == 0) ? (dy1 < 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) > 0))))
				+						{
				+							e1 = f1;
				+							et1 = e1->target->point;
				+							dx = dxn;
				+							continue;
				+						}
				+					}
				+					else
				+					{
				+						b3Assert((e1 == start1) && (d1.dot(normal) < 0));
				+					}
				+				}
				+			}
				+
				+			break;
				+		}
				+	}
				+	else if (dx < 0)
				+	{
				+		while (true)  // counterpart of the loop above with the roles of the two sides exchanged
				+		{
				+			btInt64_t dy = (et1 - et0).dot(s);
				+
				+			if (e1 && (e1->target != stop1))
				+			{
				+				Edge* f1 = e1->prev->reverse;
				+				if (f1->copy > mergeStamp)
				+				{
				+					btInt64_t dx1 = (f1->target->point - et1).dot(perp);
				+					btInt64_t dy1 = (f1->target->point - et1).dot(s);
				+					if ((dx1 == 0) ? (dy1 > 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) <= 0)))
				+					{
				+						et1 = f1->target->point;
				+						dx = (et1 - et0).dot(perp);
				+						e1 = (e1 == start1) ? NULL : f1;  // stepping off the original start edge leaves side 1 without an edge
				+						continue;
				+					}
				+				}
				+			}
				+
				+			if (e0 && (e0->target != stop0))
				+			{
				+				Edge* f0 = e0->reverse->prev;
				+				if (f0->copy > mergeStamp)
				+				{
				+					Point32 d0 = f0->target->point - et0;
				+					if (d0.dot(normal) == 0)  // candidate stays in the plane
				+					{
				+						btInt64_t dx0 = d0.dot(perp);
				+						btInt64_t dy0 = d0.dot(s);
				+						btInt64_t dxn = (et1 - f0->target->point).dot(perp);  // dx as it would be after the move
				+						if ((dxn < 0) && ((dx0 == 0) ? (dy0 > 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) < 0))))
				+						{
				+							e0 = f0;
				+							et0 = e0->target->point;
				+							dx = dxn;
				+							continue;
				+						}
				+					}
				+					else
				+					{
				+						b3Assert((e0 == start0) && (d0.dot(normal) < 0));
				+					}
				+				}
				+			}
				+
				+			break;
				+		}
				+	}
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("   Advanced edges to %d %d\n", et0.index, et1.index);
				+#endif
				+}
			
 
				+/*
				+ * Merges hull h1 into h0.
				+ *
				+ * An empty h1 (maxXy == NULL) leaves h0 unchanged; an empty h0 simply
				+ * becomes a copy of h1. Otherwise:
				+ *  1. mergeStamp is decremented. Edges created from now on carry the new
				+ *     stamp (see newEdgePair), so `copy > mergeStamp` identifies edges
				+ *     that existed before this merge.
				+ *  2. mergeProjection() joins the vertex rings and yields the starting
				+ *     pair c0 (from h0) / c1 (from h1). If it returns true, the pair is
				+ *     first moved along edges found by findEdgeForCoplanarFaces().
				+ *  3. The main loop repeatedly
				+ *       - asks findMaxAngle() for a candidate edge at c0 and at c1,
				+ *       - creates a new edge pair c0 <-> c1 and queues it in the pending
				+ *         lists (head/tail per side),
				+ *       - advances c1 and/or c0 along its candidate edge (cmp selects the
				+ *         side, both on a tie), removing the old edges between the
				+ *         previously used edge and the candidate with removeEdgePair()
				+ *         and linking the pending new edges into the edge ring,
				+ *     until the pair is back at its starting position.
				+ *     If neither vertex has a candidate edge, the two vertices are
				+ *     connected by a single new edge pair and the function returns.
				+ */
				+void b3ConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1)
				+{
				+	if (!h1.maxXy)  // h1 is empty
				+	{
				+		return;
				+	}
				+	if (!h0.maxXy)  // h0 is empty
				+	{
				+		h0 = h1;
				+		return;
				+	}
				+
				+	mergeStamp--;
				+	// per-side state; suffix 0 refers to the h0 side, suffix 1 to the h1 side
				+	Vertex* c0 = NULL;
				+	Edge* toPrev0 = NULL;  // reverse of the edge along which c0 was last advanced
				+	Edge* firstNew0 = NULL;  // edge recorded the first time side 0 is advanced; used as end marker in the final clean-up
				+	Edge* pendingHead0 = NULL;  // new edges created but not yet linked into a vertex's edge ring
				+	Edge* pendingTail0 = NULL;
				+	Vertex* c1 = NULL;
				+	Edge* toPrev1 = NULL;
				+	Edge* firstNew1 = NULL;
				+	Edge* pendingHead1 = NULL;
				+	Edge* pendingTail1 = NULL;
				+	Point32 prevPoint;
				+
				+	if (mergeProjection(h0, h1, c0, c1))
				+	{
				+		Point32 s = *c1 - *c0;
				+		Point64 normal = Point32(0, 0, -1).cross(s);
				+		Point64 t = s.cross(normal);
				+		b3Assert(!t.isZero());
				+		// look at c0 for an edge lying in the plane given by normal and pointing towards t
				+		Edge* e = c0->edges;
				+		Edge* start0 = NULL;
				+		if (e)
				+		{
				+			do
				+			{
				+				btInt64_t dot = (*e->target - *c0).dot(normal);
				+				b3Assert(dot <= 0);
				+				if ((dot == 0) && ((*e->target - *c0).dot(t) > 0))
				+				{
				+					if (!start0 || (getOrientation(start0, e, s, Point32(0, 0, -1)) == CLOCKWISE))
				+					{
				+						start0 = e;
				+					}
				+				}
				+				e = e->next;
				+			} while (e != c0->edges);
				+		}
				+		// same search at c1, preferring the counter-clockwise candidate
				+		e = c1->edges;
				+		Edge* start1 = NULL;
				+		if (e)
				+		{
				+			do
				+			{
				+				btInt64_t dot = (*e->target - *c1).dot(normal);
				+				b3Assert(dot <= 0);
				+				if ((dot == 0) && ((*e->target - *c1).dot(t) > 0))
				+				{
				+					if (!start1 || (getOrientation(start1, e, s, Point32(0, 0, -1)) == COUNTER_CLOCKWISE))
				+					{
				+						start1 = e;
				+					}
				+				}
				+				e = e->next;
				+			} while (e != c1->edges);
				+		}
				+
				+		if (start0 || start1)
				+		{
				+			findEdgeForCoplanarFaces(c0, c1, start0, start1, NULL, NULL);
				+			if (start0)  // start0 / start1 are in/out arguments and may have been moved or cleared by the call
				+			{
				+				c0 = start0->target;
				+			}
				+			if (start1)
				+			{
				+				c1 = start1->target;
				+			}
				+		}
				+
				+		prevPoint = c1->point;
				+		prevPoint.z++;  // artificial previous point: c1 shifted by one unit in z
				+	}
				+	else
				+	{
				+		prevPoint = c1->point;
				+		prevPoint.x++;  // artificial previous point: c1 shifted by one unit in x
				+	}
				+
				+	Vertex* first0 = c0;  // starting pair; the loop ends when it is reached again
				+	Vertex* first1 = c1;
				+	bool firstRun = true;
				+
				+	while (true)
				+	{
				+		Point32 s = *c1 - *c0;
				+		Point32 r = prevPoint - c0->point;
				+		Point64 rxs = r.cross(s);
				+		Point64 sxrxs = s.cross(rxs);
				+
				+#ifdef DEBUG_CONVEX_HULL
				+		b3Printf("\n  Checking %d %d\n", c0->point.index, c1->point.index);
				+#endif
				+		Rational64 minCot0(0, 0);
				+		Edge* min0 = findMaxAngle(false, c0, s, rxs, sxrxs, minCot0);
				+		Rational64 minCot1(0, 0);
				+		Edge* min1 = findMaxAngle(true, c1, s, rxs, sxrxs, minCot1);
				+		if (!min0 && !min1)  // no candidate on either side: connect c0 and c1 by a single edge pair and finish
				+		{
				+			Edge* e = newEdgePair(c0, c1);
				+			e->link(e);
				+			c0->edges = e;
				+
				+			e = e->reverse;
				+			e->link(e);
				+			c1->edges = e;
				+			return;
				+		}
				+		else
				+		{
				+			int cmp = !min0 ? 1 : !min1 ? -1 : minCot0.compare(minCot1);  // > 0: advance side 1, < 0: advance side 0, == 0: advance both
				+#ifdef DEBUG_CONVEX_HULL
				+			b3Printf("    -> Result %d\n", cmp);
				+#endif
				+			if (firstRun || ((cmp >= 0) ? !minCot1.isNegativeInfinity() : !minCot0.isNegativeInfinity()))
				+			{
				+				Edge* e = newEdgePair(c0, c1);  // new connection between the current pair, queued on both sides
				+				if (pendingTail0)
				+				{
				+					pendingTail0->prev = e;
				+				}
				+				else
				+				{
				+					pendingHead0 = e;
				+				}
				+				e->next = pendingTail0;
				+				pendingTail0 = e;
				+
				+				e = e->reverse;
				+				if (pendingTail1)
				+				{
				+					pendingTail1->next = e;
				+				}
				+				else
				+				{
				+					pendingHead1 = e;
				+				}
				+				e->prev = pendingTail1;
				+				pendingTail1 = e;
				+			}
				+
				+			Edge* e0 = min0;
				+			Edge* e1 = min1;
				+
				+#ifdef DEBUG_CONVEX_HULL
				+			b3Printf("   Found min edges to %d %d\n", e0 ? e0->target->point.index : -1, e1 ? e1->target->point.index : -1);
				+#endif
				+
				+			if (cmp == 0)  // tie (both candidates exist): let the coplanar search adjust e0 / e1
				+			{
				+				findEdgeForCoplanarFaces(c0, c1, e0, e1, NULL, NULL);
				+			}
				+
				+			if ((cmp >= 0) && e1)  // advance on the h1 side
				+			{
				+				if (toPrev1)
				+				{
				+					for (Edge *e = toPrev1->next, *n = NULL; e != min1; e = n)  // remove the edges between the previously used edge and min1
				+					{
				+						n = e->next;  // fetch the successor before e is removed
				+						removeEdgePair(e);
				+					}
				+				}
				+
				+				if (pendingTail1)  // link the queued new edges into c1's edge ring, in front of min1
				+				{
				+					if (toPrev1)
				+					{
				+						toPrev1->link(pendingHead1);
				+					}
				+					else
				+					{
				+						min1->prev->link(pendingHead1);
				+						firstNew1 = pendingHead1;
				+					}
				+					pendingTail1->link(min1);
				+					pendingHead1 = NULL;
				+					pendingTail1 = NULL;
				+				}
				+				else if (!toPrev1)
				+				{
				+					firstNew1 = min1;
				+				}
				+
				+				prevPoint = c1->point;
				+				c1 = e1->target;
				+				toPrev1 = e1->reverse;
				+			}
				+
				+			if ((cmp <= 0) && e0)  // advance on the h0 side (same steps with prev / next exchanged)
				+			{
				+				if (toPrev0)
				+				{
				+					for (Edge *e = toPrev0->prev, *n = NULL; e != min0; e = n)
				+					{
				+						n = e->prev;
				+						removeEdgePair(e);
				+					}
				+				}
				+
				+				if (pendingTail0)
				+				{
				+					if (toPrev0)
				+					{
				+						pendingHead0->link(toPrev0);
				+					}
				+					else
				+					{
				+						pendingHead0->link(min0->next);
				+						firstNew0 = pendingHead0;
				+					}
				+					min0->link(pendingTail0);
				+					pendingHead0 = NULL;
				+					pendingTail0 = NULL;
				+				}
				+				else if (!toPrev0)
				+				{
				+					firstNew0 = min0;
				+				}
				+
				+				prevPoint = c0->point;
				+				c0 = e0->target;
				+				toPrev0 = e0->reverse;
				+			}
				+		}
				+
				+		if ((c0 == first0) && (c1 == first1))  // back at the starting pair: close both sides and finish
				+		{
				+			if (toPrev0 == NULL)  // side 0 never advanced: its pending edges form c0's complete edge ring
				+			{
				+				pendingHead0->link(pendingTail0);
				+				c0->edges = pendingTail0;
				+			}
				+			else
				+			{
				+				for (Edge *e = toPrev0->prev, *n = NULL; e != firstNew0; e = n)  // remove the remaining old edges up to the first new one
				+				{
				+					n = e->prev;
				+					removeEdgePair(e);
				+				}
				+				if (pendingTail0)
				+				{
				+					pendingHead0->link(toPrev0);
				+					firstNew0->link(pendingTail0);
				+				}
				+			}
				+
				+			if (toPrev1 == NULL)  // side 1 never advanced
				+			{
				+				pendingTail1->link(pendingHead1);
				+				c1->edges = pendingTail1;
				+			}
				+			else
				+			{
				+				for (Edge *e = toPrev1->next, *n = NULL; e != firstNew1; e = n)
				+				{
				+					n = e->next;
				+					removeEdgePair(e);
				+				}
				+				if (pendingTail1)
				+				{
				+					toPrev1->link(pendingHead1);
				+					pendingTail1->link(firstNew1);
				+				}
				+			}
				+
				+			return;
				+		}
				+
				+		firstRun = false;
				+	}
				+}
			
 
				+/*
				+ * Strict "less than" ordering for Point32 values: by y first, then by x,
				+ * then by z. Returns false for points with equal x, y and z. Passed to
				+ * quickSort() in compute() to order the quantized input points.
				+ */
				+static bool b3PointCmp(const b3ConvexHullInternal::Point32& p, const b3ConvexHullInternal::Point32& q)
				+{
				+	return (p.y < q.y) || ((p.y == q.y) && ((p.x < q.x) || ((p.x == q.x) && (p.z < q.z))));
				+}
			
 
				+/*
				+ * Entry point of the hull construction.
				+ *
				+ * coords       raw pointer to the first input point; each point is read
				+ *              as three consecutive doubles or floats
				+ * doubleCoords true: read doubles, false: read floats
				+ * stride       byte distance between consecutive points
				+ * count        number of points
				+ *
				+ * Steps:
				+ *  1. Compute the bounding box of the input.
				+ *  2. Choose maxAxis / medAxis / minAxis from the box extents and derive
				+ *     `scaling` (extent / 10216, possibly negated) and `center`.
				+ *  3. Convert every point to integer Point32 coordinates
				+ *     (x = medAxis, y = maxAxis, z = minAxis) and sort them with b3PointCmp.
				+ *  4. Create one Vertex per point, reset the edge pool and counters, and
				+ *     run computeInternal() over the whole range. The resulting hull's
				+ *     minXy vertex is stored in vertexList (NULL when count is 0).
				+ */
				+void b3ConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count)
				+{
				+	b3Vector3 min = b3MakeVector3(b3Scalar(1e30), b3Scalar(1e30), b3Scalar(1e30)), max = b3MakeVector3(b3Scalar(-1e30), b3Scalar(-1e30), b3Scalar(-1e30));  // bounding box, initialised "inside out"
				+	const char* ptr = (const char*)coords;  // byte pointer so that stride can be applied directly
				+	if (doubleCoords)
				+	{
				+		for (int i = 0; i < count; i++)
				+		{
				+			const double* v = (const double*)ptr;
				+			b3Vector3 p = b3MakeVector3((b3Scalar)v[0], (b3Scalar)v[1], (b3Scalar)v[2]);
				+			ptr += stride;
				+			min.setMin(p);
				+			max.setMax(p);
				+		}
				+	}
				+	else
				+	{
				+		for (int i = 0; i < count; i++)
				+		{
				+			const float* v = (const float*)ptr;
				+			b3Vector3 p = b3MakeVector3(v[0], v[1], v[2]);
				+			ptr += stride;
				+			min.setMin(p);
				+			max.setMax(p);
				+		}
				+	}
				+
				+	b3Vector3 s = max - min;  // extents of the bounding box
				+	maxAxis = s.maxAxis();
				+	minAxis = s.minAxis();
				+	if (minAxis == maxAxis)  // make sure the three axis indices are distinct
				+	{
				+		minAxis = (maxAxis + 1) % 3;
				+	}
				+	medAxis = 3 - maxAxis - minAxis;  // the remaining index, since 0 + 1 + 2 == 3
				+
				+	s /= b3Scalar(10216);  // size of one integer step per axis; the rationale for the constant 10216 is not visible here
				+	if (((medAxis + 1) % 3) != maxAxis)  // NOTE(review): presumably keeps the orientation of the permuted axes consistent - confirm
				+	{
				+		s *= -1;
				+	}
				+	scaling = s;
				+	// turn s into the inverse scale, leaving zero components (flat extents) at zero
				+	if (s[0] != 0)
				+	{
				+		s[0] = b3Scalar(1) / s[0];
				+	}
				+	if (s[1] != 0)
				+	{
				+		s[1] = b3Scalar(1) / s[1];
				+	}
				+	if (s[2] != 0)
				+	{
				+		s[2] = b3Scalar(1) / s[2];
				+	}
				+
				+	center = (min + max) * b3Scalar(0.5);
				+	// second pass over the input: quantize every point to integer coordinates
				+	b3AlignedObjectArray<Point32> points;
				+	points.resize(count);
				+	ptr = (const char*)coords;
				+	if (doubleCoords)
				+	{
				+		for (int i = 0; i < count; i++)
				+		{
				+			const double* v = (const double*)ptr;
				+			b3Vector3 p = b3MakeVector3((b3Scalar)v[0], (b3Scalar)v[1], (b3Scalar)v[2]);
				+			ptr += stride;
				+			p = (p - center) * s;
				+			points[i].x = (btInt32_t)p[medAxis];
				+			points[i].y = (btInt32_t)p[maxAxis];
				+			points[i].z = (btInt32_t)p[minAxis];
				+			points[i].index = i;  // remember the position in the caller's array
				+		}
				+	}
				+	else
				+	{
				+		for (int i = 0; i < count; i++)
				+		{
				+			const float* v = (const float*)ptr;
				+			b3Vector3 p = b3MakeVector3(v[0], v[1], v[2]);
				+			ptr += stride;
				+			p = (p - center) * s;
				+			points[i].x = (btInt32_t)p[medAxis];
				+			points[i].y = (btInt32_t)p[maxAxis];
				+			points[i].z = (btInt32_t)p[minAxis];
				+			points[i].index = i;
				+		}
				+	}
				+	points.quickSort(b3PointCmp);  // order by y, then x, then z
				+
				+	vertexPool.reset();
				+	vertexPool.setArraySize(count);
				+	originalVertices.resize(count);
				+	for (int i = 0; i < count; i++)  // one Vertex per sorted point
				+	{
				+		Vertex* v = vertexPool.newObject();
				+		v->edges = NULL;
				+		v->point = points[i];
				+		v->copy = -1;
				+		originalVertices[i] = v;
				+	}
				+
				+	points.clear();
				+
				+	edgePool.reset();
				+	edgePool.setArraySize(6 * count);
				+
				+	usedEdgePairs = 0;
				+	maxUsedEdgePairs = 0;
				+
				+	mergeStamp = -3;  // initial stamp; merge() decrements it before each merge
				+
				+	IntermediateHull hull;
				+	computeInternal(0, count, hull);
				+	vertexList = hull.minXy;
				+#ifdef DEBUG_CONVEX_HULL
				+	b3Printf("max. edges %d (3v = %d)", maxUsedEdgePairs, 3 * count);
				+#endif
				+}
			
 
				+/*
				+ * Converts an integer Point32 to a b3Vector3: x / y / z are written to the
				+ * medAxis / maxAxis / minAxis components and the result is multiplied by
				+ * `scaling`. `center` is not added, unlike in getCoordinates().
				+ */
				+b3Vector3 b3ConvexHullInternal::toBtVector(const Point32& v)
				+{
				+	b3Vector3 p;
				+	p[medAxis] = b3Scalar(v.x);
				+	p[maxAxis] = b3Scalar(v.y);
				+	p[minAxis] = b3Scalar(v.z);
				+	return p * scaling;
				+}
			
 
				+/*
				+ * Returns the normalized cross product of the face's dir0 and dir1, each
				+ * converted with toBtVector() first.
				+ */
				+b3Vector3 b3ConvexHullInternal::getBtNormal(Face* face)
				+{
				+	return toBtVector(face->dir0).cross(toBtVector(face->dir1)).normalized();
				+}
			
 
				+
			
 
				+b3Vector3 b3ConvexHullInternal::getCoordinates(const Vertex* v)
			
 
				+{
			
 
				+	b3Vector3 p;
			
 
				+	p[medAxis] = v->xvalue();
			
 
				+	p[maxAxis] = v->yvalue();
			
 
				+	p[minAxis] = v->zvalue();
			
 
				+	return p * scaling + center;
			
 
				+}
			
 
				+
			
 
				+b3Scalar b3ConvexHullInternal::shrink(b3Scalar amount, b3Scalar clampAmount)
			
 
				+{
			
 
				+	if (!vertexList)
			
 
				+	{
			
 
				+		return 0;
			
 
				+	}
			
 
				+	int stamp = --mergeStamp;
			
 
				+	b3AlignedObjectArray<Vertex*> stack;
			
 
				+	vertexList->copy = stamp;
			
 
				+	stack.push_back(vertexList);
			
 
				+	b3AlignedObjectArray<Face*> faces;
			
 
				+
			
 
				+	Point32 ref = vertexList->point;
			
 
				+	Int128 hullCenterX(0, 0);
			
 
				+	Int128 hullCenterY(0, 0);
			
 
				+	Int128 hullCenterZ(0, 0);
			
 
				+	Int128 volume(0, 0);
			
 
				+
			
 
				+	while (stack.size() > 0)
			
 
				+	{
			
 
				+		Vertex* v = stack[stack.size() - 1];
			
 
				+		stack.pop_back();
			
 
				+		Edge* e = v->edges;
			
 
				+		if (e)
			
 
				+		{
			
 
				+			do
			
 
				+			{
			
 
				+				if (e->target->copy != stamp)
			
 
				+				{
			
 
				+					e->target->copy = stamp;
			
 
				+					stack.push_back(e->target);
			
 
				+				}
			
 
				+				if (e->copy != stamp)
			
 
				+				{
			
 
				+					Face* face = facePool.newObject();
			
 
				+					face->init(e->target, e->reverse->prev->target, v);
			
 
				+					faces.push_back(face);
			
 
				+					Edge* f = e;
			
 
				+
			
 
				+					Vertex* a = NULL;
			
 
				+					Vertex* b = NULL;
			
 
				+					do
			
 
				+					{
			
 
				+						if (a && b)
			
 
				+						{
			
 
				+							btInt64_t vol = (v->point - ref).dot((a->point - ref).cross(b->point - ref));
			
 
				+							b3Assert(vol >= 0);
			
 
				+							Point32 c = v->point + a->point + b->point + ref;
			
 
				+							hullCenterX += vol * c.x;
			
 
				+							hullCenterY += vol * c.y;
			
 
				+							hullCenterZ += vol * c.z;
			
 
				+							volume += vol;
			
 
				+						}
			
 
				+
			
 
				+						b3Assert(f->copy != stamp);
			
 
				+						f->copy = stamp;
			
 
				+						f->face = face;
			
 
				+
			
 
				+						a = b;
			
 
				+						b = f->target;
			
 
				+
			
 
				+						f = f->reverse->prev;
			
 
				+					} while (f != e);
			
 
				+				}
			
 
				+				e = e->next;
			
 
				+			} while (e != v->edges);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (volume.getSign() <= 0)
			
 
				+	{
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	b3Vector3 hullCenter;
			
 
				+	hullCenter[medAxis] = hullCenterX.toScalar();
			
 
				+	hullCenter[maxAxis] = hullCenterY.toScalar();
			
 
				+	hullCenter[minAxis] = hullCenterZ.toScalar();
			
 
				+	hullCenter /= 4 * volume.toScalar();
			
 
				+	hullCenter *= scaling;
			
 
				+
			
 
				+	int faceCount = faces.size();
			
 
				+
			
 
				+	if (clampAmount > 0)
			
 
				+	{
			
 
				+		b3Scalar minDist = B3_INFINITY;
			
 
				+		for (int i = 0; i < faceCount; i++)
			
 
				+		{
			
 
				+			b3Vector3 normal = getBtNormal(faces[i]);
			
 
				+			b3Scalar dist = normal.dot(toBtVector(faces[i]->origin) - hullCenter);
			
 
				+			if (dist < minDist)
			
 
				+			{
			
 
				+				minDist = dist;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (minDist <= 0)
			
 
				+		{
			
 
				+			return 0;
			
 
				+		}
			
 
				+
			
 
				+		amount = b3Min(amount, minDist * clampAmount);
			
 
				+	}
			
 
				+
			
 
				+	unsigned int seed = 243703;
			
 
				+	for (int i = 0; i < faceCount; i++, seed = 1664525 * seed + 1013904223)
			
 
				+	{
			
 
				+		b3Swap(faces[i], faces[seed % faceCount]);
			
 
				+	}
			
 
				+
			
 
				+	for (int i = 0; i < faceCount; i++)
			
 
				+	{
			
 
				+		if (!shiftFace(faces[i], amount, stack))
			
 
				+		{
			
 
				+			return -amount;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return amount;
			
 
				+}
			
 
				+
			
 
				+bool b3ConvexHullInternal::shiftFace(Face* face, b3Scalar amount, b3AlignedObjectArray<Vertex*> stack)
			
 
				+{
			
 
				+	b3Vector3 origShift = getBtNormal(face) * -amount;
			
 
				+	if (scaling[0] != 0)
			
 
				+	{
			
 
				+		origShift[0] /= scaling[0];
			
 
				+	}
			
 
				+	if (scaling[1] != 0)
			
 
				+	{
			
 
				+		origShift[1] /= scaling[1];
			
 
				+	}
			
 
				+	if (scaling[2] != 0)
			
 
				+	{
			
 
				+		origShift[2] /= scaling[2];
			
 
				+	}
			
 
				+	Point32 shift((btInt32_t)origShift[medAxis], (btInt32_t)origShift[maxAxis], (btInt32_t)origShift[minAxis]);
			
 
				+	if (shift.isZero())
			
 
				+	{
			
 
				+		return true;
			
 
				+	}
			
 
				+	Point64 normal = face->getNormal();
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+	b3Printf("\nShrinking face (%d %d %d) (%d %d %d) (%d %d %d) by (%d %d %d)\n",
			
 
				+			 face->origin.x, face->origin.y, face->origin.z, face->dir0.x, face->dir0.y, face->dir0.z, face->dir1.x, face->dir1.y, face->dir1.z, shift.x, shift.y, shift.z);
			
 
				+#endif
			
 
				+	btInt64_t origDot = face->origin.dot(normal);
			
 
				+	Point32 shiftedOrigin = face->origin + shift;
			
 
				+	btInt64_t shiftedDot = shiftedOrigin.dot(normal);
			
 
				+	b3Assert(shiftedDot <= origDot);
			
 
				+	if (shiftedDot >= origDot)
			
 
				+	{
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	Edge* intersection = NULL;
			
 
				+
			
 
				+	Edge* startEdge = face->nearbyVertex->edges;
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+	b3Printf("Start edge is ");
			
 
				+	startEdge->print();
			
 
				+	b3Printf(", normal is (%lld %lld %lld), shifted dot is %lld\n", normal.x, normal.y, normal.z, shiftedDot);
			
 
				+#endif
			
 
				+	Rational128 optDot = face->nearbyVertex->dot(normal);
			
 
				+	int cmp = optDot.compare(shiftedDot);
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	int n = 0;
			
 
				+#endif
			
 
				+	if (cmp >= 0)
			
 
				+	{
			
 
				+		Edge* e = startEdge;
			
 
				+		do
			
 
				+		{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			n++;
			
 
				+#endif
			
 
				+			Rational128 dot = e->target->dot(normal);
			
 
				+			b3Assert(dot.compare(origDot) <= 0);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("Moving downwards, edge is ");
			
 
				+			e->print();
			
 
				+			b3Printf(", dot is %f (%f %lld)\n", (float)dot.toScalar(), (float)optDot.toScalar(), shiftedDot);
			
 
				+#endif
			
 
				+			if (dot.compare(optDot) < 0)
			
 
				+			{
			
 
				+				int c = dot.compare(shiftedDot);
			
 
				+				optDot = dot;
			
 
				+				e = e->reverse;
			
 
				+				startEdge = e;
			
 
				+				if (c < 0)
			
 
				+				{
			
 
				+					intersection = e;
			
 
				+					break;
			
 
				+				}
			
 
				+				cmp = c;
			
 
				+			}
			
 
				+			e = e->prev;
			
 
				+		} while (e != startEdge);
			
 
				+
			
 
				+		if (!intersection)
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		Edge* e = startEdge;
			
 
				+		do
			
 
				+		{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			n++;
			
 
				+#endif
			
 
				+			Rational128 dot = e->target->dot(normal);
			
 
				+			b3Assert(dot.compare(origDot) <= 0);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("Moving upwards, edge is ");
			
 
				+			e->print();
			
 
				+			b3Printf(", dot is %f (%f %lld)\n", (float)dot.toScalar(), (float)optDot.toScalar(), shiftedDot);
			
 
				+#endif
			
 
				+			if (dot.compare(optDot) > 0)
			
 
				+			{
			
 
				+				cmp = dot.compare(shiftedDot);
			
 
				+				if (cmp >= 0)
			
 
				+				{
			
 
				+					intersection = e;
			
 
				+					break;
			
 
				+				}
			
 
				+				optDot = dot;
			
 
				+				e = e->reverse;
			
 
				+				startEdge = e;
			
 
				+			}
			
 
				+			e = e->prev;
			
 
				+		} while (e != startEdge);
			
 
				+
			
 
				+		if (!intersection)
			
 
				+		{
			
 
				+			return true;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	b3Printf("Needed %d iterations to find initial intersection\n", n);
			
 
				+#endif
			
 
				+
			
 
				+	if (cmp == 0)
			
 
				+	{
			
 
				+		Edge* e = intersection->reverse->next;
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+		n = 0;
			
 
				+#endif
			
 
				+		while (e->target->dot(normal).compare(shiftedDot) <= 0)
			
 
				+		{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			n++;
			
 
				+#endif
			
 
				+			e = e->next;
			
 
				+			if (e == intersection->reverse)
			
 
				+			{
			
 
				+				return true;
			
 
				+			}
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("Checking for outwards edge, current edge is ");
			
 
				+			e->print();
			
 
				+			b3Printf("\n");
			
 
				+#endif
			
 
				+		}
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+		b3Printf("Needed %d iterations to check for complete containment\n", n);
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+	Edge* firstIntersection = NULL;
			
 
				+	Edge* faceEdge = NULL;
			
 
				+	Edge* firstFaceEdge = NULL;
			
 
				+
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	int m = 0;
			
 
				+#endif
			
 
				+	while (true)
			
 
				+	{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+		m++;
			
 
				+#endif
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+		b3Printf("Intersecting edge is ");
			
 
				+		intersection->print();
			
 
				+		b3Printf("\n");
			
 
				+#endif
			
 
				+		if (cmp == 0)
			
 
				+		{
			
 
				+			Edge* e = intersection->reverse->next;
			
 
				+			startEdge = e;
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			n = 0;
			
 
				+#endif
			
 
				+			while (true)
			
 
				+			{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+				n++;
			
 
				+#endif
			
 
				+				if (e->target->dot(normal).compare(shiftedDot) >= 0)
			
 
				+				{
			
 
				+					break;
			
 
				+				}
			
 
				+				intersection = e->reverse;
			
 
				+				e = e->next;
			
 
				+				if (e == startEdge)
			
 
				+				{
			
 
				+					return true;
			
 
				+				}
			
 
				+			}
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			b3Printf("Needed %d iterations to advance intersection\n", n);
			
 
				+#endif
			
 
				+		}
			
 
				+
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+		b3Printf("Advanced intersecting edge to ");
			
 
				+		intersection->print();
			
 
				+		b3Printf(", cmp = %d\n", cmp);
			
 
				+#endif
			
 
				+
			
 
				+		if (!firstIntersection)
			
 
				+		{
			
 
				+			firstIntersection = intersection;
			
 
				+		}
			
 
				+		else if (intersection == firstIntersection)
			
 
				+		{
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		int prevCmp = cmp;
			
 
				+		Edge* prevIntersection = intersection;
			
 
				+		Edge* prevFaceEdge = faceEdge;
			
 
				+
			
 
				+		Edge* e = intersection->reverse;
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+		n = 0;
			
 
				+#endif
			
 
				+		while (true)
			
 
				+		{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+			n++;
			
 
				+#endif
			
 
				+			e = e->reverse->prev;
			
 
				+			b3Assert(e != intersection->reverse);
			
 
				+			cmp = e->target->dot(normal).compare(shiftedDot);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("Testing edge ");
			
 
				+			e->print();
			
 
				+			b3Printf(" -> cmp = %d\n", cmp);
			
 
				+#endif
			
 
				+			if (cmp >= 0)
			
 
				+			{
			
 
				+				intersection = e;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+		b3Printf("Needed %d iterations to find other intersection of face\n", n);
			
 
				+#endif
			
 
				+
			
 
				+		if (cmp > 0)
			
 
				+		{
			
 
				+			Vertex* removed = intersection->target;
			
 
				+			e = intersection->reverse;
			
 
				+			if (e->prev == e)
			
 
				+			{
			
 
				+				removed->edges = NULL;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				removed->edges = e->prev;
			
 
				+				e->prev->link(e->next);
			
 
				+				e->link(e);
			
 
				+			}
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("1: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
			
 
				+#endif
			
 
				+
			
 
				+			Point64 n0 = intersection->face->getNormal();
			
 
				+			Point64 n1 = intersection->reverse->face->getNormal();
			
 
				+			btInt64_t m00 = face->dir0.dot(n0);
			
 
				+			btInt64_t m01 = face->dir1.dot(n0);
			
 
				+			btInt64_t m10 = face->dir0.dot(n1);
			
 
				+			btInt64_t m11 = face->dir1.dot(n1);
			
 
				+			btInt64_t r0 = (intersection->face->origin - shiftedOrigin).dot(n0);
			
 
				+			btInt64_t r1 = (intersection->reverse->face->origin - shiftedOrigin).dot(n1);
			
 
				+			Int128 det = Int128::mul(m00, m11) - Int128::mul(m01, m10);
			
 
				+			b3Assert(det.getSign() != 0);
			
 
				+			Vertex* v = vertexPool.newObject();
			
 
				+			v->point.index = -1;
			
 
				+			v->copy = -1;
			
 
				+			v->point128 = PointR128(Int128::mul(face->dir0.x * r0, m11) - Int128::mul(face->dir0.x * r1, m01) + Int128::mul(face->dir1.x * r1, m00) - Int128::mul(face->dir1.x * r0, m10) + det * shiftedOrigin.x,
			
 
				+									Int128::mul(face->dir0.y * r0, m11) - Int128::mul(face->dir0.y * r1, m01) + Int128::mul(face->dir1.y * r1, m00) - Int128::mul(face->dir1.y * r0, m10) + det * shiftedOrigin.y,
			
 
				+									Int128::mul(face->dir0.z * r0, m11) - Int128::mul(face->dir0.z * r1, m01) + Int128::mul(face->dir1.z * r1, m00) - Int128::mul(face->dir1.z * r0, m10) + det * shiftedOrigin.z,
			
 
				+									det);
			
 
				+			v->point.x = (btInt32_t)v->point128.xvalue();
			
 
				+			v->point.y = (btInt32_t)v->point128.yvalue();
			
 
				+			v->point.z = (btInt32_t)v->point128.zvalue();
			
 
				+			intersection->target = v;
			
 
				+			v->edges = e;
			
 
				+
			
 
				+			stack.push_back(v);
			
 
				+			stack.push_back(removed);
			
 
				+			stack.push_back(NULL);
			
 
				+		}
			
 
				+
			
 
				+		if (cmp || prevCmp || (prevIntersection->reverse->next->target != intersection->target))
			
 
				+		{
			
 
				+			faceEdge = newEdgePair(prevIntersection->target, intersection->target);
			
 
				+			if (prevCmp == 0)
			
 
				+			{
			
 
				+				faceEdge->link(prevIntersection->reverse->next);
			
 
				+			}
			
 
				+			if ((prevCmp == 0) || prevFaceEdge)
			
 
				+			{
			
 
				+				prevIntersection->reverse->link(faceEdge);
			
 
				+			}
			
 
				+			if (cmp == 0)
			
 
				+			{
			
 
				+				intersection->reverse->prev->link(faceEdge->reverse);
			
 
				+			}
			
 
				+			faceEdge->reverse->link(intersection->reverse);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			faceEdge = prevIntersection->reverse->next;
			
 
				+		}
			
 
				+
			
 
				+		if (prevFaceEdge)
			
 
				+		{
			
 
				+			if (prevCmp > 0)
			
 
				+			{
			
 
				+				faceEdge->link(prevFaceEdge->reverse);
			
 
				+			}
			
 
				+			else if (faceEdge != prevFaceEdge->reverse)
			
 
				+			{
			
 
				+				stack.push_back(prevFaceEdge->target);
			
 
				+				while (faceEdge->next != prevFaceEdge->reverse)
			
 
				+				{
			
 
				+					Vertex* removed = faceEdge->next->target;
			
 
				+					removeEdgePair(faceEdge->next);
			
 
				+					stack.push_back(removed);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+					b3Printf("2: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
			
 
				+#endif
			
 
				+				}
			
 
				+				stack.push_back(NULL);
			
 
				+			}
			
 
				+		}
			
 
				+		faceEdge->face = face;
			
 
				+		faceEdge->reverse->face = intersection->face;
			
 
				+
			
 
				+		if (!firstFaceEdge)
			
 
				+		{
			
 
				+			firstFaceEdge = faceEdge;
			
 
				+		}
			
 
				+	}
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	b3Printf("Needed %d iterations to process all intersections\n", m);
			
 
				+#endif
			
 
				+
			
 
				+	if (cmp > 0)
			
 
				+	{
			
 
				+		firstFaceEdge->reverse->target = faceEdge->target;
			
 
				+		firstIntersection->reverse->link(firstFaceEdge);
			
 
				+		firstFaceEdge->link(faceEdge->reverse);
			
 
				+	}
			
 
				+	else if (firstFaceEdge != faceEdge->reverse)
			
 
				+	{
			
 
				+		stack.push_back(faceEdge->target);
			
 
				+		while (firstFaceEdge->next != faceEdge->reverse)
			
 
				+		{
			
 
				+			Vertex* removed = firstFaceEdge->next->target;
			
 
				+			removeEdgePair(firstFaceEdge->next);
			
 
				+			stack.push_back(removed);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			b3Printf("3: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
			
 
				+#endif
			
 
				+		}
			
 
				+		stack.push_back(NULL);
			
 
				+	}
			
 
				+
			
 
				+	b3Assert(stack.size() > 0);
			
 
				+	vertexList = stack[0];
			
 
				+
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+	b3Printf("Removing part\n");
			
 
				+#endif
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	n = 0;
			
 
				+#endif
			
 
				+	int pos = 0;
			
 
				+	while (pos < stack.size())
			
 
				+	{
			
 
				+		int end = stack.size();
			
 
				+		while (pos < end)
			
 
				+		{
			
 
				+			Vertex* kept = stack[pos++];
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+			kept->print();
			
 
				+#endif
			
 
				+			bool deeper = false;
			
 
				+			Vertex* removed;
			
 
				+			while ((removed = stack[pos++]) != NULL)
			
 
				+			{
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+				n++;
			
 
				+#endif
			
 
				+				kept->receiveNearbyFaces(removed);
			
 
				+				while (removed->edges)
			
 
				+				{
			
 
				+					if (!deeper)
			
 
				+					{
			
 
				+						deeper = true;
			
 
				+						stack.push_back(kept);
			
 
				+					}
			
 
				+					stack.push_back(removed->edges->target);
			
 
				+					removeEdgePair(removed->edges);
			
 
				+				}
			
 
				+			}
			
 
				+			if (deeper)
			
 
				+			{
			
 
				+				stack.push_back(NULL);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+#ifdef SHOW_ITERATIONS
			
 
				+	b3Printf("Needed %d iterations to remove part\n", n);
			
 
				+#endif
			
 
				+
			
 
				+	stack.resize(0);
			
 
				+	face->origin = shiftedOrigin;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static int getVertexCopy(b3ConvexHullInternal::Vertex* vertex, b3AlignedObjectArray<b3ConvexHullInternal::Vertex*>& vertices)
			
 
				+{
			
 
				+	int index = vertex->copy;
			
 
				+	if (index < 0)
			
 
				+	{
			
 
				+		index = vertices.size();
			
 
				+		vertex->copy = index;
			
 
				+		vertices.push_back(vertex);
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+		b3Printf("Vertex %d gets index *%d\n", vertex->point.index, index);
			
 
				+#endif
			
 
				+	}
			
 
				+	return index;
			
 
				+}
			
 
				+
			
 
				+b3Scalar b3ConvexHullComputer::compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
			
 
				+{
			
 
				+	if (count <= 0)
			
 
				+	{
			
 
				+		vertices.clear();
			
 
				+		edges.clear();
			
 
				+		faces.clear();
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	b3ConvexHullInternal hull;
			
 
				+	hull.compute(coords, doubleCoords, stride, count);
			
 
				+
			
 
				+	b3Scalar shift = 0;
			
 
				+	if ((shrink > 0) && ((shift = hull.shrink(shrink, shrinkClamp)) < 0))
			
 
				+	{
			
 
				+		vertices.clear();
			
 
				+		edges.clear();
			
 
				+		faces.clear();
			
 
				+		return shift;
			
 
				+	}
			
 
				+
			
 
				+	vertices.resize(0);
			
 
				+	edges.resize(0);
			
 
				+	faces.resize(0);
			
 
				+
			
 
				+	b3AlignedObjectArray<b3ConvexHullInternal::Vertex*> oldVertices;
			
 
				+	getVertexCopy(hull.vertexList, oldVertices);
			
 
				+	int copied = 0;
			
 
				+	while (copied < oldVertices.size())
			
 
				+	{
			
 
				+		b3ConvexHullInternal::Vertex* v = oldVertices[copied];
			
 
				+		vertices.push_back(hull.getCoordinates(v));
			
 
				+		b3ConvexHullInternal::Edge* firstEdge = v->edges;
			
 
				+		if (firstEdge)
			
 
				+		{
			
 
				+			int firstCopy = -1;
			
 
				+			int prevCopy = -1;
			
 
				+			b3ConvexHullInternal::Edge* e = firstEdge;
			
 
				+			do
			
 
				+			{
			
 
				+				if (e->copy < 0)
			
 
				+				{
			
 
				+					int s = edges.size();
			
 
				+					edges.push_back(Edge());
			
 
				+					edges.push_back(Edge());
			
 
				+					Edge* c = &edges[s];
			
 
				+					Edge* r = &edges[s + 1];
			
 
				+					e->copy = s;
			
 
				+					e->reverse->copy = s + 1;
			
 
				+					c->reverse = 1;
			
 
				+					r->reverse = -1;
			
 
				+					c->targetVertex = getVertexCopy(e->target, oldVertices);
			
 
				+					r->targetVertex = copied;
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+					b3Printf("      CREATE: Vertex *%d has edge to *%d\n", copied, c->getTargetVertex());
			
 
				+#endif
			
 
				+				}
			
 
				+				if (prevCopy >= 0)
			
 
				+				{
			
 
				+					edges[e->copy].next = prevCopy - e->copy;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					firstCopy = e->copy;
			
 
				+				}
			
 
				+				prevCopy = e->copy;
			
 
				+				e = e->next;
			
 
				+			} while (e != firstEdge);
			
 
				+			edges[firstCopy].next = prevCopy - firstCopy;
			
 
				+		}
			
 
				+		copied++;
			
 
				+	}
			
 
				+
			
 
				+	for (int i = 0; i < copied; i++)
			
 
				+	{
			
 
				+		b3ConvexHullInternal::Vertex* v = oldVertices[i];
			
 
				+		b3ConvexHullInternal::Edge* firstEdge = v->edges;
			
 
				+		if (firstEdge)
			
 
				+		{
			
 
				+			b3ConvexHullInternal::Edge* e = firstEdge;
			
 
				+			do
			
 
				+			{
			
 
				+				if (e->copy >= 0)
			
 
				+				{
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+					b3Printf("Vertex *%d has edge to *%d\n", i, edges[e->copy].getTargetVertex());
			
 
				+#endif
			
 
				+					faces.push_back(e->copy);
			
 
				+					b3ConvexHullInternal::Edge* f = e;
			
 
				+					do
			
 
				+					{
			
 
				+#ifdef DEBUG_CONVEX_HULL
			
 
				+						b3Printf("   Face *%d\n", edges[f->copy].getTargetVertex());
			
 
				+#endif
			
 
				+						f->copy = -1;
			
 
				+						f = f->reverse->prev;
			
 
				+					} while (f != e);
			
 
				+				}
			
 
				+				e = e->next;
			
 
				+			} while (e != firstEdge);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return shift;
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3ConvexHullComputer.h
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3ConvexHullComputer.h
@@ -0,0 +1,99 @@
 
				+/*
			
 
				+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONVEX_HULL_COMPUTER_H
			
 
				+#define B3_CONVEX_HULL_COMPUTER_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+/// Convex hull implementation based on Preparata and Hong
			
 
				+/// See http://code.google.com/p/bullet/issues/detail?id=275
			
 
				+/// Ole Kniemeyer, MAXON Computer GmbH
			
 
				+class b3ConvexHullComputer
			
 
				+{
			
 
				+private:
			
 
				+	b3Scalar compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp);
			
 
				+
			
 
				+public:
			
 
				+	class Edge
			
 
				+	{
			
 
				+	private:
			
 
				+		int next;
			
 
				+		int reverse;
			
 
				+		int targetVertex;
			
 
				+
			
 
				+		friend class b3ConvexHullComputer;
			
 
				+
			
 
				+	public:
			
 
				+		int getSourceVertex() const
			
 
				+		{
			
 
				+			return (this + reverse)->targetVertex;
			
 
				+		}
			
 
				+
			
 
				+		int getTargetVertex() const
			
 
				+		{
			
 
				+			return targetVertex;
			
 
				+		}
			
 
				+
			
 
				+		const Edge* getNextEdgeOfVertex() const  // clockwise list of all edges of a vertex
			
 
				+		{
			
 
				+			return this + next;
			
 
				+		}
			
 
				+
			
 
				+		const Edge* getNextEdgeOfFace() const  // counter-clockwise list of all edges of a face
			
 
				+		{
			
 
				+			return (this + reverse)->getNextEdgeOfVertex();
			
 
				+		}
			
 
				+
			
 
				+		const Edge* getReverseEdge() const
			
 
				+		{
			
 
				+			return this + reverse;
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	// Vertices of the output hull
			
 
				+	b3AlignedObjectArray<b3Vector3> vertices;
			
 
				+
			
 
				+	// Edges of the output hull
			
 
				+	b3AlignedObjectArray<Edge> edges;
			
 
				+
			
 
				+	// Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons
			
 
				+	b3AlignedObjectArray<int> faces;
			
 
				+
			
 
				+	/*
			
 
				+		Compute convex hull of "count" vertices stored in "coords". "stride" is the difference in bytes
			
 
				+		between the addresses of consecutive vertices. If "shrink" is positive, the convex hull is shrunken
			
 
				+		by that amount (each face is moved by "shrink" length units towards the center along its normal).
			
 
				+		If "shrinkClamp" is positive, "shrink" is clamped to not exceed "shrinkClamp * innerRadius", where "innerRadius"
			
 
				+		is the minimum distance of a face to the center of the convex hull.
			
 
				+
			
 
				+		The returned value is the amount by which the hull has been shrunken. If it is negative, the amount was so large
			
 
				+		that the resulting convex hull is empty.
			
 
				+
			
 
				+		The output convex hull can be found in the member variables "vertices", "edges", "faces".
			
 
				+		*/
			
 
				+	b3Scalar compute(const float* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
			
 
				+	{
			
 
				+		return compute(coords, false, stride, count, shrink, shrinkClamp);
			
 
				+	}
			
 
				+
			
 
				+	// same as above, but double precision
			
 
				+	b3Scalar compute(const double* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
			
 
				+	{
			
 
				+		return compute(coords, true, stride, count, shrink, shrinkClamp);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_CONVEX_HULL_COMPUTER_H
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3GeometryUtil.cpp
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3GeometryUtil.cpp
@@ -0,0 +1,174 @@
 
				+/*
			
 
				+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#include "b3GeometryUtil.h"
			
 
				+
			
 
				+/*
			
 
				+  Make sure this dummy function never changes so that it
			
 
				+  can be used by probes that are checking whether the
			
 
				+  library is actually installed.
			
 
				+*/
			
 
				+extern "C"
			
 
				+{
			
 
				+	void b3BulletMathProbe();
			
 
				+
			
 
				+	void b3BulletMathProbe() {}
			
 
				+}
			
 
				+
			
 
				+bool b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin)
			
 
				+{
			
 
				+	int numbrushes = planeEquations.size();
			
 
				+	for (int i = 0; i < numbrushes; i++)
			
 
				+	{
			
 
				+		const b3Vector3& N1 = planeEquations[i];
			
 
				+		b3Scalar dist = b3Scalar(N1.dot(point)) + b3Scalar(N1[3]) - margin;
			
 
				+		if (dist > b3Scalar(0.))
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+bool b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin)
			
 
				+{
			
 
				+	int numvertices = vertices.size();
			
 
				+	for (int i = 0; i < numvertices; i++)
			
 
				+	{
			
 
				+		const b3Vector3& N1 = vertices[i];
			
 
				+		b3Scalar dist = b3Scalar(planeNormal.dot(N1)) + b3Scalar(planeNormal[3]) - margin;
			
 
				+		if (dist > b3Scalar(0.))
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+bool notExist(const b3Vector3& planeEquation, const b3AlignedObjectArray<b3Vector3>& planeEquations);
			
 
				+
			
 
				+bool notExist(const b3Vector3& planeEquation, const b3AlignedObjectArray<b3Vector3>& planeEquations)
			
 
				+{
			
 
				+	int numbrushes = planeEquations.size();
			
 
				+	for (int i = 0; i < numbrushes; i++)
			
 
				+	{
			
 
				+		const b3Vector3& N1 = planeEquations[i];
			
 
				+		if (planeEquation.dot(N1) > b3Scalar(0.999))
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+void b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut)
			
 
				+{
			
 
				+	const int numvertices = vertices.size();
			
 
				+	// brute force:
			
 
				+	for (int i = 0; i < numvertices; i++)
			
 
				+	{
			
 
				+		const b3Vector3& N1 = vertices[i];
			
 
				+
			
 
				+		for (int j = i + 1; j < numvertices; j++)
			
 
				+		{
			
 
				+			const b3Vector3& N2 = vertices[j];
			
 
				+
			
 
				+			for (int k = j + 1; k < numvertices; k++)
			
 
				+			{
			
 
				+				const b3Vector3& N3 = vertices[k];
			
 
				+
			
 
				+				b3Vector3 planeEquation, edge0, edge1;
			
 
				+				edge0 = N2 - N1;
			
 
				+				edge1 = N3 - N1;
			
 
				+				b3Scalar normalSign = b3Scalar(1.);
			
 
				+				for (int ww = 0; ww < 2; ww++)
			
 
				+				{
			
 
				+					planeEquation = normalSign * edge0.cross(edge1);
			
 
				+					if (planeEquation.length2() > b3Scalar(0.0001))
			
 
				+					{
			
 
				+						planeEquation.normalize();
			
 
				+						if (notExist(planeEquation, planeEquationsOut))
			
 
				+						{
			
 
				+							planeEquation[3] = -planeEquation.dot(N1);
			
 
				+
			
 
				+							//check if inside, and replace supportingVertexOut if needed
			
 
				+							if (areVerticesBehindPlane(planeEquation, vertices, b3Scalar(0.01)))
			
 
				+							{
			
 
				+								planeEquationsOut.push_back(planeEquation);
			
 
				+							}
			
 
				+						}
			
 
				+					}
			
 
				+					normalSign = b3Scalar(-1.);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations, b3AlignedObjectArray<b3Vector3>& verticesOut)
			
 
				+{
			
 
				+	const int numbrushes = planeEquations.size();
			
 
				+	// brute force:
			
 
				+	for (int i = 0; i < numbrushes; i++)
			
 
				+	{
			
 
				+		const b3Vector3& N1 = planeEquations[i];
			
 
				+
			
 
				+		for (int j = i + 1; j < numbrushes; j++)
			
 
				+		{
			
 
				+			const b3Vector3& N2 = planeEquations[j];
			
 
				+
			
 
				+			for (int k = j + 1; k < numbrushes; k++)
			
 
				+			{
			
 
				+				const b3Vector3& N3 = planeEquations[k];
			
 
				+
			
 
				+				b3Vector3 n2n3;
			
 
				+				n2n3 = N2.cross(N3);
			
 
				+				b3Vector3 n3n1;
			
 
				+				n3n1 = N3.cross(N1);
			
 
				+				b3Vector3 n1n2;
			
 
				+				n1n2 = N1.cross(N2);
			
 
				+
			
 
				+				if ((n2n3.length2() > b3Scalar(0.0001)) &&
			
 
				+					(n3n1.length2() > b3Scalar(0.0001)) &&
			
 
				+					(n1n2.length2() > b3Scalar(0.0001)))
			
 
				+				{
			
 
				+					//point P out of 3 plane equations:
			
 
				+
			
 
				+					//	d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 )
			
 
				+					//P =  -------------------------------------------------------------------------
			
 
				+					//   N1 . ( N2 * N3 )
			
 
				+
			
 
				+					b3Scalar quotient = (N1.dot(n2n3));
			
 
				+					if (b3Fabs(quotient) > b3Scalar(0.000001))
			
 
				+					{
			
 
				+						quotient = b3Scalar(-1.) / quotient;
			
 
				+						n2n3 *= N1[3];
			
 
				+						n3n1 *= N2[3];
			
 
				+						n1n2 *= N3[3];
			
 
				+						b3Vector3 potentialVertex = n2n3;
			
 
				+						potentialVertex += n3n1;
			
 
				+						potentialVertex += n1n2;
			
 
				+						potentialVertex *= quotient;
			
 
				+
			
 
				+						//check if inside, and replace supportingVertexOut if needed
			
 
				+						if (isPointInsidePlanes(planeEquations, potentialVertex, b3Scalar(0.01)))
			
 
				+						{
			
 
				+							verticesOut.push_back(potentialVertex);
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3GeometryUtil.h
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3GeometryUtil.h
@@ -0,0 +1,36 @@
 
				+/*
			
 
				+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  https://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_GEOMETRY_UTIL_H
			
 
				+#define B3_GEOMETRY_UTIL_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+///The b3GeometryUtil helper class provides a few methods to convert between plane equations and vertices.
			
 
				+class b3GeometryUtil  // All members are static; the class only groups the helpers.
			
 
				+{
			
 
				+public:
			
 
				+	static void getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut);  // Results go to planeEquationsOut. NOTE(review): definition not visible here; presumably the inverse of getVerticesFromPlaneEquations - confirm.
			
 
				+
			
 
				+	static void getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations, b3AlignedObjectArray<b3Vector3>& verticesOut);  // Going by the visible part of its definition: intersects every triple of pairwise non-parallel planes (xyz = normal, [3] = offset) and appends to verticesOut each point accepted by isPointInsidePlanes with margin 0.01.
			
 
				+
			
 
				+	static bool isInside(const b3AlignedObjectArray<b3Vector3>& vertices, const b3Vector3& planeNormal, b3Scalar margin);  // NOTE(review): definition not visible here; meaning of margin to be confirmed.
			
 
				+
			
 
				+	static bool isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin);  // Used by getVerticesFromPlaneEquations to accept or reject a candidate vertex.
			
 
				+
			
 
				+	static bool areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin);  // NOTE(review): definition not visible here.
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_GEOMETRY_UTIL_H
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/b3GrahamScan2dConvexHull.h
+++ b/Dependencies/include/bullet3/Bullet3Geometry/b3GrahamScan2dConvexHull.h
@@ -0,0 +1,116 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
 
				+#define B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+/// Input point for b3GrahamScanConvexHull2D: a position plus the sort key and the
			
 
				+/// caller-supplied index that identifies it.
			
 
				+struct b3GrahamVector3 : public b3Vector3
			
 
				+{
			
 
				+	/// @param org       position copied into the b3Vector3 base
			
 
				+	/// @param orgIndex  index of the point in the caller's data; last tie-breaker in b3AngleCompareFunc
			
 
				+	b3GrahamVector3(const b3Vector3& org, int orgIndex)
			
 
				+		: b3Vector3(org),
			
 
				+		  m_angle(b3Scalar(0)),  // previously left indeterminate, so every copy of the object read an uninitialized value
			
 
				+		  m_orgIndex(orgIndex)
			
 
				+	{
			
 
				+	}
			
 
				+	b3Scalar m_angle;  ///< sort key, overwritten by b3GrahamScanConvexHull2D
			
 
				+	int m_orgIndex;    ///< index passed to the constructor
			
 
				+};
			
 
				+
			
 
				+/// Ordering predicate for Graham-scan candidates: by m_angle first, then by squared
			
 
				+/// distance to the anchor, then by original index.
			
 
				+struct b3AngleCompareFunc
			
 
				+{
			
 
				+	b3Vector3 m_anchor;  ///< point the distances are measured from
			
 
				+
			
 
				+	b3AngleCompareFunc(const b3Vector3& anchor) : m_anchor(anchor) {}
			
 
				+
			
 
				+	/// Returns true when a has to be ordered before b.
			
 
				+	bool operator()(const b3GrahamVector3& a, const b3GrahamVector3& b) const
			
 
				+	{
			
 
				+		// Primary key: the precomputed angle value.
			
 
				+		if (a.m_angle != b.m_angle)
			
 
				+		{
			
 
				+			return a.m_angle < b.m_angle;
			
 
				+		}
			
 
				+
			
 
				+		// Secondary key: squared distance from the anchor.
			
 
				+		const b3Scalar distA = (a - m_anchor).length2();
			
 
				+		const b3Scalar distB = (b - m_anchor).length2();
			
 
				+		if (distA != distB)
			
 
				+		{
			
 
				+			return distA < distB;
			
 
				+		}
			
 
				+
			
 
				+		// Everything else equal: fall back to the original index.
			
 
				+		return a.m_orgIndex < b.m_orgIndex;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+/// Graham scan: computes the 2D convex hull of originalPoints in the plane whose normal is normalAxis.
			
 
				+///
			
 
				+/// @param originalPoints  candidate points; modified in place (anchor swapped to index 0, the
			
 
				+///                        remaining points sorted, every m_angle overwritten)
			
 
				+/// @param hull            receives the hull points; they are appended, the array is not cleared first
			
 
				+/// @param normalAxis      normal of the plane the scan is carried out in
			
 
				+inline void b3GrahamScanConvexHull2D(b3AlignedObjectArray<b3GrahamVector3>& originalPoints, b3AlignedObjectArray<b3GrahamVector3>& hull, const b3Vector3& normalAxis)
			
 
				+{
			
 
				+	b3Vector3 axis0, axis1;
			
 
				+	b3PlaneSpace1(normalAxis, axis0, axis1);
			
 
				+
			
 
				+	const int numPoints = originalPoints.size();
			
 
				+	if (numPoints <= 1)
			
 
				+	{
			
 
				+		// Zero or one point: the hull is the input itself.
			
 
				+		if (numPoints == 1)
			
 
				+		{
			
 
				+			hull.push_back(originalPoints[0]);
			
 
				+		}
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	//step 1: find anchor point with smallest projection on axis0 and move it to first location
			
 
				+	for (int i = 0; i < numPoints; i++)
			
 
				+	{
			
 
				+		// originalPoints[0] is re-read every pass because a swap may have replaced it.
			
 
				+		b3Scalar projL = originalPoints[i].dot(axis0);
			
 
				+		b3Scalar projR = originalPoints[0].dot(axis0);
			
 
				+		if (projL < projR)
			
 
				+		{
			
 
				+			originalPoints.swap(0, i);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	//also precompute angles; the anchor gets a key smaller than any other so it stays first
			
 
				+	originalPoints[0].m_angle = -1e30f;
			
 
				+	for (int i = 1; i < numPoints; i++)
			
 
				+	{
			
 
				+		b3Vector3 ar = originalPoints[i] - originalPoints[0];
			
 
				+		b3Scalar arLength = ar.length();
			
 
				+		// A point coinciding with the anchor has zero length; dividing by it would produce a
			
 
				+		// NaN key, which the comparison functor cannot order consistently.
			
 
				+		originalPoints[i].m_angle = (arLength > b3Scalar(0)) ? b3Cross(axis0, ar).dot(normalAxis) / arLength : b3Scalar(0);
			
 
				+	}
			
 
				+
			
 
				+	//step 2: sort all points, based on 'angle' with this anchor
			
 
				+	b3AngleCompareFunc comp(originalPoints[0]);
			
 
				+	originalPoints.quickSortInternal(comp, 1, numPoints - 1);
			
 
				+
			
 
				+	int i;
			
 
				+	for (i = 0; i < 2; i++)
			
 
				+	{
			
 
				+		hull.push_back(originalPoints[i]);
			
 
				+	}
			
 
				+
			
 
				+	//step 3: keep all 'convex' points and discard concave points (using back tracking)
			
 
				+	for (; i != numPoints; i++)
			
 
				+	{
			
 
				+		bool isConvex = false;
			
 
				+		while (!isConvex && hull.size() > 1)
			
 
				+		{
			
 
				+			b3Vector3& a = hull[hull.size() - 2];
			
 
				+			b3Vector3& b = hull[hull.size() - 1];
			
 
				+			isConvex = b3Cross(a - b, a - originalPoints[i]).dot(normalAxis) > 0;
			
 
				+			if (!isConvex)
			
 
				+				hull.pop_back();
			
 
				+			else
			
 
				+				hull.push_back(originalPoints[i]);
			
 
				+		}
			
 
				+
			
 
				+		// Back tracking removed everything except the anchor. The current point still has to
			
 
				+		// enter the hull; otherwise it is lost and, because the loop above needs two hull
			
 
				+		// points to run, every following point would be skipped as well.
			
 
				+		if (hull.size() == 1)
			
 
				+		{
			
 
				+			hull.push_back(originalPoints[i]);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#endif  //B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
--- a/Dependencies/include/bullet3/Bullet3Geometry/premake4.lua
+++ b/Dependencies/include/bullet3/Bullet3Geometry/premake4.lua
@@ -0,0 +1,16 @@
 
				+-- Premake4 project definition for the Bullet3Geometry static library.
			
 
				+project("Bullet3Geometry")
			
 
				+
			
 
				+	language("C++")
			
 
				+	kind("StaticLib")
			
 
				+
			
 
				+	-- Includes are resolved relative to the parent directory.
			
 
				+	includedirs({ ".." })
			
 
				+
			
 
				+	-- Extra compiler option applied on Linux only.
			
 
				+	if os.is("Linux") then
			
 
				+		buildoptions({ "-fPIC" })
			
 
				+	end
			
 
				+
			
 
				+	-- All C++ sources and headers matched by the recursive patterns.
			
 
				+	files({
			
 
				+		"**.cpp",
			
 
				+		"**.h",
			
 
				+	})
			
--- a/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h
+++ b/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h
@@ -0,0 +1,42 @@
 
				+
			
 
				+#ifndef B3_GPU_BROADPHASE_INTERFACE_H
			
 
				+#define B3_GPU_BROADPHASE_INTERFACE_H
			
 
				+
			
 
				+#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "b3SapAabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
			
 
				+
			
 
				+class b3GpuBroadphaseInterface  // Abstract interface of a GPU broadphase; every operation below is pure virtual.
			
 
				+{
			
 
				+public:
			
 
				+	typedef class b3GpuBroadphaseInterface*(CreateFunc)(cl_context ctx, cl_device_id device, cl_command_queue q);  // Factory function type: builds an implementation from an OpenCL context, device and command queue.
			
 
				+
			
 
				+	virtual ~b3GpuBroadphaseInterface()
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0;  // Registers an AABB together with a caller-chosen integer userPtr.
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0;  // Same arguments as createProxy; b3GpuGridBroadphase records these boxes in a separate "large" index list.
			
 
				+
			
 
				+	virtual void calculateOverlappingPairs(int maxPairs) = 0;  // Computes the overlapping pairs; maxPairs bounds how many are stored.
			
 
				+	virtual void calculateOverlappingPairsHost(int maxPairs) = 0;  // Host-side variant; b3GpuGridBroadphase implements it as an all-pairs loop on the CPU.
			
 
				+
			
 
				+	//call writeAabbsToGpu once all changes (createProxy etc.) have been made
			
 
				+	virtual void writeAabbsToGpu() = 0;
			
 
				+
			
 
				+	virtual cl_mem getAabbBufferWS() = 0;  // OpenCL buffer holding the AABBs. NOTE(review): "WS" presumably means world space - confirm.
			
 
				+	virtual int getNumOverlap() = 0;  // Number of overlapping pairs currently stored.
			
 
				+	virtual cl_mem getOverlappingPairBuffer() = 0;  // OpenCL buffer holding the overlapping pairs.
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() = 0;  // GPU-side AABB array.
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() = 0;  // Host-side AABB array.
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() = 0;  // GPU-side pair array.
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0;  // GPU-side list of indices of the boxes added with createProxy.
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0;  // GPU-side list of indices of the boxes added with createLargeProxy.
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_GPU_BROADPHASE_INTERFACE_H
			
--- a/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp
+++ b/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp
@@ -0,0 +1,338 @@
 
				+
			
 
				+#include "b3GpuGridBroadphase.h"
			
 
				+#include "Bullet3Geometry/b3AabbUtil.h"
			
 
				+#include "kernels/gridBroadphaseKernels.h"
			
 
				+#include "kernels/sapKernels.h"
			
 
				+//#include "kernels/gridBroadphase.cl"
			
 
				+
			
 
				+#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
			
 
				+
			
 
				+#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl"
			
 
				+#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl"
			
 
				+
			
 
				+cl_kernel kCalcHashAABB;  // File-scope kernel handles: assigned in the b3GpuGridBroadphase constructor, released in its destructor.
			
 
				+cl_kernel kClearCellStart;
			
 
				+cl_kernel kFindCellStart;
			
 
				+cl_kernel kFindOverlappingPairs;
			
 
				+cl_kernel m_copyAabbsKernel;  // NOTE(review): despite the m_ prefix this is a global, not a class member.
			
 
				+cl_kernel m_sap2Kernel;  // NOTE(review): as globals, all six handles are shared by every b3GpuGridBroadphase instance, so destroying one instance releases kernels another one may still use - confirm only one instance is alive at a time.
			
 
				+
			
 
				+//int maxPairsPerBody = 64;
			
 
				+int maxBodiesPerCell = 256;  // Stored into b3ParamsGridBroadphaseCL::m_gridSize[3] by the constructor; how the value 256 was chosen is not documented.
			
 
				+
			
 
				+/// Sets up the grid broadphase: fills in the grid parameters and uploads them, compiles the
			
 
				+/// SAP and grid kernels, and creates the radix sorter.
			
 
				+///
			
 
				+/// @param ctx     OpenCL context used for all buffers and programs
			
 
				+/// @param device  device the kernels are compiled for
			
 
				+/// @param q       command queue handed to every GPU array, the launchers and the sorter
			
 
				+b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q)
			
 
				+	: m_context(ctx),
			
 
				+	  m_device(device),
			
 
				+	  m_queue(q),
			
 
				+	  m_allAabbsGPU1(ctx, q),
			
 
				+	  m_smallAabbsMappingGPU(ctx, q),
			
 
				+	  m_largeAabbsMappingGPU(ctx, q),
			
 
				+	  m_gpuPairs(ctx, q),
			
 
				+
			
 
				+	  m_hashGpu(ctx, q),
			
 
				+
			
 
				+	  m_cellStartGpu(ctx, q),
			
 
				+	  m_paramsGPU(ctx, q)
			
 
				+{
			
 
				+	// Despite its name this value is only used through its reciprocal, stored as m_invCellSize.
			
 
				+	b3Vector3 gridSize = b3MakeVector3(3, 3, 3);
			
 
				+	b3Vector3 invGridSize = b3MakeVector3(1.f / gridSize[0], 1.f / gridSize[1], 1.f / gridSize[2]);
			
 
				+
			
 
				+	m_paramsCPU.m_gridSize[0] = 128;
			
 
				+	m_paramsCPU.m_gridSize[1] = 128;
			
 
				+	m_paramsCPU.m_gridSize[2] = 128;
			
 
				+	// The setter writes m_gridSize[3]; the former direct assignment of the same slot was redundant.
			
 
				+	m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell);
			
 
				+	m_paramsCPU.m_invCellSize[0] = invGridSize[0];
			
 
				+	m_paramsCPU.m_invCellSize[1] = invGridSize[1];
			
 
				+	m_paramsCPU.m_invCellSize[2] = invGridSize[2];
			
 
				+	m_paramsCPU.m_invCellSize[3] = 0.f;
			
 
				+	m_paramsGPU.push_back(m_paramsCPU);
			
 
				+
			
 
				+	cl_int errNum = 0;
			
 
				+
			
 
				+	{
			
 
				+		const char* sapSrc = sapCL;
			
 
				+		cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+		m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg);
			
 
				+		// Checked here because the next call overwrites errNum; a failure used to go unnoticed.
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+		m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+		// NOTE(review): sapProg is never released in this file - confirm whether b3OpenCLUtils keeps ownership.
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, gridBroadphaseCL, &errNum, "", B3_GRID_BROADPHASE_PATH);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+
			
 
				+		kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kCalcHashAABB", &errNum, gridProg);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+
			
 
				+		kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kClearCellStart", &errNum, gridProg);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+
			
 
				+		kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindCellStart", &errNum, gridProg);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+
			
 
				+		kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindOverlappingPairs", &errNum, gridProg);
			
 
				+		b3Assert(errNum == CL_SUCCESS);
			
 
				+	}
			
 
				+
			
 
				+	// Owned by this object; deleted in the destructor.
			
 
				+	m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue);
			
 
				+}
			
 
				+/// Releases the six kernels created by the constructor and destroys the radix sorter.
			
 
				+b3GpuGridBroadphase::~b3GpuGridBroadphase()
			
 
				+{
			
 
				+	// Kernels built from the SAP program.
			
 
				+	clReleaseKernel(m_copyAabbsKernel);
			
 
				+	clReleaseKernel(m_sap2Kernel);
			
 
				+
			
 
				+	// Kernels built from the grid program.
			
 
				+	clReleaseKernel(kCalcHashAABB);
			
 
				+	clReleaseKernel(kClearCellStart);
			
 
				+	clReleaseKernel(kFindCellStart);
			
 
				+	clReleaseKernel(kFindOverlappingPairs);
			
 
				+
			
 
				+	delete m_sorter;
			
 
				+}
			
 
				+
			
 
				+/// Appends an AABB to the host-side list and records its position in the small-AABB mapping.
			
 
				+///
			
 
				+/// @param aabbMin  minimum corner of the box
			
 
				+/// @param aabbMax  maximum corner of the box
			
 
				+/// @param userPtr  caller-chosen integer, stored in m_minIndices[3]
			
 
				+/// collisionFilterGroup and collisionFilterMask are not used by this implementation.
			
 
				+void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
			
 
				+{
			
 
				+	// Position the new box is about to take in m_allAabbsCPU1.
			
 
				+	const int proxyIndex = m_allAabbsCPU1.size();
			
 
				+
			
 
				+	b3SapAabb box;
			
 
				+	box.m_minVec = aabbMin;
			
 
				+	box.m_maxVec = aabbMax;
			
 
				+	// The index slots are written after the vectors, as in the original statement order.
			
 
				+	box.m_minIndices[3] = userPtr;
			
 
				+	box.m_signedMaxIndices[3] = proxyIndex;  //NOT userPtr;
			
 
				+
			
 
				+	m_smallAabbsMappingCPU.push_back(proxyIndex);
			
 
				+	m_allAabbsCPU1.push_back(box);
			
 
				+}
			
 
				+/// Appends an AABB to the host-side list and records its position in the large-AABB mapping.
			
 
				+///
			
 
				+/// @param aabbMin  minimum corner of the box
			
 
				+/// @param aabbMax  maximum corner of the box
			
 
				+/// @param userPtr  caller-chosen integer, stored in m_minIndices[3]
			
 
				+/// collisionFilterGroup and collisionFilterMask are not used by this implementation.
			
 
				+void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
			
 
				+{
			
 
				+	// Position the new box is about to take in m_allAabbsCPU1.
			
 
				+	const int proxyIndex = m_allAabbsCPU1.size();
			
 
				+
			
 
				+	b3SapAabb box;
			
 
				+	box.m_minVec = aabbMin;
			
 
				+	box.m_maxVec = aabbMax;
			
 
				+	// The index slots are written after the vectors, as in the original statement order.
			
 
				+	box.m_minIndices[3] = userPtr;
			
 
				+	box.m_signedMaxIndices[3] = proxyIndex;  //NOT userPtr;
			
 
				+
			
 
				+	m_largeAabbsMappingCPU.push_back(proxyIndex);
			
 
				+	m_allAabbsCPU1.push_back(box);
			
 
				+}
			
 
				+
			
 
				+/// Computes the overlapping pairs on the GPU and leaves them in m_gpuPairs.
			
 
				+///
			
 
				+/// The SAP kernel is run over the large and small index lists, then the grid kernels over the
			
 
				+/// small list; both write into m_gpuPairs and share one pair counter.
			
 
				+///
			
 
				+/// @param maxPairs  length the pair buffer is sized to; when the counter exceeds it an error is
			
 
				+///                  logged and the stored count is clamped to maxPairs
			
 
				+void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
			
 
				+{
			
 
				+	B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs");
			
 
				+
			
 
				+	int numSmallAabbs = m_smallAabbsMappingGPU.size();
			
 
				+
			
 
				+	// Single-element GPU counter, starting at zero, that the kernels advance per pair.
			
 
				+	b3OpenCLArray<int> pairCount(m_context, m_queue);
			
 
				+	pairCount.push_back(0);
			
 
				+	m_gpuPairs.resize(maxPairs);  //numSmallAabbs*maxPairsPerBody);
			
 
				+
			
 
				+	{
			
 
				+		int numLargeAabbs = m_largeAabbsMappingGPU.size();
			
 
				+		if (numLargeAabbs && numSmallAabbs)
			
 
				+		{
			
 
				+			B3_PROFILE("sap2Kernel");
			
 
				+			b3BufferInfoCL bInfo[] = {
			
 
				+				b3BufferInfoCL(m_allAabbsGPU1.getBufferCL()),
			
 
				+				b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()),
			
 
				+				b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()),
			
 
				+				b3BufferInfoCL(m_gpuPairs.getBufferCL()),
			
 
				+				b3BufferInfoCL(pairCount.getBufferCL())};
			
 
				+			b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel");
			
 
				+			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
			
 
				+			launcher.setConst(numLargeAabbs);
			
 
				+			launcher.setConst(numSmallAabbs);
			
 
				+			launcher.setConst(0);  //axis is not used
			
 
				+			launcher.setConst(maxPairs);
			
 
				+			//@todo: use actual maximum work item sizes of the device instead of hardcoded values
			
 
				+			launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64);
			
 
				+
			
 
				+			// Read back only to report an overflow; the final count is taken after the grid pass.
			
 
				+			int numPairs = pairCount.at(0);
			
 
				+
			
 
				+			if (numPairs > maxPairs)
			
 
				+			{
			
 
				+				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (numSmallAabbs)
			
 
				+	{
			
 
				+		B3_PROFILE("gridKernel");
			
 
				+		m_hashGpu.resize(numSmallAabbs);
			
 
				+		{
			
 
				+			B3_PROFILE("kCalcHashAABB");
			
 
				+			b3LauncherCL launch(m_queue, kCalcHashAABB, "kCalcHashAABB");
			
 
				+			launch.setConst(numSmallAabbs);
			
 
				+			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			
 
				+			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			
 
				+			launch.setBuffer(m_hashGpu.getBufferCL());
			
 
				+			launch.setBuffer(m_paramsGPU.getBufferCL());
			
 
				+			launch.launch1D(numSmallAabbs);
			
 
				+		}
			
 
				+
			
 
				+		m_sorter->execute(m_hashGpu);
			
 
				+
			
 
				+		// One cell-start entry per grid cell.
			
 
				+		int numCells = m_paramsCPU.m_gridSize[0] * m_paramsCPU.m_gridSize[1] * m_paramsCPU.m_gridSize[2];
			
 
				+		m_cellStartGpu.resize(numCells);
			
 
				+
			
 
				+		{
			
 
				+			B3_PROFILE("kClearCellStart");
			
 
				+			b3LauncherCL launch(m_queue, kClearCellStart, "kClearCellStart");
			
 
				+			launch.setConst(numCells);
			
 
				+			launch.setBuffer(m_cellStartGpu.getBufferCL());
			
 
				+			launch.launch1D(numCells);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			B3_PROFILE("kFindCellStart");
			
 
				+			b3LauncherCL launch(m_queue, kFindCellStart, "kFindCellStart");
			
 
				+			launch.setConst(numSmallAabbs);
			
 
				+			launch.setBuffer(m_hashGpu.getBufferCL());
			
 
				+			launch.setBuffer(m_cellStartGpu.getBufferCL());
			
 
				+			launch.launch1D(numSmallAabbs);
			
 
				+		}
			
 
				+
			
 
				+		{
			
 
				+			B3_PROFILE("kFindOverlappingPairs");
			
 
				+
			
 
				+			b3LauncherCL launch(m_queue, kFindOverlappingPairs, "kFindOverlappingPairs");
			
 
				+			launch.setConst(numSmallAabbs);
			
 
				+			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			
 
				+			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			
 
				+			launch.setBuffer(m_hashGpu.getBufferCL());
			
 
				+			launch.setBuffer(m_cellStartGpu.getBufferCL());
			
 
				+
			
 
				+			launch.setBuffer(m_paramsGPU.getBufferCL());
			
 
				+			launch.setBuffer(pairCount.getBufferCL());
			
 
				+			launch.setBuffer(m_gpuPairs.getBufferCL());
			
 
				+
			
 
				+			launch.setConst(maxPairs);
			
 
				+			launch.launch1D(numSmallAabbs);
			
 
				+
			
 
				+			int numPairs = pairCount.at(0);
			
 
				+			if (numPairs > maxPairs)
			
 
				+			{
			
 
				+				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
			
 
				+				numPairs = maxPairs;
			
 
				+			}
			
 
				+
			
 
				+			// Shrink the buffer from its maxPairs length to the number of pairs actually found.
			
 
				+			m_gpuPairs.resize(numPairs);
			
 
				+		}
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		// No kernel ran, so there are no pairs. Without this the buffer would keep the
			
 
				+		// maxPairs length set above and getNumOverlap() would count uninitialized entries.
			
 
				+		m_gpuPairs.resize(0);
			
 
				+	}
			
 
				+}
			
 
				+/// Host-side pair search: tests every AABB against every later one on the CPU and uploads
			
 
				+/// the resulting pair list to m_gpuPairs.
			
 
				+///
			
 
				+/// @param maxPairs  maximum number of pairs stored; overlaps found after that are ignored
			
 
				+void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
			
 
				+{
			
 
				+	m_hostPairs.resize(0);
			
 
				+	// Refresh the host copy from the GPU buffer before testing.
			
 
				+	m_allAabbsGPU1.copyToHost(m_allAabbsCPU1);
			
 
				+
			
 
				+	const int numAabbs = m_allAabbsCPU1.size();
			
 
				+	for (int first = 0; first < numAabbs; first++)
			
 
				+	{
			
 
				+		for (int second = first + 1; second < numAabbs; second++)
			
 
				+		{
			
 
				+			if (!b3TestAabbAgainstAabb2(m_allAabbsCPU1[first].m_minVec, m_allAabbsCPU1[first].m_maxVec,
			
 
				+										m_allAabbsCPU1[second].m_minVec, m_allAabbsCPU1[second].m_maxVec))
			
 
				+			{
			
 
				+				continue;
			
 
				+			}
			
 
				+
			
 
				+			// m_minIndices[3] is the slot createProxy/createLargeProxy store userPtr in.
			
 
				+			const int idSecond = m_allAabbsCPU1[second].m_minIndices[3];
			
 
				+			const int idFirst = m_allAabbsCPU1[first].m_minIndices[3];
			
 
				+
			
 
				+			// x receives the smaller id, y the larger one.
			
 
				+			b3Int4 pair;
			
 
				+			pair.x = (idSecond <= idFirst) ? idSecond : idFirst;
			
 
				+			pair.y = (idSecond <= idFirst) ? idFirst : idSecond;
			
 
				+
			
 
				+			if (m_hostPairs.size() < maxPairs)
			
 
				+			{
			
 
				+				m_hostPairs.push_back(pair);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	m_gpuPairs.copyFromHost(m_hostPairs);
			
 
				+}
			
 
				+
			
 
				+//call writeAabbsToGpu once all changes (createProxy etc.) have been made: it uploads the host-side data to the GPU arrays
			
 
				+void b3GpuGridBroadphase::writeAabbsToGpu()
			
 
				+{
			
 
				+	m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1);  // all boxes
			
 
				+	m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);  // indices recorded by createProxy
			
 
				+	m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);  // indices recorded by createLargeProxy
			
 
				+}
			
 
				+
			
 
				+cl_mem b3GpuGridBroadphase::getAabbBufferWS()  // Returns the OpenCL buffer behind m_allAabbsGPU1.
			
 
				+{
			
 
				+	return this->m_allAabbsGPU1.getBufferCL();
			
 
				+}
			
 
				+int b3GpuGridBroadphase::getNumOverlap()  // Returns the current length of m_gpuPairs, i.e. the number of stored pairs.
			
 
				+{
			
 
				+	return m_gpuPairs.size();
			
 
				+}
			
 
				+cl_mem b3GpuGridBroadphase::getOverlappingPairBuffer()  // Returns the OpenCL buffer behind m_gpuPairs.
			
 
				+{
			
 
				+	return m_gpuPairs.getBufferCL();
			
 
				+}
			
 
				+
			
 
				+b3OpenCLArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsGPU()  // Mutable access to the GPU-side AABB array.
			
 
				+{
			
 
				+	return m_allAabbsGPU1;
			
 
				+}
			
 
				+
			
 
				+b3AlignedObjectArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsCPU()  // Mutable access to the host-side AABB array.
			
 
				+{
			
 
				+	return m_allAabbsCPU1;
			
 
				+}
			
 
				+
			
 
				+b3OpenCLArray<b3Int4>& b3GpuGridBroadphase::getOverlappingPairsGPU()  // Mutable access to the GPU-side pair array.
			
 
				+{
			
 
				+	return m_gpuPairs;
			
 
				+}
			
 
				+b3OpenCLArray<int>& b3GpuGridBroadphase::getSmallAabbIndicesGPU()  // Mutable access to the GPU-side small-AABB index list.
			
 
				+{
			
 
				+	return m_smallAabbsMappingGPU;
			
 
				+}
			
 
				+b3OpenCLArray<int>& b3GpuGridBroadphase::getLargeAabbIndicesGPU()  // Mutable access to the GPU-side large-AABB index list.
			
 
				+{
			
 
				+	return m_largeAabbsMappingGPU;
			
 
				+}
			
--- a/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h
+++ b/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h
@@ -0,0 +1,80 @@
 
				+#ifndef B3_GPU_GRID_BROADPHASE_H
			
 
				+#define B3_GPU_GRID_BROADPHASE_H
			
 
				+
			
 
				+#include "b3GpuBroadphaseInterface.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
			
 
				+
			
 
				+/// Grid parameters kept on the host in m_paramsCPU and uploaded to the GPU through m_paramsGPU.
			
 
				+struct b3ParamsGridBroadphaseCL
			
 
				+{
			
 
				+	float m_invCellSize[4];  ///< reciprocal cell size per axis in [0..2]; [3] is set to 0 by b3GpuGridBroadphase
			
 
				+	int m_gridSize[4];       ///< number of cells per axis in [0..2]; [3] holds the per-cell body limit
			
 
				+
			
 
				+	/// Returns the per-cell body limit kept in m_gridSize[3].
			
 
				+	int getMaxBodiesPerCell() const { return m_gridSize[3]; }
			
 
				+
			
 
				+	/// Stores the per-cell body limit in m_gridSize[3].
			
 
				+	void setMaxBodiesPerCell(int maxOverlap) { m_gridSize[3] = maxOverlap; }
			
 
				+};
			
 
				+
			
 
				+class b3GpuGridBroadphase : public b3GpuBroadphaseInterface  // Grid-based implementation of the GPU broadphase interface.
			
 
				+{
			
 
				+protected:
			
 
				+	cl_context m_context;  // OpenCL handles received by the constructor.
			
 
				+	cl_device_id m_device;
			
 
				+	cl_command_queue m_queue;
			
 
				+
			
 
				+	b3OpenCLArray<b3SapAabb> m_allAabbsGPU1;  // GPU copy of all boxes, filled by writeAabbsToGpu.
			
 
				+	b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU1;  // Host list of all boxes, in creation order.
			
 
				+
			
 
				+	b3OpenCLArray<int> m_smallAabbsMappingGPU;  // GPU copy of the list below.
			
 
				+	b3AlignedObjectArray<int> m_smallAabbsMappingCPU;  // Indices into m_allAabbsCPU1 of boxes added with createProxy.
			
 
				+
			
 
				+	b3OpenCLArray<int> m_largeAabbsMappingGPU;  // GPU copy of the list below.
			
 
				+	b3AlignedObjectArray<int> m_largeAabbsMappingCPU;  // Indices into m_allAabbsCPU1 of boxes added with createLargeProxy.
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Int4> m_hostPairs;  // Pairs found by calculateOverlappingPairsHost.
			
 
				+	b3OpenCLArray<b3Int4> m_gpuPairs;  // Pair buffer returned by the pair getters.
			
 
				+
			
 
				+	b3OpenCLArray<b3SortData> m_hashGpu;  // Written by kCalcHashAABB, then sorted by m_sorter.
			
 
				+	b3OpenCLArray<int> m_cellStartGpu;  // One entry per grid cell; cleared by kClearCellStart and passed to kFindCellStart.
			
 
				+
			
 
				+	b3ParamsGridBroadphaseCL m_paramsCPU;  // Grid parameters set in the constructor.
			
 
				+	b3OpenCLArray<b3ParamsGridBroadphaseCL> m_paramsGPU;  // GPU copy of m_paramsCPU.
			
 
				+
			
 
				+	class b3RadixSort32CL* m_sorter;  // Created in the constructor, deleted in the destructor.
			
 
				+
			
 
				+public:
			
 
				+	b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q);
			
 
				+	virtual ~b3GpuGridBroadphase();
			
 
				+
			
 
				+	static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx, cl_device_id device, cl_command_queue q)  // Factory: returns a heap-allocated instance; no matching delete is visible here, so freeing it is presumably left to the caller.
			
 
				+	{
			
 
				+		return new b3GpuGridBroadphase(ctx, device, q);
			
 
				+	}
			
 
				+
			
 
				+	virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);  // The two filter arguments are ignored by this class.
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask);  // The two filter arguments are ignored by this class.
			
 
				+
			
 
				+	virtual void calculateOverlappingPairs(int maxPairs);  // GPU path.
			
 
				+	virtual void calculateOverlappingPairsHost(int maxPairs);  // CPU all-pairs path.
			
 
				+
			
 
				+	//call writeAabbsToGpu once all changes (createProxy etc.) have been made
			
 
				+	virtual void writeAabbsToGpu();
			
 
				+
			
 
				+	virtual cl_mem getAabbBufferWS();
			
 
				+	virtual int getNumOverlap();
			
 
				+	virtual cl_mem getOverlappingPairBuffer();
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU();
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU();
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
			
 
				+};
			
 
				+
			
 
				+#endif  //B3_GPU_GRID_BROADPHASE_H
			
--- a/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp
+++ b/Dependencies/include/bullet3/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp
@@ -0,0 +1,557 @@
 
				+/*
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Initial Author Jackson Lee, 2014
			
 
				+
			
 
				+#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
			
 
				+
			
 
				+#include "b3GpuParallelLinearBvh.h"
			
 
				+
			
 
				+//Binds every GPU array to the given context/queue, then compiles the parallelLinearBvh
				+//OpenCL program and creates each kernel used by build(), calculateOverlappingPairs()
				+//and testRaysAgainstBvhAabbs(). Every compile result is checked with b3Assert().
				+b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue)
				+	: m_queue(queue),
				+	  m_radixSorter(context, device, queue),
				+
				+	  m_rootNodeIndex(context, queue),
				+	  m_maxDistanceFromRoot(context, queue),
				+	  m_temp(context, queue),
				+
				+	  m_internalNodeAabbs(context, queue),
				+	  m_internalNodeLeafIndexRanges(context, queue),
				+	  m_internalNodeChildNodes(context, queue),
				+	  m_internalNodeParentNodes(context, queue),
				+
				+	  m_commonPrefixes(context, queue),
				+	  m_commonPrefixLengths(context, queue),
				+	  m_distanceFromRoot(context, queue),
				+
				+	  m_leafNodeParentNodes(context, queue),
				+	  m_mortonCodesAndAabbIndicies(context, queue),
				+	  m_mergedAabb(context, queue),
				+	  m_leafNodeAabbs(context, queue),
				+
				+	  m_largeAabbs(context, queue)
				+{
				+	//These 3 arrays only ever hold a single value each
				+	m_rootNodeIndex.resize(1);
				+	m_maxDistanceFromRoot.resize(1);
				+	m_temp.resize(1);
				+
				+	//The kernel source is the embedded string from parallelLinearBvhCL.h;
				+	//the .cl path is handed to the program compile call alongside it.
				+	const char clSourcePath[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
				+	const char* clSource = parallelLinearBvhCL;
				+	char* noExtraMacros = 0;
				+	cl_int clStatus = 0;
				+
				+	m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(
				+		context, device, clSource, &clStatus, noExtraMacros, clSourcePath);
				+	b3Assert(m_parallelLinearBvhProgram);
				+
				+	//Kernels used to prepare the input of the BVH construction
				+	m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "separateAabbs", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_separateAabbsKernel);
				+	m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "findAllNodesMergedAabb", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_findAllNodesMergedAabbKernel);
				+	m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "assignMortonCodesAndAabbIndicies", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);
				+
				+	//Kernels used by constructBinaryRadixTree()
				+	m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "computeAdjacentPairCommonPrefix", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_computeAdjacentPairCommonPrefixKernel);
				+	m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "buildBinaryRadixTreeLeafNodes", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_buildBinaryRadixTreeLeafNodesKernel);
				+	m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "buildBinaryRadixTreeInternalNodes", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_buildBinaryRadixTreeInternalNodesKernel);
				+	m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "findDistanceFromRoot", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_findDistanceFromRootKernel);
				+	m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "buildBinaryRadixTreeAabbsRecursive", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel);
				+
				+	//Kernel launched at the end of build()
				+	m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "findLeafIndexRanges", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_findLeafIndexRangesKernel);
				+
				+	//Query kernels (AABB overlap and ray tests)
				+	m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "plbvhCalculateOverlappingPairs", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_plbvhCalculateOverlappingPairsKernel);
				+	m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "plbvhRayTraverse", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_plbvhRayTraverseKernel);
				+	m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "plbvhLargeAabbAabbTest", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_plbvhLargeAabbAabbTestKernel);
				+	m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString(
				+		context, device, clSource, "plbvhLargeAabbRayTest", &clStatus, m_parallelLinearBvhProgram, noExtraMacros);
				+	b3Assert(m_plbvhLargeAabbRayTestKernel);
				+}
			
 
				+
			
 
				+//Releases every kernel created by the constructor (in creation order),
				+//followed by the program they were compiled from.
				+b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
				+{
				+	cl_kernel ownedKernels[] =
				+		{
				+			m_separateAabbsKernel,
				+			m_findAllNodesMergedAabbKernel,
				+			m_assignMortonCodesAndAabbIndiciesKernel,
				+
				+			m_computeAdjacentPairCommonPrefixKernel,
				+			m_buildBinaryRadixTreeLeafNodesKernel,
				+			m_buildBinaryRadixTreeInternalNodesKernel,
				+			m_findDistanceFromRootKernel,
				+			m_buildBinaryRadixTreeAabbsRecursiveKernel,
				+
				+			m_findLeafIndexRangesKernel,
				+
				+			m_plbvhCalculateOverlappingPairsKernel,
				+			m_plbvhRayTraverseKernel,
				+			m_plbvhLargeAabbAabbTestKernel,
				+			m_plbvhLargeAabbRayTestKernel};
				+
				+	const int numOwnedKernels = (int)(sizeof(ownedKernels) / sizeof(ownedKernels[0]));
				+	for (int i = 0; i < numOwnedKernels; ++i)
				+	{
				+		clReleaseKernel(ownedKernels[i]);
				+	}
				+
				+	clReleaseProgram(m_parallelLinearBvhProgram);
				+}
			
 
				+
			
 
				+//Rebuilds the BVH on the GPU.
				+//	worldSpaceAabbs  - all AABBs, large and small, in one contiguous array
				+//	smallAabbIndices - indices into worldSpaceAabbs of the small AABBs; these become the leaf nodes
				+//	largeAabbIndices - indices into worldSpaceAabbs of the large AABBs; these are only copied to m_largeAabbs
				+//With fewer than 2 small AABBs no tree is constructed; only the root index
				+//(and, for exactly 1 leaf, the sorted-index mapping) is written.
				+void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
				+								   const b3OpenCLArray<int>& largeAabbIndices)
				+{
				+	B3_PROFILE("b3ParallelLinearBvh::build()");
				+
				+	int largeCount = largeAabbIndices.size();
				+	int smallCount = smallAabbIndices.size();
				+
				+	//The input mixes large and small AABBs in a single array and uses 2 index arrays
				+	//to tell them apart. They are split up here so that the large AABBs
				+	//do not degrade the quality of the BVH.
				+	{
				+		B3_PROFILE("Separate large and small AABBs");
				+
				+		m_largeAabbs.resize(largeCount);
				+		m_leafNodeAabbs.resize(smallCount);
				+
				+		//Large AABBs go to m_largeAabbs
				+		{
				+			b3BufferInfoCL largeBuffers[] =
				+				{
				+					b3BufferInfoCL(worldSpaceAabbs.getBufferCL()),
				+					b3BufferInfoCL(largeAabbIndices.getBufferCL()),
				+
				+					b3BufferInfoCL(m_largeAabbs.getBufferCL())};
				+
				+			b3LauncherCL separateLarge(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
				+			separateLarge.setBuffers(largeBuffers, sizeof(largeBuffers) / sizeof(largeBuffers[0]));
				+			separateLarge.setConst(largeCount);
				+
				+			separateLarge.launch1D(largeCount);
				+		}
				+
				+		//Small AABBs go to m_leafNodeAabbs
				+		{
				+			b3BufferInfoCL smallBuffers[] =
				+				{
				+					b3BufferInfoCL(worldSpaceAabbs.getBufferCL()),
				+					b3BufferInfoCL(smallAabbIndices.getBufferCL()),
				+
				+					b3BufferInfoCL(m_leafNodeAabbs.getBufferCL())};
				+
				+			b3LauncherCL separateSmall(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
				+			separateSmall.setBuffers(smallBuffers, sizeof(smallBuffers) / sizeof(smallBuffers[0]));
				+			separateSmall.setConst(smallCount);
				+
				+			separateSmall.launch1D(smallCount);
				+		}
				+
				+		clFinish(m_queue);
				+	}
				+
				+	//One leaf per small AABB; a binary tree with N leaves has N - 1 internal nodes
				+	int leafCount = smallCount;
				+	int internalCount = leafCount - 1;
				+
				+	if (leafCount < 2)
				+	{
				+		//calculateOverlappingPairs() and testRaysAgainstBvhAabbs() check the number of leaf nodes,
				+		//so a root index of -1 (no leaves at all) is harmless
				+		int degenerateRootIndex = leafCount - 1;
				+		m_rootNodeIndex.copyFromHostPointer(&degenerateRootIndex, 1);
				+
				+		//The AABBs are not moved when sorting; m_mortonCodesAndAabbIndicies.m_value instead maps
				+		//a sorted AABB index to the unsorted one. The ray cast traversal kernel relies on it being set.
				+		//( m_mortonCodesAndAabbIndicies[].m_value == unsorted index == index of m_leafNodeAabbs )
				+		if (leafCount == 1)
				+		{
				+			b3SortData onlyLeaf;
				+			onlyLeaf.m_value = 0;  //A single leaf always has index 0; m_key is left unset
				+
				+			m_mortonCodesAndAabbIndicies.resize(1);
				+			m_mortonCodesAndAabbIndicies.copyFromHostPointer(&onlyLeaf, 1);
				+		}
				+
				+		return;
				+	}
				+
				+	//Size the per-node arrays for this build
				+	{
				+		m_internalNodeAabbs.resize(internalCount);
				+		m_internalNodeLeafIndexRanges.resize(internalCount);
				+		m_internalNodeChildNodes.resize(internalCount);
				+		m_internalNodeParentNodes.resize(internalCount);
				+
				+		m_commonPrefixes.resize(internalCount);
				+		m_commonPrefixLengths.resize(internalCount);
				+		m_distanceFromRoot.resize(internalCount);
				+
				+		m_leafNodeParentNodes.resize(leafCount);
				+		m_mortonCodesAndAabbIndicies.resize(leafCount);
				+		m_mergedAabb.resize(leafCount);
				+	}
				+
				+	//Compute the AABB enclosing all small AABBs; the next kernel uses it to define the cell size
				+	//of its virtual grid (2^10 cells in each dimension).
				+	{
				+		B3_PROFILE("Find AABB of merged nodes");
				+
				+		//The kernel overwrites its input, so it works on a copy
				+		m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs);
				+
				+		//Each pass leaves ceil(n / 2) AABBs to merge, until a single one remains
				+		for (int pendingAabbs = leafCount; pendingAabbs >= 2;
				+			 pendingAabbs = pendingAabbs / 2 + pendingAabbs % 2)
				+		{
				+			b3BufferInfoCL mergeBuffers[] =
				+				{
				+					b3BufferInfoCL(m_mergedAabb.getBufferCL())  //The final AABB ends up in m_mergedAabb[0]
				+				};
				+
				+			b3LauncherCL mergePass(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel");
				+			mergePass.setBuffers(mergeBuffers, sizeof(mergeBuffers) / sizeof(mergeBuffers[0]));
				+			mergePass.setConst(pendingAabbs);
				+
				+			mergePass.launch1D(pendingAabbs);
				+		}
				+
				+		clFinish(m_queue);
				+	}
				+
				+	//Place the center of each AABB in the virtual grid and turn the
				+	//discrete grid coordinates into a morton code.
				+	//Each element of m_mortonCodesAndAabbIndicies receives
				+	//	m_key == morton code (value to sort by)
				+	//	m_value == small AABB index
				+	{
				+		B3_PROFILE("Assign morton codes");
				+
				+		b3BufferInfoCL mortonBuffers[] =
				+			{
				+				b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()),
				+				b3BufferInfoCL(m_mergedAabb.getBufferCL()),
				+				b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL())};
				+
				+		b3LauncherCL assignCodes(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel");
				+		assignCodes.setBuffers(mortonBuffers, sizeof(mortonBuffers) / sizeof(mortonBuffers[0]));
				+		assignCodes.setConst(leafCount);
				+
				+		assignCodes.launch1D(leafCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Order the leaves by morton code
				+	{
				+		B3_PROFILE("Sort leaves by morton codes");
				+
				+		m_radixSorter.execute(m_mortonCodesAndAabbIndicies);
				+		clFinish(m_queue);
				+	}
				+
				+	//Build the tree on top of the sorted leaves
				+	constructBinaryRadixTree();
				+
				+	//In a sorted binary radix tree every internal node covers a contiguous range of leaf node indices.
				+	//The root covers [0, numLeafNodes - 1], and the children of a node split
				+	//their parent's range into 2 contiguous parts.
				+	//
				+	//For example, a root with indices [0, 31] might have children covering [0, 11] and [12, 31];
				+	//one level further down the ranges could be [0, 2], [3, 11], [12, 22], and [23, 31].
				+	//
				+	//calculateOverlappingPairs() can exploit this property to avoid testing each AABB pair twice
				+	{
				+		B3_PROFILE("m_findLeafIndexRangesKernel");
				+
				+		b3BufferInfoCL rangeBuffers[] =
				+			{
				+				b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL())};
				+
				+		b3LauncherCL findRanges(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel");
				+		findRanges.setBuffers(rangeBuffers, sizeof(rangeBuffers) / sizeof(rangeBuffers[0]));
				+		findRanges.setConst(internalCount);
				+
				+		findRanges.launch1D(internalCount);
				+		clFinish(m_queue);
				+	}
				+}
			
 
				+
			
 
				+//Writes the overlapping AABB pairs into out_overlappingPairs.
				+//On entry, out_overlappingPairs.size() is taken as the maximum number of pairs that may be written;
				+//on return the array is resized to the number of pairs found (clamped to that maximum).
				+//m_temp serves as the pair counter on the GPU.
				+void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs)
				+{
				+	int pairCapacity = out_overlappingPairs.size();
				+	int smallAabbCount = m_leafNodeAabbs.size();
				+	int largeAabbCount = m_largeAabbs.size();
				+
				+	//Start counting from 0
				+	b3OpenCLArray<int>& gpuPairCounter = m_temp;
				+	int zero = 0;
				+	gpuPairCounter.copyFromHostPointer(&zero, 1);
				+
				+	//Small AABBs against the BVH of small AABBs; needs at least 2 leaves
				+	if (smallAabbCount > 1)
				+	{
				+		B3_PROFILE("PLBVH small-small AABB test");
				+
				+		b3BufferInfoCL traversalBuffers[] =
				+			{
				+				b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()),
				+
				+				b3BufferInfoCL(m_rootNodeIndex.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()),
				+				b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()),
				+
				+				b3BufferInfoCL(gpuPairCounter.getBufferCL()),
				+				b3BufferInfoCL(out_overlappingPairs.getBufferCL())};
				+
				+		b3LauncherCL smallSmallTest(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel");
				+		smallSmallTest.setBuffers(traversalBuffers, sizeof(traversalBuffers) / sizeof(traversalBuffers[0]));
				+		smallSmallTest.setConst(pairCapacity);
				+		smallSmallTest.setConst(smallAabbCount);
				+
				+		smallSmallTest.launch1D(smallAabbCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Large AABBs against the small AABBs; one thread per small AABB
				+	if (largeAabbCount > 0 && smallAabbCount > 0)
				+	{
				+		B3_PROFILE("PLBVH large-small AABB test");
				+
				+		b3BufferInfoCL largeTestBuffers[] =
				+			{
				+				b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()),
				+				b3BufferInfoCL(m_largeAabbs.getBufferCL()),
				+
				+				b3BufferInfoCL(gpuPairCounter.getBufferCL()),
				+				b3BufferInfoCL(out_overlappingPairs.getBufferCL())};
				+
				+		b3LauncherCL largeSmallTest(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel");
				+		largeSmallTest.setBuffers(largeTestBuffers, sizeof(largeTestBuffers) / sizeof(largeTestBuffers[0]));
				+		largeSmallTest.setConst(pairCapacity);
				+		largeSmallTest.setConst(largeAabbCount);
				+		largeSmallTest.setConst(smallAabbCount);
				+
				+		largeSmallTest.launch1D(smallAabbCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Read the counter back; if it went past the capacity, report it and clamp
				+	//both the host value and the counter on the GPU
				+	int pairsFound = -1;
				+	gpuPairCounter.copyToHostPointer(&pairsFound, 1);
				+	if (pairsFound > pairCapacity)
				+	{
				+		b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", pairsFound, pairCapacity);
				+		pairsFound = pairCapacity;
				+		gpuPairCounter.copyFromHostPointer(&pairCapacity, 1);
				+	}
				+
				+	out_overlappingPairs.resize(pairsFound);
				+}
			
 
				+
			
 
				+//Tests every ray in rays against the small AABBs (through the BVH) and against the large AABBs.
				+//	rays                 - rays to test; one thread is launched per ray
				+//	out_numRayRigidPairs - counter on the GPU; reset to 0 here, then incremented by the kernels
				+//	out_rayRigidPairs    - receives the pairs; its size() on entry is the maximum number of pairs
				+//If more pairs are found than fit, an error is reported and the counter is clamped to the
				+//maximum, mirroring the overflow handling of calculateOverlappingPairs(). Without the clamp,
				+//the counter would exceed the capacity of out_rayRigidPairs and a consumer using it as an
				+//element count would read past the end of the array.
				+void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
				+													 b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs)
				+{
				+	B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()");
				+
				+	int numRays = rays.size();
				+	int maxRayRigidPairs = out_rayRigidPairs.size();
				+
				+	int reset = 0;
				+	out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
				+
				+	//Rays against the small AABBs, traversing the BVH
				+	if (m_leafNodeAabbs.size() > 0)
				+	{
				+		B3_PROFILE("PLBVH ray test small AABB");
				+
				+		b3BufferInfoCL bufferInfo[] =
				+			{
				+				b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()),
				+
				+				b3BufferInfoCL(m_rootNodeIndex.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()),
				+				b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()),
				+
				+				b3BufferInfoCL(rays.getBufferCL()),
				+
				+				b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()),
				+				b3BufferInfoCL(out_rayRigidPairs.getBufferCL())};
				+
				+		b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel");
				+		launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
				+		launcher.setConst(maxRayRigidPairs);
				+		launcher.setConst(numRays);
				+
				+		launcher.launch1D(numRays);
				+		clFinish(m_queue);
				+	}
				+
				+	//Rays against the large AABBs, which are not part of the BVH
				+	int numLargeAabbRigids = m_largeAabbs.size();
				+	if (numLargeAabbRigids > 0)
				+	{
				+		B3_PROFILE("PLBVH ray test large AABB");
				+
				+		b3BufferInfoCL bufferInfo[] =
				+			{
				+				b3BufferInfoCL(m_largeAabbs.getBufferCL()),
				+				b3BufferInfoCL(rays.getBufferCL()),
				+
				+				b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()),
				+				b3BufferInfoCL(out_rayRigidPairs.getBufferCL())};
				+
				+		b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel");
				+		launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
				+		launcher.setConst(numLargeAabbRigids);
				+		launcher.setConst(maxRayRigidPairs);
				+		launcher.setConst(numRays);
				+
				+		launcher.launch1D(numRays);
				+		clFinish(m_queue);
				+	}
				+
				+	//Read the counter back to detect overflow of out_rayRigidPairs
				+	int numRayRigidPairs = -1;
				+	out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1);
				+
				+	if (numRayRigidPairs > maxRayRigidPairs)
				+	{
				+		b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs);
				+
				+		//Keep the counter on the GPU within the capacity of out_rayRigidPairs
				+		out_numRayRigidPairs.copyFromHostPointer(&maxRayRigidPairs, 1);
				+	}
				+}
			
 
				+
			
 
				+//Builds the binary radix tree over the leaves in m_leafNodeAabbs and fills in the AABB of every
				+//internal node. It is called from build() after m_mortonCodesAndAabbIndicies has been sorted.
				+//Runs 5 kernels in sequence, waiting on the queue after each stage.
				+void b3GpuParallelLinearBvh::constructBinaryRadixTree()
				+{
				+	B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()");
				+
				+	int leafCount = m_leafNodeAabbs.size();
				+	int internalCount = leafCount - 1;
				+
				+	//An internal node sits between each pair of adjacent leaf nodes.
				+	//The common prefix of those 2 leaves is computed here; with this layout
				+	//the binary radix tree can be constructed quickly.
				+	{
				+		B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel");
				+
				+		b3BufferInfoCL prefixBuffers[] =
				+			{
				+				b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()),
				+				b3BufferInfoCL(m_commonPrefixes.getBufferCL()),
				+				b3BufferInfoCL(m_commonPrefixLengths.getBufferCL())};
				+
				+		b3LauncherCL computePrefixes(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel");
				+		computePrefixes.setBuffers(prefixBuffers, sizeof(prefixBuffers) / sizeof(prefixBuffers[0]));
				+		computePrefixes.setConst(internalCount);
				+
				+		computePrefixes.launch1D(internalCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Each leaf node picks its parent by comparing the 2 nearest
				+	//internal nodes, and the child node indices are assigned
				+	{
				+		B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
				+
				+		b3BufferInfoCL leafBuffers[] =
				+			{
				+				b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()),
				+				b3BufferInfoCL(m_leafNodeParentNodes.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL())};
				+
				+		b3LauncherCL linkLeaves(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
				+		linkLeaves.setBuffers(leafBuffers, sizeof(leafBuffers) / sizeof(leafBuffers[0]));
				+		linkLeaves.setConst(leafCount);
				+
				+		linkLeaves.launch1D(leafCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Each internal node runs 2 binary searches among the other internal nodes, to its left and
				+	//to its right, to find its potential parent nodes; child node indices are assigned as well
				+	{
				+		B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
				+
				+		b3BufferInfoCL internalBuffers[] =
				+			{
				+				b3BufferInfoCL(m_commonPrefixes.getBufferCL()),
				+				b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()),
				+				b3BufferInfoCL(m_rootNodeIndex.getBufferCL())};
				+
				+		b3LauncherCL linkInternalNodes(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
				+		linkInternalNodes.setBuffers(internalBuffers, sizeof(internalBuffers) / sizeof(internalBuffers[0]));
				+		linkInternalNodes.setConst(internalCount);
				+
				+		linkInternalNodes.launch1D(internalCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Count the nodes separating each internal node from the root node;
				+	//the next kernel needs this to set the AABBs.
				+	//The maximum of these distances is determined too.
				+	{
				+		B3_PROFILE("m_findDistanceFromRootKernel");
				+
				+		b3BufferInfoCL distanceBuffers[] =
				+			{
				+				b3BufferInfoCL(m_rootNodeIndex.getBufferCL()),
				+				b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()),
				+				b3BufferInfoCL(m_maxDistanceFromRoot.getBufferCL()),
				+				b3BufferInfoCL(m_distanceFromRoot.getBufferCL())};
				+
				+		b3LauncherCL findDistances(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel");
				+		findDistances.setBuffers(distanceBuffers, sizeof(distanceBuffers) / sizeof(distanceBuffers[0]));
				+		findDistances.setConst(internalCount);
				+
				+		findDistances.launch1D(internalCount);
				+		clFinish(m_queue);
				+	}
				+
				+	//Work upwards from the internal nodes nearest to the leaf nodes towards the root,
				+	//one distance level per launch; each internal node merges the AABBs of its children
				+	{
				+		B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel");
				+
				+		int deepestLevel = -1;
				+		{
				+			B3_PROFILE("copy maxDistanceFromRoot to CPU");
				+			m_maxDistanceFromRoot.copyToHostPointer(&deepestLevel, 1);
				+			clFinish(m_queue);
				+		}
				+
				+		for (int level = deepestLevel; level >= 0; --level)
				+		{
				+			b3BufferInfoCL aabbBuffers[] =
				+				{
				+					b3BufferInfoCL(m_distanceFromRoot.getBufferCL()),
				+					b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()),
				+					b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()),
				+					b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()),
				+					b3BufferInfoCL(m_internalNodeAabbs.getBufferCL())};
				+
				+			b3LauncherCL mergeLevel(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel");
				+			mergeLevel.setBuffers(aabbBuffers, sizeof(aabbBuffers) / sizeof(aabbBuffers[0]));
				+			mergeLevel.setConst(deepestLevel);
				+			mergeLevel.setConst(level);
				+			mergeLevel.setConst(internalCount);
				+
				+			//Launching a thread for every internal node may seem inefficient, since far fewer
				+			//nodes are actually processed per level, but it is faster than working out
				+			//exactly which nodes are ready to merge their child AABBs.
				+			mergeLevel.launch1D(internalCount);
				+		}
				+
				+		clFinish(m_queue);
				+	}
				+}