Explorar o código

Merge pull request #1276 from marauder2k9-torque/update-convexdecom-to-vhacd

Remove nvConvexDecomp and replace it with v-hacd
Brian Roberts hai 1 ano
pai
achega
f3a7263503
Modificáronse 37 ficheiros con 14442 adicións e 17250 borrados
  1. 2 1
      Engine/lib/CMakeLists.txt
  2. 1 1
      Engine/lib/Torque_postBuild.cmake
  3. 0 7
      Engine/lib/convexDecomp/CMakeLists.txt
  4. 0 252
      Engine/lib/convexDecomp/NvConcavityVolume.cpp
  5. 0 78
      Engine/lib/convexDecomp/NvConcavityVolume.h
  6. 0 788
      Engine/lib/convexDecomp/NvConvexDecomposition.cpp
  7. 0 111
      Engine/lib/convexDecomp/NvConvexDecomposition.h
  8. 0 74
      Engine/lib/convexDecomp/NvFloatMath.cpp
  9. 0 586
      Engine/lib/convexDecomp/NvFloatMath.h
  10. 0 5607
      Engine/lib/convexDecomp/NvFloatMath.inl
  11. 0 1905
      Engine/lib/convexDecomp/NvHashMap.h
  12. 0 783
      Engine/lib/convexDecomp/NvMeshIslandGeneration.cpp
  13. 0 91
      Engine/lib/convexDecomp/NvMeshIslandGeneration.h
  14. 0 153
      Engine/lib/convexDecomp/NvRayCast.cpp
  15. 0 79
      Engine/lib/convexDecomp/NvRayCast.h
  16. 0 713
      Engine/lib/convexDecomp/NvRemoveTjunctions.cpp
  17. 0 110
      Engine/lib/convexDecomp/NvRemoveTjunctions.h
  18. 0 189
      Engine/lib/convexDecomp/NvSimpleTypes.h
  19. 0 224
      Engine/lib/convexDecomp/NvSplitMesh.cpp
  20. 0 88
      Engine/lib/convexDecomp/NvSplitMesh.h
  21. 0 3464
      Engine/lib/convexDecomp/NvStanHull.cpp
  22. 0 201
      Engine/lib/convexDecomp/NvStanHull.h
  23. 0 511
      Engine/lib/convexDecomp/NvThreadConfig.cpp
  24. 0 119
      Engine/lib/convexDecomp/NvThreadConfig.h
  25. 0 81
      Engine/lib/convexDecomp/NvUserMemAlloc.h
  26. 0 38
      Engine/lib/convexDecomp/readme.txt
  27. 0 852
      Engine/lib/convexDecomp/wavefront.cpp
  28. 0 77
      Engine/lib/convexDecomp/wavefront.h
  29. 3 0
      Engine/lib/convexMath/CMakeLists.txt
  30. 17 0
      Engine/lib/convexMath/FloatMath.cpp
  31. 525 0
      Engine/lib/convexMath/FloatMath.h
  32. 5280 0
      Engine/lib/convexMath/FloatMath.inl
  33. 1 1
      Engine/source/CMakeLists.txt
  34. 2 0
      Engine/source/platformWin32/winRedbook.cpp
  35. 163 65
      Engine/source/ts/tsMeshFit.cpp
  36. 8447 0
      Engine/source/ts/vhacd/VHACD.h
  37. 1 1
      Tools/CMake/torque_configs.cmake

+ 2 - 1
Engine/lib/CMakeLists.txt

@@ -112,6 +112,7 @@ mark_as_advanced(SDL_XINPUT)
 add_subdirectory(sdl ${TORQUE_LIB_TARG_DIRECTORY}/sdl2 EXCLUDE_FROM_ALL)
 
 add_subdirectory(nativeFileDialogs ${TORQUE_LIB_TARG_DIRECTORY}/nfd EXCLUDE_FROM_ALL)
+add_subdirectory(convexMath ${TORQUE_LIB_TARG_DIRECTORY}/convexMath EXCLUDE_FROM_ALL)
 
 # Assimp
 advanced_option(ASSIMP_HUNTER_ENABLED "Enable Hunter package manager support" OFF)
@@ -189,7 +190,7 @@ mark_as_advanced(PNG_PREFIX)
 add_subdirectory(tinyxml ${TORQUE_LIB_TARG_DIRECTORY}/tinyxml EXCLUDE_FROM_ALL)
 add_subdirectory(opcode ${TORQUE_LIB_TARG_DIRECTORY}/opcode EXCLUDE_FROM_ALL)
 add_subdirectory(pcre ${TORQUE_LIB_TARG_DIRECTORY}/pcre EXCLUDE_FROM_ALL)
-add_subdirectory(convexDecomp ${TORQUE_LIB_TARG_DIRECTORY}/convexDecomp EXCLUDE_FROM_ALL)
+
 add_subdirectory(squish ${TORQUE_LIB_TARG_DIRECTORY}/squish EXCLUDE_FROM_ALL)
 add_subdirectory(collada ${TORQUE_LIB_TARG_DIRECTORY}/collada EXCLUDE_FROM_ALL)
 add_subdirectory(glad ${TORQUE_LIB_TARG_DIRECTORY}/glad EXCLUDE_FROM_ALL)

+ 1 - 1
Engine/lib/Torque_postBuild.cmake

@@ -2,7 +2,7 @@
 
 # When on Windows, we need to link against winsock and windows codecs
 if (WIN32)
-	set(TORQUE_LINK_WINDOWS ${TORQUE_LINK_WINDOWS} WS2_32.LIB windowscodecs.lib)	
+	set(TORQUE_LINK_WINDOWS ${TORQUE_LINK_WINDOWS} WS2_32.LIB windowscodecs.lib winmm.lib)	
 	if (TORQUE_D3D11)
 		set(TORQUE_LINK_WINDOWS ${TORQUE_LINK_WINDOWS} dxguid.lib)
 	endif (TORQUE_D3D11)

+ 0 - 7
Engine/lib/convexDecomp/CMakeLists.txt

@@ -1,7 +0,0 @@
-file(GLOB CONVEX_DECOMP_SOURCES "*.cpp")
-add_library(convexDecomp STATIC ${CONVEX_DECOMP_SOURCES})
-target_include_directories(convexDecomp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-
-if (UNIX AND NOT APPLE)
-    target_compile_definitions(convexDecomp PUBLIC LINUX)
-endif (UNIX AND NOT APPLE)

+ 0 - 252
Engine/lib/convexDecomp/NvConcavityVolume.cpp

@@ -1,252 +0,0 @@
-/*
-
-NvConcavityVolume.cpp : This is a code snippet that computes the volume of concavity of a triangle mesh.
-
-*/
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#define SHOW_DEBUG 0
-#if SHOW_DEBUG
-#include "RenderDebug.h"
-#endif
-#include "NvConcavityVolume.h"
-#include "NvFloatMath.h"
-#include "NvRayCast.h"
-#include <stdio.h>
-
-#pragma warning(disable:4100 4189 4505 4127 4101)
-
-namespace CONVEX_DECOMPOSITION
-{
-
-bool raycast(const NxF32 *p1,const NxF32 *normal,NxF32 *dest,iRayCast *cast_hull,iRayCast *cast_mesh)
-{
-	bool ret = true;
-
-	NxF32 hit_hull[3];
-	NxF32 hit_hullNormal[3];
-
-	NxF32 hit_mesh[3];
-	NxF32 hit_meshNormal[3];
-
-	bool hitHull = cast_hull->castRay(p1,normal,hit_hull,hit_hullNormal);
-	bool hitMesh = cast_mesh->castRay(p1,normal,hit_mesh,hit_meshNormal);
-
-	if ( hitMesh )
-	{
-		float dot = fm_dot(normal,hit_meshNormal);
-		if ( dot < 0 ) // skip if we hit an internal face of the mesh when projecting out towards the convex hull.
-		{
-			ret = false;
-		}
-		else
-		{
-			NxF32 d1 = fm_distanceSquared(p1,hit_mesh);
-			NxF32 d2 = fm_distanceSquared(p1,hit_hull);
-			if ( d1 < d2 )
-			{
-				dest[0] = hit_mesh[0];
-				dest[1] = hit_mesh[1];
-				dest[2] = hit_mesh[2];
-			}
-			else
-			{
-				dest[0] = hit_hull[0];
-				dest[1] = hit_hull[1];
-				dest[2] = hit_hull[2];
-			}
-		}
-	}
-	else if ( hitHull )
-	{
-		dest[0] = hit_hull[0];
-		dest[1] = hit_hull[1];
-		dest[2] = hit_hull[2];
-	}
-	else
-	{
-		ret = false;
-	}
-
-
-	return ret;
-}
-
-void addTri(NxU32 *indices,NxU32 i1,NxU32 i2,NxU32 i3,NxU32 &tcount)
-{
-	indices[tcount*3+0] = i1;
-	indices[tcount*3+1] = i2;
-	indices[tcount*3+2] = i3;
-	tcount++;
-}
-
-NxF32 computeConcavityVolume(NxU32 vcount_hull,
-						     const NxF32 *vertices_hull,
-						     NxU32 tcount_hull,
-						     const NxU32 *indices_hull,
-						     NxU32 vcount_mesh,
-						     const NxF32 *vertices_mesh,
-						     NxU32 tcount_mesh,
-						     const NxU32 *indices_mesh)
-{
-	NxF32 total_volume = 0;
-
-#if SHOW_DEBUG
-	NVSHARE::gRenderDebug->pushRenderState();
-	NVSHARE::gRenderDebug->setCurrentDisplayTime(150.0f);
-#endif
-
-	iRayCast *cast_hull = createRayCast(vertices_hull,tcount_hull,indices_hull);
-	iRayCast *cast_mesh = createRayCast(vertices_mesh,tcount_mesh,indices_mesh);
-
-
-	const NxU32 *indices = indices_mesh;
-#if 0
-	static NxU32 index = 0;
-	NxU32 i = index++;
-	indices = &indices[i*3];
-#else
-	for (NxU32 i=0; i<tcount_mesh; i++)
-#endif
-	{
-		NxU32 i1 = indices[0];
-		NxU32 i2 = indices[1];
-		NxU32 i3 = indices[2];
-
-		const NxF32 *p1 = &vertices_mesh[i1*3];
-		const NxF32 *p2 = &vertices_mesh[i2*3];
-		const NxF32 *p3 = &vertices_mesh[i3*3];
-
-		NxF32 normal[3];
-		NxF32 d = fm_computePlane(p3,p2,p1,normal);
-
-		NxF32  vertices[6*3];
-
-		vertices[0] = p1[0];
-		vertices[1] = p1[1];
-		vertices[2] = p1[2];
-
-		vertices[3] = p2[0];
-		vertices[4] = p2[1];
-		vertices[5] = p2[2];
-
-		vertices[6] = p3[0];
-		vertices[7] = p3[1];
-		vertices[8] = p3[2];
-
-		NxF32 midPoint[3];
-		midPoint[0] = (p1[0]+p2[0]+p3[0])/3;
-		midPoint[1] = (p1[1]+p2[1]+p3[1])/3;
-		midPoint[2] = (p1[2]+p2[2]+p3[2])/3;
-
-		fm_lerp(midPoint,p1,&vertices[0],0.9999f);
-		fm_lerp(midPoint,p2,&vertices[3],0.9999f);
-		fm_lerp(midPoint,p3,&vertices[6],0.9999f);
-
-		NxF32 *_p1 = &vertices[3*3];
-		NxF32 *_p2 = &vertices[4*3];
-		NxF32 *_p3 = &vertices[5*3];
-
-		NxU32 hitCount = 0;
-
-		if ( raycast(&vertices[0],normal, _p1,cast_hull,cast_mesh) ) hitCount++;
-		if ( raycast(&vertices[3],normal, _p2,cast_hull,cast_mesh) ) hitCount++;
-		if ( raycast(&vertices[6],normal, _p3,cast_hull,cast_mesh) ) hitCount++;
-
-		// form triangle mesh!
-		if ( hitCount == 3 )
-		{
-			NxU32 tcount = 0;
-			NxU32 tindices[8*3];
-
-			addTri(tindices,2,1,0,tcount);
-			addTri(tindices,3,4,5,tcount);
-
-			addTri(tindices,0,3,2,tcount);
-			addTri(tindices,2,3,5,tcount);
-
-			addTri(tindices,1,3,0,tcount);
-			addTri(tindices,4,3,1,tcount);
-
-			addTri(tindices,5,4,1,tcount);
-			addTri(tindices,2,5,1,tcount);
-
-			NxF32 volume = fm_computeMeshVolume(vertices,tcount,tindices);
-			total_volume+=volume;
-#if SHOW_DEBUG
-			NVSHARE::gRenderDebug->setCurrentColor(0x0000FF,0xFFFFFF);
-			NVSHARE::gRenderDebug->addToCurrentState(NVSHARE::DebugRenderState::SolidWireShaded);
-
-			for (NxU32 i=0; i<tcount; i++)
-			{
-				NxU32 i1 = tindices[i*3+0];
-				NxU32 i2 = tindices[i*3+1];
-				NxU32 i3 = tindices[i*3+2];
-
-				const NxF32 *p1 = &vertices[i1*3];
-				const NxF32 *p2 = &vertices[i2*3];
-				const NxF32 *p3 = &vertices[i3*3];
-
-				NVSHARE::gRenderDebug->DebugTri(p1,p2,p3);
-			}
-#endif
-		}
-		indices+=3;
-	}
-#if SHOW_DEBUG
-	NVSHARE::gRenderDebug->popRenderState();
-#endif
-
-	releaseRayCast(cast_hull);
-	releaseRayCast(cast_mesh);
-
-	return total_volume;
-}
-
-}; // end of namespace

+ 0 - 78
Engine/lib/convexDecomp/NvConcavityVolume.h

@@ -1,78 +0,0 @@
-#ifndef NV_CONCAVITY_H
-
-#define NV_CONCAVITY_H
-
-/*
-
-NvConcavityVolume.h : This is a code snippet that computes the volume of concavity of a triangle mesh.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#include "NvUserMemAlloc.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// computes the 'volume of concavity' of a triangle mesh projected against its surrounding convex hull.
-
-NxF32 computeConcavityVolume(NxU32 vcount_hull,
-						     const NxF32 *vertices_hull,
-						     NxU32 tcount_hull,
-						     const NxU32 *indices_hull,
-						     NxU32 vcount_mesh,
-						     const NxF32 *vertices_mesh,
-						     NxU32 tcount_mesh,
-						     const NxU32 *indices_mesh);
-
-}; // end of namespace
-
-#endif

+ 0 - 788
Engine/lib/convexDecomp/NvConvexDecomposition.cpp

@@ -1,788 +0,0 @@
-
-/*
-
-NvConvexDecomposition.cpp : The main interface to the convex decomposition library.
-
-*/
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <math.h>
-#include <float.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "NvConvexDecomposition.h"
-#include "NvHashMap.h"
-#include "NvFloatMath.h"
-#include "NvRemoveTjunctions.h"
-#include "NvMeshIslandGeneration.h"
-#include "NvStanHull.h"
-#include "NvConcavityVolume.h"
-#include "NvSplitMesh.h"
-#include "NvThreadConfig.h"
-
-
-#pragma warning(disable:4996 4100 4189)
-
-namespace CONVEX_DECOMPOSITION
-{
-
-
-#define GRANULARITY 0.0000000001f
-
-typedef CONVEX_DECOMPOSITION::Array< NxU32 > NxU32Array;
-
-class ConvexHull : public Memalloc
-{
-public:
-	ConvexHull(NxU32 vcount,const NxF32 *vertices,NxU32 tcount,const NxU32 *indices)
-	{
-		mTested = false;
-		mVcount = vcount;
-		mTcount = tcount;
-		mVertices = 0;
-		mIndices = 0;
-		mHullVolume = 0;
-		if ( vcount )
-		{
-			mVertices = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*3*vcount);
-			memcpy(mVertices,vertices,sizeof(NxF32)*3*vcount);
-		}
-		if ( tcount )
-		{
-			mIndices = (NxU32 *)MEMALLOC_MALLOC(sizeof(NxU32)*3*tcount);
-			memcpy(mIndices,indices,sizeof(NxU32)*3*tcount);
-		}
-		if ( mVcount && mTcount )
-		{
-			mHullVolume = fm_computeMeshVolume( mVertices, mTcount, mIndices);
-		}
-	}
-
-	~ConvexHull(void)
-	{
-		reset();
-	}
-
-    void reset(void)
-    {
-		MEMALLOC_FREE(mVertices);
-		MEMALLOC_FREE(mIndices);
-		mVertices = 0;
-		mIndices = 0;
-		mVcount = 0;
-		mTcount = 0;
-		mHullVolume = 0;
-	}
-
-	// return true if merging this hull with the 'mergeHull' produces a new convex hull which is no greater in volume than the
-	// mergeThresholdPercentage
-	bool canMerge(ConvexHull *mergeHull,NxF32 mergeThresholdPercent,NxU32 maxVertices,NxF32 skinWidth,NxF32 &percent)
-	{
-		bool ret = false;
-
-		if ( mHullVolume > 0 && mergeHull->mHullVolume > 0 )
-		{
-			NxU32 combineVcount = mVcount + mergeHull->mVcount;
-			NxF32 *vertices = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*combineVcount*3);
-			NxF32 *dest = vertices;
-			const NxF32 *source = mVertices;
-
-			for (NxU32 i=0; i<mVcount; i++)
-			{
-				dest[0] = source[0];
-				dest[1] = source[1];
-				dest[2] = source[2];
-				dest+=3;
-				source+=3;
-			}
-			source = mergeHull->mVertices;
-			for (NxU32 i=0; i<mergeHull->mVcount; i++)
-			{
-				dest[0] = source[0];
-				dest[1] = source[1];
-				dest[2] = source[2];
-				dest+=3;
-				source+=3;
-			}
-
-			// create the combined convex hull.
-    		HullDesc hd;
-    		hd.mVcount 			= combineVcount;
-    		hd.mVertices 		= vertices;
-    		hd.mVertexStride 	= sizeof(NxF32)*3;
-    		hd.mMaxVertices 	= maxVertices;
-    		hd.mSkinWidth		= skinWidth;
-    		HullLibrary hl;
-    		HullResult result;
-    		hl.CreateConvexHull(hd,result);
-
-			NxF32 combinedVolume = fm_computeMeshVolume(result.mOutputVertices, result.mNumFaces, result.mIndices );
-			NxF32 seperateVolume = mHullVolume+mergeHull->mHullVolume;
-
-			NxF32 percentMerge = 100 - (seperateVolume*100 / combinedVolume );
-
-			if ( percentMerge <= mergeThresholdPercent )
-			{
-				percent = percentMerge;
-				ret = true;
-			}
-			MEMALLOC_FREE(vertices);
-			hl.ReleaseResult(result);
-		}
-		return ret;
-	}
-
-	void merge(ConvexHull *mergeHull,NxU32 maxVertices,NxF32 skinWidth)
-	{
-		NxU32 combineVcount = mVcount + mergeHull->mVcount;
-		NxF32 *vertices = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*combineVcount*3);
-		NxF32 *dest = vertices;
-		const NxF32 *source = mVertices;
-
-		for (NxU32 i=0; i<mVcount; i++)
-		{
-			dest[0] = source[0];
-			dest[1] = source[1];
-			dest[2] = source[2];
-			dest+=3;
-			source+=3;
-		}
-		source = mergeHull->mVertices;
-		for (NxU32 i=0; i<mergeHull->mVcount; i++)
-		{
-			dest[0] = source[0];
-			dest[1] = source[1];
-			dest[2] = source[2];
-			dest+=3;
-			source+=3;
-		}
-
-		// create the combined convex hull.
-   		HullDesc hd;
-   		hd.mVcount 			= combineVcount;
-   		hd.mVertices 		= vertices;
-   		hd.mVertexStride 	= sizeof(NxF32)*3;
-   		hd.mMaxVertices 	= maxVertices;
-   		hd.mSkinWidth		= skinWidth;
-   		HullLibrary hl;
-   		HullResult result;
-   		hl.CreateConvexHull(hd,result);
-
-		reset();
-		mergeHull->reset();
-		mergeHull->mTested = true; // it's been tested.
-		mVcount = result.mNumOutputVertices;
-		mVertices = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*3*mVcount);
-		memcpy(mVertices,result.mOutputVertices,sizeof(NxF32)*3*mVcount);
-		mTcount = result.mNumFaces;
-		mIndices = (NxU32 *)MEMALLOC_MALLOC(sizeof(NxU32)*mTcount*3);
-		memcpy(mIndices, result.mIndices, sizeof(NxU32)*mTcount*3);
-
-		MEMALLOC_FREE(vertices);
-		hl.ReleaseResult(result);
-	}
-
-	void setTested(bool state)
-	{
-		mTested = state;
-	}
-
-    bool beenTested(void) const { return mTested; };
-
-	bool    mTested;
-	NxF32	mHullVolume;
-	NxU32	mVcount;
-	NxF32	*mVertices;
-	NxU32	mTcount;
-	NxU32	*mIndices;
-};
-
-typedef Array< ConvexHull *> ConvexHullVector;
-
-class ConvexDecomposition : public iConvexDecomposition, public CONVEX_DECOMPOSITION::Memalloc, public ThreadInterface
-{
-public:
-	ConvexDecomposition(void)
-	{
-		mVertexIndex = 0;
-		mComplete = false;
-		mCancel = false;
-		mThread = 0;
-	}
-
-	~ConvexDecomposition(void)
-	{
-		wait();
-		reset();
-		if ( mThread )
-		{
-			tc_releaseThread(mThread);
-		}
-	}
-
-	void wait(void) const
-	{
-		while ( mThread && !mComplete );
-	}
-
-	virtual void reset(void)  // reset the input mesh data.
-	{
-		wait();
-		if ( mVertexIndex )
-		{
-			fm_releaseVertexIndex(mVertexIndex);
-			mVertexIndex = 0;
-		}
-		mIndices.clear();
-		ConvexHullVector::Iterator i;
-		for (i=mHulls.begin(); i!=mHulls.end(); ++i)
-		{
-			ConvexHull *ch = (*i);
-			delete ch;
-		}
-		mHulls.clear();
-	}
-
-	virtual bool addTriangle(const NxF32 *p1,const NxF32 *p2,const NxF32 *p3)
-	{
-		bool ret = true;
-		wait();
-		if ( mVertexIndex == 0 )
-		{
-			mVertexIndex = fm_createVertexIndex(GRANULARITY,false);
-		}
-
-		bool newPos;
-		NxU32 i1 = mVertexIndex->getIndex(p1,newPos);
-		NxU32 i2 = mVertexIndex->getIndex(p2,newPos);
-		NxU32 i3 = mVertexIndex->getIndex(p3,newPos);
-
-		if ( i1 == i2 || i1 == i3 || i2 == i3 )
-		{
-			ret = false; // triangle is degenerate
-		}
-		else
-		{
-			mIndices.pushBack(i1);
-			mIndices.pushBack(i2);
-			mIndices.pushBack(i3);
-		}
-		return ret;
-	}
-
-	ConvexHull * getNonTested(void) const
-	{
-		ConvexHull *ret = 0;
-		for (NxU32 i=0; i<mHulls.size(); i++)
-		{
-			ConvexHull *ch = mHulls[i];
-			if ( !ch->beenTested() )
-			{
-				ret = ch;
-				break;
-			}
-		}
-		return ret;
-	}
-
-	virtual NxU32 computeConvexDecomposition(NxF32 skinWidth,
-											 NxU32 decompositionDepth,
-											 NxU32 maxHullVertices,
-											 NxF32 concavityThresholdPercent,
-											 NxF32 mergeThresholdPercent,
-											 NxF32 volumeSplitThresholdPercent,
-											 bool  useInitialIslandGeneration,
-											 bool  useIslandGeneration,
-											 bool  useThreads)
-	{
-		NxU32 ret = 0;
-
-		if ( mThread )
-			return 0;
-
-		if ( mVertexIndex )
-		{
-
-			mSkinWidth = skinWidth;
-			mDecompositionDepth = decompositionDepth;
-			mMaxHullVertices = maxHullVertices;
-			mConcavityThresholdPercent = concavityThresholdPercent;
-			mMergeThresholdPercent = mergeThresholdPercent;
-			mVolumeSplitThresholdPercent = volumeSplitThresholdPercent;
-			mUseInitialIslandGeneration = useInitialIslandGeneration;
-			mUseIslandGeneration = false; // Not currently supported. useIslandGeneration;
-			mComplete = false;
-			mCancel   = false;
-
-			if ( useThreads )
-			{
-				mThread = tc_createThread(this);
-			}
-			else
-			{
-				threadMain();
-				ret = getHullCount();
-       		}
-    	}
-		return ret;
-	}
-
-	void performConvexDecomposition(NxU32 vcount,
-									 const NxF32 *vertices,
-									 NxU32 tcount,
-									 const NxU32 *indices,
-									 NxF32 skinWidth,
-									 NxU32 decompositionDepth,
-									 NxU32 maxHullVertices,
-									 NxF32 concavityThresholdPercent,
-									 NxF32 mergeThresholdPercent,
-									 NxF32 volumeSplitThresholdPercent,
-									 bool  useInitialIslandGeneration,
-									 bool  useIslandGeneration,
-									 NxU32 depth)
-	{
-		if ( mCancel ) return;
-		if ( depth >= decompositionDepth ) return;
-
-		RemoveTjunctionsDesc desc;
-		desc.mVcount 	= vcount;
-		desc.mVertices 	= vertices;
-		desc.mTcount	= tcount;
-		desc.mIndices	= indices;
-
-#if 0
-		RemoveTjunctions *rt = createRemoveTjunctions();
-		rt->removeTjunctions(desc);
-#else
-
-		desc.mTcountOut = desc.mTcount;
-		desc.mIndicesOut = desc.mIndices;
-
-#endif
-   	    // ok..we now have a clean mesh without any tjunctions.
-		bool island = (depth == 0 ) ? useInitialIslandGeneration : useIslandGeneration;
-   	    if ( island )
-   	    {
-   	    	MeshIslandGeneration *mi = createMeshIslandGeneration();
-   	    	NxU32 icount = mi->islandGenerate(desc.mTcountOut,desc.mIndicesOut,desc.mVertices);
-   	    	for (NxU32 i=0; i<icount && !mCancel; i++)
-   	    	{
-				NxU32 tcount;
-   	    		NxU32 *indices = mi->getIsland(i,tcount);
-
-   	    		baseConvexDecomposition(desc.mVcount,desc.mVertices,
-											tcount,indices,
-   	    									skinWidth,
-   	    									decompositionDepth,
-   	    									maxHullVertices,
-   											concavityThresholdPercent,
-   											mergeThresholdPercent,
-   											volumeSplitThresholdPercent,
-											useInitialIslandGeneration,
-   											useIslandGeneration,depth);
-   			}
-   			releaseMeshIslandGeneration(mi);
-   	    }
-   	    else
-   	    {
-       		baseConvexDecomposition(desc.mVcount,desc.mVertices,desc.mTcountOut,
-									desc.mIndicesOut,
-   									skinWidth,
-   									decompositionDepth,
-   									maxHullVertices,
-									concavityThresholdPercent,
-									mergeThresholdPercent,
-									volumeSplitThresholdPercent,
-									useInitialIslandGeneration,
-   									useIslandGeneration,depth);
-   	    }
-#if 0
-   	    releaseRemoveTjunctions(rt);
-#endif
-	}
-
-	virtual void baseConvexDecomposition(NxU32 vcount,
-										 const NxF32 *vertices,
-										 NxU32 tcount,
-										 const NxU32 *indices,
-										 NxF32 skinWidth,
-										 NxU32 decompositionDepth,
-										 NxU32 maxHullVertices,
-										 NxF32 concavityThresholdPercent,
-										 NxF32 mergeThresholdPercent,
-										 NxF32 volumeSplitThresholdPercent,
-										 bool  useInitialIslandGeneration,
-										 bool  useIslandGeneration,
-										 NxU32 depth)
-	{
-
-		if ( mCancel ) return;
-
-		bool split = false; // by default we do not split
-
-
-		NxU32 *out_indices 	= (NxU32 *)MEMALLOC_MALLOC( sizeof(NxU32)*tcount*3 );
-		NxF32 *out_vertices = (NxF32 *)MEMALLOC_MALLOC( sizeof(NxF32)*3*vcount );
-
-		NxU32 out_vcount = fm_copyUniqueVertices( vcount, vertices, out_vertices, tcount, indices, out_indices );
-		// get a copy of only the unique vertices which are actually being used.
-
-		HullDesc hd;
-		hd.mVcount 			= out_vcount;
-		hd.mVertices 		= out_vertices;
-		hd.mVertexStride 	= sizeof(NxF32)*3;
-		hd.mMaxVertices 	= maxHullVertices;
-		hd.mSkinWidth		= skinWidth;
-		HullLibrary hl;
-		HullResult result;
-		hl.CreateConvexHull(hd,result);
-
-		NxF32 meshVolume = fm_computeMeshVolume(result.mOutputVertices, result.mNumFaces, result.mIndices );
-
-		if ( (depth+1) < decompositionDepth )
-		{
-			// compute the volume of this mesh...
-			NxF32 percentVolume = (meshVolume*100)/mOverallMeshVolume; // what percentage of the overall mesh volume are we?
-			if ( percentVolume > volumeSplitThresholdPercent ) // this piece must be greater than the volume split threshold percent
-			{
-				// ok..now we will compute the concavity...
-				NxF32 concave_volume = computeConcavityVolume(result.mNumOutputVertices, result.mOutputVertices, result.mNumFaces, result.mIndices, out_vcount, out_vertices,	tcount, out_indices );
-				NxF32 concave_percent = (concave_volume*100) / meshVolume;
-				if ( concave_percent >=	concavityThresholdPercent )
-				{
-					// ready to do split here..
-					split = true;
-				}
-			}
-		}
-
-		if ( !split )
-		{
-			saveConvexHull(result.mNumOutputVertices,result.mOutputVertices,result.mNumFaces,result.mIndices);
-		}
-
-		// Compute the best fit plane relative to the computed convex hull.
-		NxF32 plane[4];
-		bool ok = fm_computeSplitPlane(result.mNumOutputVertices,result.mOutputVertices,result.mNumFaces,result.mIndices,plane);
-		assert(ok);
-
-		hl.ReleaseResult(result);
-		MEMALLOC_FREE(out_indices);
-		MEMALLOC_FREE(out_vertices);
-
-		if ( split )
-		{
-			iSplitMesh *sm = createSplitMesh();
-
-			NvSplitMesh n;
-			n.mVcount 	=	vcount;
-			n.mVertices =  vertices;
-			n.mTcount	= tcount;
-			n.mIndices	= indices;
-			if ( ok )
-			{
-				NvSplitMesh leftMesh;
-				NvSplitMesh rightMesh;
-
-				sm->splitMesh(n,leftMesh,rightMesh,plane,GRANULARITY);
-
-				if ( leftMesh.mTcount )
-				{
-					performConvexDecomposition(leftMesh.mVcount,
-											   leftMesh.mVertices,
-											   leftMesh.mTcount,
-											   leftMesh.mIndices,
-											   skinWidth,
-											   decompositionDepth,
-											   maxHullVertices,
-											   concavityThresholdPercent,
-											   mergeThresholdPercent,
-											   volumeSplitThresholdPercent,
-											   useInitialIslandGeneration,
-											   useIslandGeneration,
-											   depth+1);
-
-				}
-				if ( rightMesh.mTcount )
-				{
-					performConvexDecomposition(rightMesh.mVcount,
-											   rightMesh.mVertices,
-											   rightMesh.mTcount,
-											   rightMesh.mIndices,
-											   skinWidth,
-											   decompositionDepth,
-											   maxHullVertices,
-											   concavityThresholdPercent,
-											   mergeThresholdPercent,
-											   volumeSplitThresholdPercent,
-											   useInitialIslandGeneration,
-											   useIslandGeneration,
-											   depth+1);
-				}
-			}
-			releaseSplitMesh(sm);
-		}
-	}
-
-	// Copies only the vertices which are actually used.
-	// Then computes the convex hull around these used vertices.
-	// Next computes the volume of this convex hull.
-	// Frees up scratch memory and returns the volume of the convex hull around the source triangle mesh.
-	NxF32 computeHullMeshVolume(NxU32 vcount,const NxF32 *vertices,NxU32 tcount,const NxU32 *indices,NxU32 maxVertices,NxF32 skinWidth)
-	{
-		if ( mCancel ) return 0;
-		// first thing we should do is compute the overall mesh volume.
-		NxU32 *out_indices 	= (NxU32 *)MEMALLOC_MALLOC( sizeof(NxU32)*tcount*3 );
-		NxF32 *out_vertices = (NxF32 *)MEMALLOC_MALLOC( sizeof(NxF32)*3*vcount );
-
-		NxU32 out_vcount = fm_copyUniqueVertices( vcount, vertices, out_vertices, tcount, indices, out_indices );
-		// get a copy of only the unique vertices which are actually being used.
-
-		HullDesc hd;
-		hd.mVcount 			= out_vcount;
-		hd.mVertices 		= out_vertices;
-		hd.mVertexStride 	= sizeof(NxF32)*3;
-		hd.mMaxVertices 	= maxVertices;
-		hd.mSkinWidth		= skinWidth;
-		HullLibrary hl;
-		HullResult result;
-		hl.CreateConvexHull(hd,result);
-
-		NxF32 volume = fm_computeMeshVolume(result.mOutputVertices, result.mNumFaces, result.mIndices );
-
-		hl.ReleaseResult(result);
-		MEMALLOC_FREE(out_indices);
-		MEMALLOC_FREE(out_vertices);
-
-		return volume;
-	}
-
-
-	virtual bool isComputeComplete(void)  // if building the convex hulls in a background thread, this returns true if it is complete.
-	{
-		bool ret = true;
-
-		if ( mThread )
-		{
-			ret = mComplete;
-			if ( ret )
-			{
-				tc_releaseThread(mThread);
-				mThread = 0;
-			}
-		}
-
-		return ret;
-	}
-
-
-	virtual NxU32 getHullCount(void) 
-	{
-		NxU32 hullCount = 0;
-		wait();
-		if ( mCancel )
-		{
-			reset();
-		}
-		for (NxU32 i=0; i<mHulls.size(); i++)
-		{
-			ConvexHull *ch = mHulls[i];
-			if ( ch->mTcount )
-			{
-				hullCount++;
-			}
-		}
-		return hullCount;
-	}
-
-	virtual bool  getConvexHullResult(NxU32 hullIndex,ConvexHullResult &result)
-	{
-		bool ret = false;
-
-		wait();
-		NxU32 index = 0;
-		for (NxU32 i=0; i<mHulls.size(); i++)
-		{
-			ConvexHull *ch = mHulls[i];
-			if ( ch->mTcount )
-			{
-				if ( hullIndex == index )
-				{
-					ret = true;
-					result.mVcount = ch->mVcount;
-					result.mTcount = ch->mTcount;
-					result.mVertices = ch->mVertices;
-					result.mIndices = ch->mIndices;
-					break;
-				}
-				index++;
-			}
-		}
-
-		return ret;
-	}
-
-	void saveConvexHull(NxU32 vcount,const NxF32 *vertices,NxU32 tcount,const NxU32 *indices)
-	{
-		ConvexHull *ch = MEMALLOC_NEW(ConvexHull)(vcount,vertices,tcount,indices);
-		mHulls.pushBack(ch);
-	}
-
-  	virtual void threadMain(void)
-  	{
-    	mOverallMeshVolume = computeHullMeshVolume( mVertexIndex->getVcount(),
-    												mVertexIndex->getVerticesFloat(),
-    												mIndices.size()/3,
-    												&mIndices[0],
-    												mMaxHullVertices, mSkinWidth );
-
-   		performConvexDecomposition(mVertexIndex->getVcount(),mVertexIndex->getVerticesFloat(),
-         							       	mIndices.size()/3,&mIndices[0],
-         									mSkinWidth,
-         									mDecompositionDepth,
-         									mMaxHullVertices,
-     										mConcavityThresholdPercent,
-     										mMergeThresholdPercent,
-     										mVolumeSplitThresholdPercent,
-    										mUseInitialIslandGeneration,
-     										mUseIslandGeneration,0);
-
-		if ( mHulls.size() && !mCancel )
-		{
-			// While convex hulls can be merged...
-			ConvexHull *ch = getNonTested();
-			while ( ch && !mCancel )
-			{
-				// Sort all convex hulls by volume, largest to smallest.
-				NxU32 hullCount = mHulls.size();
-				ConvexHull *bestHull = 0;
-				NxF32 bestPercent = 100;
-
-				for (NxU32 i=0; i<hullCount; i++)
-				{
-					ConvexHull *mergeHull = mHulls[i];
-					if ( !mergeHull->beenTested() && mergeHull != ch )
-					{
-						NxF32 percent;
-            			if ( ch->canMerge(mergeHull,mMergeThresholdPercent,mMaxHullVertices,mSkinWidth,percent) )
-            			{
-            				if ( percent < bestPercent )
-            				{
-            					bestHull = mergeHull;
-            					bestPercent = percent;
-            				}
-            			}
-            		}
-				}
-
-				if ( bestHull )
-				{
-            		ch->merge(bestHull,mMaxHullVertices,mSkinWidth);
-				}
-				else
-				{
-					ch->setTested(true);
-				}
-
-				ch = getNonTested();
-			}
-		}
-    	mComplete = true;
-  	}
-
-	virtual bool cancelCompute(void)  // cause background thread computation to abort early.  Will return no results. Use 'isComputeComplete' to confirm the thread is done.
-	{
-		bool ret = false;
-
-		if ( mThread && !mComplete )
-		{
-			mCancel = true;
-			ret = true;
-		}
-
-		return ret;
-	}
-
-private:
-	bool				mComplete;
-	bool				mCancel;
-	fm_VertexIndex 		*mVertexIndex;
-	NxU32Array			mIndices;
-	NxF32				mOverallMeshVolume;
-	ConvexHullVector	mHulls;
-	Thread				*mThread;
-
-	NxF32 				mSkinWidth;
-	NxU32 				mDecompositionDepth;
-	NxU32 				mMaxHullVertices;
-	NxF32 				mConcavityThresholdPercent;
-	NxF32 				mMergeThresholdPercent;
-	NxF32 				mVolumeSplitThresholdPercent;
-	bool  				mUseInitialIslandGeneration;
-	bool  				mUseIslandGeneration;
-
-};
-
-
-iConvexDecomposition * createConvexDecomposition(void)
-{
-	ConvexDecomposition *cd = MEMALLOC_NEW(ConvexDecomposition);
-	return static_cast< iConvexDecomposition *>(cd);
-
-}
-
-void				   releaseConvexDecomposition(iConvexDecomposition *ic)
-{
-	ConvexDecomposition *cd = static_cast< ConvexDecomposition *>(ic);
-	delete cd;
-}
-
-}; // end of namespace

+ 0 - 111
Engine/lib/convexDecomp/NvConvexDecomposition.h

@@ -1,111 +0,0 @@
-#ifndef CONVEX_DECOMPOSITION_H
-
-#define CONVEX_DECOMPOSITION_H
-
-/*
-
-NvConvexDecomposition.h : The main interface to the convex decomposition library.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#include "NvSimpleTypes.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-struct ConvexHullResult
-{
-	NxU32	mVcount;				// number of vertices.
-	NxF32	*mVertices;				// vertex positions.
-	NxU32	mTcount;				// number of triangles.
-	NxU32	*mIndices;				// indexed triangle list.
-};
-
-class iConvexDecomposition
-{
-public:
-	virtual void reset(void) = 0; // reset the input mesh data.
-
-	virtual bool addTriangle(const NxF32 *p1,const NxF32 *p2,const NxF32 *p3) = 0; // add the input mesh one triangle at a time.
-
-	virtual NxU32 computeConvexDecomposition(NxF32 skinWidth=0,			// Skin width on the convex hulls generated
-											 NxU32 decompositionDepth=8, // recursion depth for convex decomposition.
-											 NxU32 maxHullVertices=64,	// maximum number of vertices in output convex hulls.
-											 NxF32 concavityThresholdPercent=0.1f, // The percentage of concavity allowed without causing a split to occur.
-											 NxF32 mergeThresholdPercent=30.0f,    // The percentage of volume difference allowed to merge two convex hulls.
-											 NxF32 volumeSplitThresholdPercent=0.1f, // The percentage of the total volume of the object above which splits will still occur.
-											 bool  useInitialIslandGeneration=true,	// whether or not to perform initial island generation on the input mesh.
-											 bool  useIslandGeneration=false,		// Whether or not to perform island generation at each split.  Currently disabled due to bug in RemoveTjunctions
-											 bool  useBackgroundThread=true) = 0;	// Whether or not to compute the convex decomposition in a background thread, the default is true.
-
-	virtual bool isComputeComplete(void) = 0; // if building the convex hulls in a background thread, this returns true if it is complete.
-
-	virtual bool cancelCompute(void) = 0; // cause background thread computation to abort early.  Will return no results. Use 'isComputeComplete' to confirm the thread is done.
-
-
-	virtual NxU32 getHullCount(void)  = 0; // returns the number of convex hulls produced.
-	virtual bool  getConvexHullResult(NxU32 hullIndex,ConvexHullResult &result) = 0; // returns each convex hull result.
-
-protected:
-   	virtual ~iConvexDecomposition(void)
-   	{
-	}
-
-};
-
-
-iConvexDecomposition * createConvexDecomposition(void);
-void				   releaseConvexDecomposition(iConvexDecomposition *ic);
-
-}; // end of namespace
-
-#endif

+ 0 - 74
Engine/lib/convexDecomp/NvFloatMath.cpp

@@ -1,74 +0,0 @@
-// a set of routines that let you do common 3d math
-// operations without any vector, matrix, or quaternion
-// classes or templates.
-//
-// a vector (or point) is a 'NxF32 *' to 3 floating point numbers.
-// a matrix is a 'NxF32 *' to an array of 16 floating point numbers representing a 4x4 transformation matrix compatible with D3D or OGL
-// a quaternion is a 'NxF32 *' to 4 floats representing a quaternion x,y,z,w
-//
-//
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-#include <float.h>
-
-#include "NvFloatMath.h"
-
-#define REAL NxF32
-
-#include "NvFloatMath.inl"
-
-#undef REAL
-#define REAL NxF64
-
-#include "NvFloatMath.inl"

+ 0 - 586
Engine/lib/convexDecomp/NvFloatMath.h

@@ -1,586 +0,0 @@
-#ifndef NV_FLOAT_MATH_H
-
-#define NV_FLOAT_MATH_H
-
-#include "NvUserMemAlloc.h"
-
-// a set of routines that let you do common 3d math
-// operations without any vector, matrix, or quaternion
-// classes or templates.
-//
-// a vector (or point) is a 'NxF32 *' to 3 floating point numbers.
-// a matrix is a 'NxF32 *' to an array of 16 floating point numbers representing a 4x4 transformation matrix compatible with D3D or OGL
-// a quaternion is a 'NxF32 *' to 4 floats representing a quaternion x,y,z,w
-//
-//
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#include <float.h>
-
-namespace CONVEX_DECOMPOSITION
-{
-
-enum FM_ClipState
-{
-  FMCS_XMIN       = (1<<0),
-  FMCS_XMAX       = (1<<1),
-  FMCS_YMIN       = (1<<2),
-  FMCS_YMAX       = (1<<3),
-  FMCS_ZMIN       = (1<<4),
-  FMCS_ZMAX       = (1<<5),
-};
-
-enum FM_Axis
-{
-  FM_XAXIS   = (1<<0),
-  FM_YAXIS   = (1<<1),
-  FM_ZAXIS   = (1<<2)
-};
-
-enum LineSegmentType
-{
-  LS_START,
-  LS_MIDDLE,
-  LS_END
-};
-
-
-const NxF32 FM_PI = 3.1415926535897932384626433832795028841971693993751f;
-const NxF32 FM_DEG_TO_RAD = ((2.0f * FM_PI) / 360.0f);
-const NxF32 FM_RAD_TO_DEG = (360.0f / (2.0f * FM_PI));
-
-//***************** Float versions
-//***
-//*** vectors are assumed to be 3 floats or 3 doubles representing X, Y, Z
-//*** quaternions are assumed to be 4 floats or 4 doubles representing X,Y,Z,W
-//*** matrices are assumed to be 16 floats or 16 doubles representing a standard D3D or OpenGL style 4x4 matrix
-//*** bounding volumes are expressed as two sets of 3 floats/NxF64 representing bmin(x,y,z) and bmax(x,y,z)
-//*** Plane equations are assumed to be 4 floats or 4 doubles representing Ax,By,Cz,D
-
-FM_Axis fm_getDominantAxis(const NxF32 normal[3]);
-FM_Axis fm_getDominantAxis(const NxF64 normal[3]);
-
-void fm_decomposeTransform(const NxF32 local_transform[16],NxF32 trans[3],NxF32 rot[4],NxF32 scale[3]);
-void fm_decomposeTransform(const NxF64 local_transform[16],NxF64 trans[3],NxF64 rot[4],NxF64 scale[3]);
-
-void  fm_multiplyTransform(const NxF32 *pA,const NxF32 *pB,NxF32 *pM);
-void  fm_multiplyTransform(const NxF64 *pA,const NxF64 *pB,NxF64 *pM);
-
-void  fm_inverseTransform(const NxF32 matrix[16],NxF32 inverse_matrix[16]);
-void  fm_inverseTransform(const NxF64 matrix[16],NxF64 inverse_matrix[16]);
-
-void  fm_identity(NxF32 matrix[16]); // set 4x4 matrix to identity.
-void  fm_identity(NxF64 matrix[16]); // set 4x4 matrix to identity.
-
-void  fm_inverseRT(const NxF32 matrix[16], const NxF32 pos[3], NxF32 t[3]); // inverse rotate translate the point.
-void  fm_inverseRT(const NxF64 matrix[16],const NxF64 pos[3],NxF64 t[3]); // inverse rotate translate the point.
-
-void  fm_transform(const NxF32 matrix[16], const NxF32 pos[3], NxF32 t[3]); // rotate and translate this point.
-void  fm_transform(const NxF64 matrix[16],const NxF64 pos[3],NxF64 t[3]); // rotate and translate this point.
-
-NxF32  fm_getDeterminant(const NxF32 matrix[16]);
-NxF64 fm_getDeterminant(const NxF64 matrix[16]);
-
-void fm_getSubMatrix(NxI32 ki,NxI32 kj,NxF32 pDst[16],const NxF32 matrix[16]);
-void fm_getSubMatrix(NxI32 ki,NxI32 kj,NxF64 pDst[16],const NxF32 matrix[16]);
-
-void  fm_rotate(const NxF32 matrix[16],const NxF32 pos[3],NxF32 t[3]); // only rotate the point by a 4x4 matrix, don't translate.
-void  fm_rotate(const NxF64 matri[16],const NxF64 pos[3],NxF64 t[3]); // only rotate the point by a 4x4 matrix, don't translate.
-
-void  fm_eulerToMatrix(NxF32 ax,NxF32 ay,NxF32 az,NxF32 matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
-void  fm_eulerToMatrix(NxF64 ax,NxF64 ay,NxF64 az,NxF64 matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
-
-void  fm_getAABB(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 bmin[3],NxF32 bmax[3]);
-void  fm_getAABB(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 bmin[3],NxF64 bmax[3]);
-
-void  fm_getAABBCenter(const NxF32 bmin[3],const NxF32 bmax[3],NxF32 center[3]);
-void  fm_getAABBCenter(const NxF64 bmin[3],const NxF64 bmax[3],NxF64 center[3]);
-
-void  fm_eulerToQuat(NxF32 x,NxF32 y,NxF32 z,NxF32 quat[4]); // convert euler angles to quaternion.
-void  fm_eulerToQuat(NxF64 x,NxF64 y,NxF64 z,NxF64 quat[4]); // convert euler angles to quaternion.
-
-void  fm_quatToEuler(const NxF32 quat[4],NxF32 &ax,NxF32 &ay,NxF32 &az);
-void  fm_quatToEuler(const NxF64 quat[4],NxF64 &ax,NxF64 &ay,NxF64 &az);
-
-void  fm_eulerToQuat(const NxF32 euler[3],NxF32 quat[4]); // convert euler angles to quaternion. Angles must be radians not degrees!
-void  fm_eulerToQuat(const NxF64 euler[3],NxF64 quat[4]); // convert euler angles to quaternion.
-
-void  fm_scale(NxF32 x,NxF32 y,NxF32 z,NxF32 matrix[16]); // apply scale to the matrix.
-void  fm_scale(NxF64 x,NxF64 y,NxF64 z,NxF64 matrix[16]); // apply scale to the matrix.
-
-void  fm_eulerToQuatDX(NxF32 x,NxF32 y,NxF32 z,NxF32 quat[4]); // convert euler angles to quaternion using the fucked up DirectX method
-void  fm_eulerToQuatDX(NxF64 x,NxF64 y,NxF64 z,NxF64 quat[4]); // convert euler angles to quaternion using the fucked up DirectX method
-
-void  fm_eulerToMatrixDX(NxF32 x,NxF32 y,NxF32 z,NxF32 matrix[16]); // convert euler angles to quaternion using the fucked up DirectX method.
-void  fm_eulerToMatrixDX(NxF64 x,NxF64 y,NxF64 z,NxF64 matrix[16]); // convert euler angles to quaternion using the fucked up DirectX method.
-
-void  fm_quatToMatrix(const NxF32 quat[4],NxF32 matrix[16]); // convert quaterinion rotation to matrix, translation set to zero.
-void  fm_quatToMatrix(const NxF64 quat[4],NxF64 matrix[16]); // convert quaterinion rotation to matrix, translation set to zero.
-
-void  fm_quatRotate(const NxF32 quat[4],const NxF32 v[3],NxF32 r[3]); // rotate a vector directly by a quaternion.
-void  fm_quatRotate(const NxF64 quat[4],const NxF64 v[3],NxF64 r[3]); // rotate a vector directly by a quaternion.
-
-void  fm_getTranslation(const NxF32 matrix[16],NxF32 t[3]);
-void  fm_getTranslation(const NxF64 matrix[16],NxF64 t[3]);
-
-void  fm_setTranslation(const NxF32 *translation,NxF32 matrix[16]);
-void  fm_setTranslation(const NxF64 *translation,NxF64 matrix[16]);
-
-void  fm_multiplyQuat(const NxF32 *qa,const NxF32 *qb,NxF32 *quat);
-void  fm_multiplyQuat(const NxF64 *qa,const NxF64 *qb,NxF64 *quat);
-
-void  fm_matrixToQuat(const NxF32 matrix[16],NxF32 quat[4]); // convert the 3x3 portion of a 4x4 matrix into a quaterion as x,y,z,w
-void  fm_matrixToQuat(const NxF64 matrix[16],NxF64 quat[4]); // convert the 3x3 portion of a 4x4 matrix into a quaterion as x,y,z,w
-
-NxF32 fm_sphereVolume(NxF32 radius); // return's the volume of a sphere of this radius (4/3 PI * R cubed )
-NxF64 fm_sphereVolume(NxF64 radius); // return's the volume of a sphere of this radius (4/3 PI * R cubed )
-
-NxF32 fm_cylinderVolume(NxF32 radius,NxF32 h);
-NxF64 fm_cylinderVolume(NxF64 radius,NxF64 h);
-
-NxF32 fm_capsuleVolume(NxF32 radius,NxF32 h);
-NxF64 fm_capsuleVolume(NxF64 radius,NxF64 h);
-
-NxF32 fm_distance(const NxF32 p1[3],const NxF32 p2[3]);
-NxF64 fm_distance(const NxF64 p1[3],const NxF64 p2[3]);
-
-NxF32 fm_distanceSquared(const NxF32 p1[3],const NxF32 p2[3]);
-NxF64 fm_distanceSquared(const NxF64 p1[3],const NxF64 p2[3]);
-
-NxF32 fm_distanceSquaredXZ(const NxF32 p1[3],const NxF32 p2[3]);
-NxF64 fm_distanceSquaredXZ(const NxF64 p1[3],const NxF64 p2[3]);
-
-NxF32 fm_computePlane(const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3],NxF32 *n); // return D
-NxF64 fm_computePlane(const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3],NxF64 *n); // return D
-
-NxF32 fm_distToPlane(const NxF32 plane[4],const NxF32 pos[3]); // computes the distance of this point from the plane.
-NxF64 fm_distToPlane(const NxF64 plane[4],const NxF64 pos[3]); // computes the distance of this point from the plane.
-
-NxF32 fm_dot(const NxF32 p1[3],const NxF32 p2[3]);
-NxF64 fm_dot(const NxF64 p1[3],const NxF64 p2[3]);
-
-void  fm_cross(NxF32 cross[3],const NxF32 a[3],const NxF32 b[3]);
-void  fm_cross(NxF64 cross[3],const NxF64 a[3],const NxF64 b[3]);
-
-void  fm_computeNormalVector(NxF32 n[3],const NxF32 p1[3],const NxF32 p2[3]); // as P2-P1 normalized.
-void  fm_computeNormalVector(NxF64 n[3],const NxF64 p1[3],const NxF64 p2[3]); // as P2-P1 normalized.
-
-bool  fm_computeWindingOrder(const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3]); // returns true if the triangle is clockwise.
-bool  fm_computeWindingOrder(const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3]); // returns true if the triangle is clockwise.
-
-NxF32  fm_normalize(NxF32 n[3]); // normalize this vector and return the distance
-NxF64  fm_normalize(NxF64 n[3]); // normalize this vector and return the distance
-
-void  fm_matrixMultiply(const NxF32 A[16],const NxF32 B[16],NxF32 dest[16]);
-void  fm_matrixMultiply(const NxF64 A[16],const NxF64 B[16],NxF64 dest[16]);
-
-void  fm_composeTransform(const NxF32 position[3],const NxF32 quat[4],const NxF32 scale[3],NxF32 matrix[16]);
-void  fm_composeTransform(const NxF64 position[3],const NxF64 quat[4],const NxF64 scale[3],NxF64 matrix[16]);
-
-NxF32 fm_computeArea(const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3]);
-NxF64 fm_computeArea(const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3]);
-
-void  fm_lerp(const NxF32 p1[3],const NxF32 p2[3],NxF32 dest[3],NxF32 lerpValue);
-void  fm_lerp(const NxF64 p1[3],const NxF64 p2[3],NxF64 dest[3],NxF64 lerpValue);
-
-bool  fm_insideTriangleXZ(const NxF32 test[3],const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3]);
-bool  fm_insideTriangleXZ(const NxF64 test[3],const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3]);
-
-bool  fm_insideAABB(const NxF32 pos[3],const NxF32 bmin[3],const NxF32 bmax[3]);
-bool  fm_insideAABB(const NxF64 pos[3],const NxF64 bmin[3],const NxF64 bmax[3]);
-
-bool  fm_insideAABB(const NxF32 obmin[3],const NxF32 obmax[3],const NxF32 tbmin[3],const NxF32 tbmax[3]); // test if bounding box tbmin/tmbax is fully inside obmin/obmax
-bool  fm_insideAABB(const NxF64 obmin[3],const NxF64 obmax[3],const NxF64 tbmin[3],const NxF64 tbmax[3]); // test if bounding box tbmin/tmbax is fully inside obmin/obmax
-
-NxU32 fm_clipTestPoint(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 pos[3]);
-NxU32 fm_clipTestPoint(const NxF64 bmin[3],const NxF64 bmax[3],const NxF64 pos[3]);
-
-NxU32 fm_clipTestPointXZ(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 pos[3]); // only tests X and Z, not Y
-NxU32 fm_clipTestPointXZ(const NxF64 bmin[3],const NxF64 bmax[3],const NxF64 pos[3]); // only tests X and Z, not Y
-
-
-NxU32 fm_clipTestAABB(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3],NxU32 &andCode);
-NxU32 fm_clipTestAABB(const NxF64 bmin[3],const NxF64 bmax[3],const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3],NxU32 &andCode);
-
-
-bool     fm_lineTestAABBXZ(const NxF32 p1[3],const NxF32 p2[3],const NxF32 bmin[3],const NxF32 bmax[3],NxF32 &time);
-bool     fm_lineTestAABBXZ(const NxF64 p1[3],const NxF64 p2[3],const NxF64 bmin[3],const NxF64 bmax[3],NxF64 &time);
-
-bool     fm_lineTestAABB(const NxF32 p1[3],const NxF32 p2[3],const NxF32 bmin[3],const NxF32 bmax[3],NxF32 &time);
-bool     fm_lineTestAABB(const NxF64 p1[3],const NxF64 p2[3],const NxF64 bmin[3],const NxF64 bmax[3],NxF64 &time);
-
-
-void  fm_initMinMax(const NxF32 p[3],NxF32 bmin[3],NxF32 bmax[3]);
-void  fm_initMinMax(const NxF64 p[3],NxF64 bmin[3],NxF64 bmax[3]);
-
-void  fm_initMinMax(NxF32 bmin[3],NxF32 bmax[3]);
-void  fm_initMinMax(NxF64 bmin[3],NxF64 bmax[3]);
-
-void  fm_minmax(const NxF32 p[3],NxF32 bmin[3],NxF32 bmax[3]); // accmulate to a min-max value
-void  fm_minmax(const NxF64 p[3],NxF64 bmin[3],NxF64 bmax[3]); // accmulate to a min-max value
-
-
-NxF32 fm_solveX(const NxF32 plane[4],NxF32 y,NxF32 z); // solve for X given this plane equation and the other two components.
-NxF64 fm_solveX(const NxF64 plane[4],NxF64 y,NxF64 z); // solve for X given this plane equation and the other two components.
-
-NxF32 fm_solveY(const NxF32 plane[4],NxF32 x,NxF32 z); // solve for Y given this plane equation and the other two components.
-NxF64 fm_solveY(const NxF64 plane[4],NxF64 x,NxF64 z); // solve for Y given this plane equation and the other two components.
-
-NxF32 fm_solveZ(const NxF32 plane[4],NxF32 x,NxF32 y); // solve for Z given this plane equation and the other two components.
-NxF64 fm_solveZ(const NxF64 plane[4],NxF64 x,NxF64 y); // solve for Z given this plane equation and the other two components.
-
-bool  fm_computeBestFitPlane(NxU32 vcount,     // number of input data points
-                     const NxF32 *points,     // starting address of points array.
-                     NxU32 vstride,    // stride between input points.
-                     const NxF32 *weights,    // *optional point weighting values.
-                     NxU32 wstride,    // weight stride for each vertex.
-                     NxF32 plane[4]);
-
-bool  fm_computeBestFitPlane(NxU32 vcount,     // number of input data points
-                     const NxF64 *points,     // starting address of points array.
-                     NxU32 vstride,    // stride between input points.
-                     const NxF64 *weights,    // *optional point weighting values.
-                     NxU32 wstride,    // weight stride for each vertex.
-                     NxF64 plane[4]);
-
-
-NxF32  fm_computeBestFitAABB(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 bmin[3],NxF32 bmax[3]); // returns the diagonal distance
-NxF64 fm_computeBestFitAABB(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 bmin[3],NxF64 bmax[3]); // returns the diagonal distance
-
-NxF32  fm_computeBestFitSphere(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 center[3]);
-NxF64  fm_computeBestFitSphere(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 center[3]);
-
-bool fm_lineSphereIntersect(const NxF32 center[3],NxF32 radius,const NxF32 p1[3],const NxF32 p2[3],NxF32 intersect[3]);
-bool fm_lineSphereIntersect(const NxF64 center[3],NxF64 radius,const NxF64 p1[3],const NxF64 p2[3],NxF64 intersect[3]);
-
-bool fm_intersectRayAABB(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 pos[3],const NxF32 dir[3],NxF32 intersect[3]);
-bool fm_intersectLineSegmentAABB(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 p1[3],const NxF32 p2[3],NxF32 intersect[3]);
-
-bool fm_lineIntersectsTriangle(const NxF32 rayStart[3],const NxF32 rayEnd[3],const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3],NxF32 sect[3]);
-bool fm_lineIntersectsTriangle(const NxF64 rayStart[3],const NxF64 rayEnd[3],const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3],NxF64 sect[3]);
-
-bool fm_rayIntersectsTriangle(const NxF32 origin[3],const NxF32 dir[3],const NxF32 v0[3],const NxF32 v1[3],const NxF32 v2[3],NxF32 &t);
-bool fm_rayIntersectsTriangle(const NxF64 origin[3],const NxF64 dir[3],const NxF64 v0[3],const NxF64 v1[3],const NxF64 v2[3],NxF64 &t);
-
-bool fm_raySphereIntersect(const NxF32 center[3],NxF32 radius,const NxF32 pos[3],const NxF32 dir[3],NxF32 distance,NxF32 intersect[3]);
-bool fm_raySphereIntersect(const NxF64 center[3],NxF64 radius,const NxF64 pos[3],const NxF64 dir[3],NxF64 distance,NxF64 intersect[3]);
-
-void fm_catmullRom(NxF32 out_vector[3],const NxF32 p1[3],const NxF32 p2[3],const NxF32 p3[3],const NxF32 *p4, const NxF32 s);
-void fm_catmullRom(NxF64 out_vector[3],const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3],const NxF64 *p4, const NxF64 s);
-
-bool fm_intersectAABB(const NxF32 bmin1[3],const NxF32 bmax1[3],const NxF32 bmin2[3],const NxF32 bmax2[3]);
-bool fm_intersectAABB(const NxF64 bmin1[3],const NxF64 bmax1[3],const NxF64 bmin2[3],const NxF64 bmax2[3]);
-
-
-// computes the rotation quaternion to go from unit-vector v0 to unit-vector v1
-void fm_rotationArc(const NxF32 v0[3],const NxF32 v1[3],NxF32 quat[4]);
-void fm_rotationArc(const NxF64 v0[3],const NxF64 v1[3],NxF64 quat[4]);
-
-NxF32  fm_distancePointLineSegment(const NxF32 Point[3],const NxF32 LineStart[3],const NxF32 LineEnd[3],NxF32 intersection[3],LineSegmentType &type,NxF32 epsilon);
-NxF64 fm_distancePointLineSegment(const NxF64 Point[3],const NxF64 LineStart[3],const NxF64 LineEnd[3],NxF64 intersection[3],LineSegmentType &type,NxF64 epsilon);
-
-
-bool fm_colinear(const NxF64 p1[3],const NxF64 p2[3],const NxF64 p3[3],NxF64 epsilon=0.999);               // true if these three points in a row are co-linear
-bool fm_colinear(const NxF32  p1[3],const NxF32  p2[3],const NxF32 p3[3],NxF32 epsilon=0.999f);
-
-bool fm_colinear(const NxF32 a1[3],const NxF32 a2[3],const NxF32 b1[3],const NxF32 b2[3],NxF32 epsilon=0.999f);  // true if these two line segments are co-linear.
-bool fm_colinear(const NxF64 a1[3],const NxF64 a2[3],const NxF64 b1[3],const NxF64 b2[3],NxF64 epsilon=0.999);  // true if these two line segments are co-linear.
-
-// Result code returned by the 2D line-segment intersection routines declared below.
-enum IntersectResult
-{
-  IR_DONT_INTERSECT,
-  IR_DO_INTERSECT,
-  IR_COINCIDENT,
-  IR_PARALLEL,
-};
-
-// Intersects segment a1-a2 with segment b1-b2 and reports the result through 'intersectionPoint'.
-// NOTE(review): the "2d" suffix suggests only two of the three components are used - confirm which ones in the implementation.
-IntersectResult fm_intersectLineSegments2d(const NxF32 a1[3], const NxF32 a2[3], const NxF32 b1[3], const NxF32 b2[3], NxF32 intersectionPoint[3]);
-IntersectResult fm_intersectLineSegments2d(const NxF64 a1[3],const NxF64 a2[3],const NxF64 b1[3],const NxF64 b2[3],NxF64 intersectionPoint[3]);
-
-// Same test, but reports two scalars (t1, t2) instead of a point.
-// NOTE(review): presumably the parametric positions along each segment - confirm.
-IntersectResult fm_intersectLineSegments2dTime(const NxF32 a1[3], const NxF32 a2[3], const NxF32 b1[3], const NxF32 b2[3],NxF32 &t1,NxF32 &t2);
-IntersectResult fm_intersectLineSegments2dTime(const NxF64 a1[3],const NxF64 a2[3],const NxF64 b1[3],const NxF64 b2[3],NxF64 &t1,NxF64 &t2);
-
-// Plane-Triangle splitting
-
-// Classification of a triangle (or, for fm_getSidePlane, a point) relative to a plane.
-enum PlaneTriResult
-{
-  PTR_ON_PLANE,
-  PTR_FRONT,
-  PTR_BACK,
-  PTR_SPLIT,
-};
-
-PlaneTriResult fm_planeTriIntersection(const NxF32 plane[4],    // the plane equation in Ax+By+Cz+D format
-                                    const NxF32 *triangle, // the source triangle.
-                                    NxU32 tstride,  // stride in bytes of the input and output *vertices*
-                                    NxF32        epsilon,  // the co-planar epsilon value.
-                                    NxF32       *front,    // the triangle in front of the plane
-                                    NxU32 &fcount,  // number of vertices in the 'front' triangle
-                                    NxF32       *back,     // the triangle in back of the plane
-                                    NxU32 &bcount); // the number of vertices in the 'back' triangle.
-
-
-PlaneTriResult fm_planeTriIntersection(const NxF64 plane[4],    // the plane equation in Ax+By+Cz+D format
-                                    const NxF64 *triangle, // the source triangle.
-                                    NxU32 tstride,  // stride in bytes of the input and output *vertices*
-                                    NxF64        epsilon,  // the co-planar epsilon value.
-                                    NxF64       *front,    // the triangle in front of the plane
-                                    NxU32 &fcount,  // number of vertices in the 'front' triangle
-                                    NxF64       *back,     // the triangle in back of the plane
-                                    NxU32 &bcount); // the number of vertices in the 'back' triangle.
-
-
-// Writes to 'split' a point derived from p1, p2 and the plane equation.
-// NOTE(review): presumably the point where segment p1-p2 crosses the plane - confirm against the implementation.
-void fm_intersectPointPlane(const NxF32 p1[3],const NxF32 p2[3],NxF32 *split,const NxF32 plane[4]);
-void fm_intersectPointPlane(const NxF64 p1[3],const NxF64 p2[3],NxF64 *split,const NxF64 plane[4]);
-
-// Classifies point 'p' against 'plane' using tolerance 'epsilon'.
-PlaneTriResult fm_getSidePlane(const NxF32 p[3],const NxF32 plane[4],NxF32 epsilon);
-PlaneTriResult fm_getSidePlane(const NxF64 p[3],const NxF64 plane[4],NxF64 epsilon);
-
-
-// Best-fit oriented bounding box for a strided point set; the pose is returned as a 4x4 matrix.
-void fm_computeBestFitOBB(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 *sides,NxF32 matrix[16],bool bruteForce=true);
-void fm_computeBestFitOBB(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 *sides,NxF64 matrix[16],bool bruteForce=true);
-
-// Same as above, but the pose is returned as a position and a quaternion.
-void fm_computeBestFitOBB(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 *sides,NxF32 pos[3],NxF32 quat[4],bool bruteForce=true);
-void fm_computeBestFitOBB(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 *sides,NxF64 pos[3],NxF64 quat[4],bool bruteForce=true);
-
-// Best-fit axis-aligned box for a strided point set, returned as side lengths and a position.
-void fm_computeBestFitABB(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 *sides,NxF32 pos[3]);
-void fm_computeBestFitABB(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF64 *sides,NxF64 pos[3]);
-
-
-//** Note, if the returned capsule height is less than zero, then you must represent it as a sphere of size radius.
-// NOTE(review): the NxF64 overload still returns radius and height as NxF32 references.
-void fm_computeBestFitCapsule(NxU32 vcount,const NxF32 *points,NxU32 pstride,NxF32 &radius,NxF32 &height,NxF32 matrix[16],bool bruteForce=true);
-void fm_computeBestFitCapsule(NxU32 vcount,const NxF64 *points,NxU32 pstride,NxF32 &radius,NxF32 &height,NxF64 matrix[16],bool bruteForce=true);
-
-
-void fm_planeToMatrix(const NxF32 plane[4],NxF32 matrix[16]); // convert a plane equation to a 4x4 rotation matrix.  Reference vector is 0,1,0
-void fm_planeToQuat(const NxF32 plane[4],NxF32 quat[4],NxF32 pos[3]); // convert a plane equation to a quaternion and translation
-
-void fm_planeToMatrix(const NxF64 plane[4],NxF64 matrix[16]); // convert a plane equation to a 4x4 rotation matrix
-void fm_planeToQuat(const NxF64 plane[4],NxF64 quat[4],NxF64 pos[3]); // convert a plane equation to a quaternion and translation
-
-// Copies a 3-component double-precision vector into a single-precision vector.
-inline void fm_doubleToFloat3(const NxF64 p[3],NxF32 t[3])
-{
-  for (int i = 0; i < 3; i++)
-  {
-    t[i] = (NxF32) p[i];
-  }
-}
-
-// Copies a 3-component single-precision vector into a double-precision vector.
-inline void fm_floatToDouble3(const NxF32 p[3],NxF64 t[3])
-{
-  for (int i = 0; i < 3; i++)
-  {
-    t[i] = (NxF64) p[i];
-  }
-}
-
-
-void  fm_eulerMatrix(NxF32 ax,NxF32 ay,NxF32 az,NxF32 matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
-void  fm_eulerMatrix(NxF64 ax,NxF64 ay,NxF64 az,NxF64 matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
-
-
-// Volume of an indexed triangle mesh with 'tcount' triangles.
-// NOTE(review): 'vertices' is presumably tightly packed x,y,z triples, since no stride is passed - confirm.
-NxF32  fm_computeMeshVolume(const NxF32 *vertices,NxU32 tcount,const NxU32 *indices);
-NxF64 fm_computeMeshVolume(const NxF64 *vertices,NxU32 tcount,const NxU32 *indices);
-
-
-#define FM_DEFAULT_GRANULARITY 0.001f  // 1 millimeter is the default granularity
-
-// Abstract interface to a vertex-welding index: positions are submitted one at a
-// time and mapped to an index, with 'newPos' reporting whether the position was
-// new. Instances are obtained from fm_createVertexIndex() and handed back to
-// fm_releaseVertexIndex().
-class fm_VertexIndex
-{
-public:
-  virtual NxU32          getIndex(const NxF32 pos[3],bool &newPos) = 0;  // get welded index for this NxF32 vector[3]
-  virtual NxU32          getIndex(const NxF64 pos[3],bool &newPos) = 0;  // get welded index for this NxF64 vector[3]
-  virtual const NxF32 *   getVerticesFloat(void) const = 0;
-  virtual const NxF64 *  getVerticesDouble(void) const = 0;
-  virtual const NxF32 *   getVertexFloat(NxU32 index) const = 0;
-  virtual const NxF64 *  getVertexDouble(NxU32 index) const = 0;
-  virtual NxU32          getVcount(void) const = 0;
-  virtual bool            isDouble(void) const = 0;
-  virtual bool            saveAsObj(const char *fname,NxU32 tcount,NxU32 *indices) = 0;
-};
-
-fm_VertexIndex * fm_createVertexIndex(NxF64 granularity,bool snapToGrid); // create an indexed vertex system for doubles
-fm_VertexIndex * fm_createVertexIndex(NxF32 granularity,bool snapToGrid);  // create an indexed vertex system for floats
-void             fm_releaseVertexIndex(fm_VertexIndex *vindex);
-
-
-
-#if 0 // currently disabled
-
-// A pair of unsigned indices (mE1, mE2) identifying the two ends of a segment.
-class fm_LineSegment
-{
-public:
-  fm_LineSegment(void)
-  {
-    mE1 = mE2 = 0;
-  }
-
-  fm_LineSegment(NxU32 e1,NxU32 e2)
-  {
-    mE1 = e1;
-    mE2 = e2;
-  }
-
-  NxU32 mE1;
-  NxU32 mE2;
-};
-
-
-// LineSweep *only* supports doubles.  As a geometric operation it needs as much precision as possible.
-class fm_LineSweep
-{
-public:
-
- virtual fm_LineSegment * performLineSweep(const fm_LineSegment *segments,
-                                   NxU32 icount,
-                                   const NxF64 *planeEquation,
-                                   fm_VertexIndex *pool,
-                                   NxU32 &scount) = 0;
-
-
-};
-
-fm_LineSweep * fm_createLineSweep(void);
-void           fm_releaseLineSweep(fm_LineSweep *sweep);
-
-#endif
-
-// Abstract interface to a polygon triangulator. Instances are obtained from
-// fm_createTriangulate() and handed back to fm_releaseTriangulate().
-// triangulate3d() takes 'pcount' points spaced 'vstride' apart and reports the
-// triangle count through 'tcount'.
-// NOTE(review): the returned pointer presumably refers to storage owned by the triangulator - confirm before retaining it.
-class fm_Triangulate
-{
-public:
-  virtual const NxF64 *       triangulate3d(NxU32 pcount,
-                                             const NxF64 *points,
-                                             NxU32 vstride,
-                                             NxU32 &tcount,
-                                             bool consolidate,
-                                             NxF64 epsilon) = 0;
-
-  virtual const NxF32  *       triangulate3d(NxU32 pcount,
-                                             const NxF32  *points,
-                                             NxU32 vstride,
-                                             NxU32 &tcount,
-                                             bool consolidate,
-                                             NxF32 epsilon) = 0;
-};
-
-fm_Triangulate * fm_createTriangulate(void);
-void             fm_releaseTriangulate(fm_Triangulate *t);
-
-
-// Returns a pointer to element 'index' of a point array whose elements are 'pstride' apart.
-const NxF32 * fm_getPoint(const NxF32 *points,NxU32 pstride,NxU32 index);
-const NxF64 * fm_getPoint(const NxF64 *points,NxU32 pstride,NxU32 index);
-
-// 2D helpers: point-in-triangle test (A, B, C are the corners, P the query point) and polygon area.
-bool   fm_insideTriangle(NxF32 Ax, NxF32 Ay,NxF32 Bx, NxF32 By,NxF32 Cx, NxF32 Cy,NxF32 Px, NxF32 Py);
-bool   fm_insideTriangle(NxF64 Ax, NxF64 Ay,NxF64 Bx, NxF64 By,NxF64 Cx, NxF64 Cy,NxF64 Px, NxF64 Py);
-NxF32  fm_areaPolygon2d(NxU32 pcount,const NxF32 *points,NxU32 pstride);
-NxF64 fm_areaPolygon2d(NxU32 pcount,const NxF64 *points,NxU32 pstride);
-
-// Point-in-polygon test; xindex/yindex select which two components of each point are used.
-bool  fm_pointInsidePolygon2d(NxU32 pcount,const NxF32 *points,NxU32 pstride,const NxF32 *point,NxU32 xindex=0,NxU32 yindex=1);
-bool  fm_pointInsidePolygon2d(NxU32 pcount,const NxF64 *points,NxU32 pstride,const NxF64 *point,NxU32 xindex=0,NxU32 yindex=1);
-
-NxU32 fm_consolidatePolygon(NxU32 pcount,const NxF32 *points,NxU32 pstride,NxF32 *dest,NxF32 epsilon=0.999999f); // collapses co-linear edges.
-NxU32 fm_consolidatePolygon(NxU32 pcount,const NxF64 *points,NxU32 pstride,NxF64 *dest,NxF64 epsilon=0.999999); // collapses co-linear edges.
-
-
-// Computes a split plane for an indexed triangle mesh and writes it to 'plane'.
-// NOTE(review): the meaning of the bool return value is not visible here - confirm against the implementation.
-bool fm_computeSplitPlane(NxU32 vcount,const NxF64 *vertices,NxU32 tcount,const NxU32 *indices,NxF64 *plane);
-bool fm_computeSplitPlane(NxU32 vcount,const NxF32 *vertices,NxU32 tcount,const NxU32 *indices,NxF32 *plane);
-
-// Writes to 'nearest' the point of triangle p1,p2,p3 closest to 'pos'.
-void fm_nearestPointInTriangle(const NxF32 *pos,const NxF32 *p1,const NxF32 *p2,const NxF32 *p3,NxF32 *nearest);
-void fm_nearestPointInTriangle(const NxF64 *pos,const NxF64 *p1,const NxF64 *p2,const NxF64 *p3,NxF64 *nearest);
-
-// Area of the triangle p1,p2,p3.
-NxF32  fm_areaTriangle(const NxF32 *p1,const NxF32 *p2,const NxF32 *p3);
-NxF64 fm_areaTriangle(const NxF64 *p1,const NxF64 *p2,const NxF64 *p3);
-
-void fm_subtract(const NxF32 *A,const NxF32 *B,NxF32 *diff); // compute A-B and store the result in 'diff'
-void fm_subtract(const NxF64 *A,const NxF64 *B,NxF64 *diff); // compute A-B and store the result in 'diff'
-
-// Multiplies vector A in place by 'scaler'.
-void fm_multiply(NxF32 *A,NxF32 scaler);
-void fm_multiply(NxF64 *A,NxF64 scaler);
-
-// Computes A+B and stores the result in 'sum'.
-void fm_add(const NxF32 *A,const NxF32 *B,NxF32 *sum);
-void fm_add(const NxF64 *A,const NxF64 *B,NxF64 *sum);
-
-// Copies a 3-component vector from 'source' to 'dest'.
-void fm_copy3(const NxF32 *source,NxF32 *dest);
-void fm_copy3(const NxF64 *source,NxF64 *dest);
-
-// re-indexes an indexed triangle mesh but drops unused vertices.  The output_indices can be the same pointer as the input indices.
-// the output_vertices can point to the input vertices if you desire.  The output_vertices buffer should be at least the same size
-// as the input buffer.  The routine returns the new vertex count after re-indexing.
-NxU32  fm_copyUniqueVertices(NxU32 vcount,const NxF32 *input_vertices,NxF32 *output_vertices,NxU32 tcount,const NxU32 *input_indices,NxU32 *output_indices);
-NxU32  fm_copyUniqueVertices(NxU32 vcount,const NxF64 *input_vertices,NxF64 *output_vertices,NxU32 tcount,const NxU32 *input_indices,NxU32 *output_indices);
-
-bool    fm_isMeshCoplanar(NxU32 tcount,const NxU32 *indices,const NxF32 *vertices,bool doubleSided); // returns true if this collection of indexed triangles are co-planar!
-bool    fm_isMeshCoplanar(NxU32 tcount,const NxU32 *indices,const NxF64 *vertices,bool doubleSided); // returns true if this collection of indexed triangles are co-planar!
-
-bool    fm_samePlane(const NxF32 p1[4],const NxF32 p2[4],NxF32 normalEpsilon=0.01f,NxF32 dEpsilon=0.001f,bool doubleSided=false); // returns true if these two plane equations are identical within an epsilon
-bool    fm_samePlane(const NxF64 p1[4],const NxF64 p2[4],NxF64 normalEpsilon=0.01,NxF64 dEpsilon=0.001,bool doubleSided=false);
-
-// Converts an oriented box (local min/max corners plus a 4x4 matrix) into axis-aligned bounds abmin/abmax.
-// Only a single-precision version is declared.
-void    fm_OBBtoAABB(const NxF32 obmin[3],const NxF32 obmax[3],const NxF32 matrix[16],NxF32 abmin[3],NxF32 abmax[3]);
-
-// a utility class that will tessellate a mesh.
-// Instances are obtained from fm_createTesselate() and handed back to fm_releaseTesselate().
-// tesselate() takes an indexed triangle list plus a vertex index, and reports the
-// resulting count through 'outcount'.
-// NOTE(review): 'longEdge' and 'maxDepth' presumably bound edge length and recursion depth - confirm against the implementation.
-class fm_Tesselate
-{
-public:
-  virtual const NxU32 * tesselate(fm_VertexIndex *vindex,NxU32 tcount,const NxU32 *indices,NxF32 longEdge,NxU32 maxDepth,NxU32 &outcount) = 0;
-};
-
-fm_Tesselate * fm_createTesselate(void);
-void           fm_releaseTesselate(fm_Tesselate *t);
-
-// Computes per-vertex mean normals for an indexed triangle mesh (single precision).
-void fm_computeMeanNormals(NxU32 vcount,       // the number of vertices
-                           const NxF32 *vertices,     // the base address of the vertex position data.
-                           NxU32 vstride,      // the stride between position data.
-                           NxF32 *normals,            // the base address  of the destination for mean vector normals
-                           NxU32 nstride,      // the stride between normals
-                           NxU32 tcount,       // the number of triangles
-                           const NxU32 *indices);     // the triangle indices
-
-// Computes per-vertex mean normals for an indexed triangle mesh (double precision).
-void fm_computeMeanNormals(NxU32 vcount,       // the number of vertices
-                           const NxF64 *vertices,     // the base address of the vertex position data.
-                           NxU32 vstride,      // the stride between position data.
-                           NxF64 *normals,            // the base address  of the destination for mean vector normals
-                           NxU32 nstride,      // the stride between normals
-                           NxU32 tcount,       // the number of triangles
-                           const NxU32 *indices);     // the triangle indices
-
-
-// Validity test for the triangle p1,p2,p3 using tolerance 'epsilon'.
-// The double-precision overload uses a double literal for its default tolerance,
-// matching the other NxF64 overloads in this header (a float literal would be
-// rounded to single precision before being widened).
-bool fm_isValidTriangle(const NxF32 *p1,const NxF32 *p2,const NxF32 *p3,NxF32 epsilon=0.00001f);
-bool fm_isValidTriangle(const NxF64 *p1,const NxF64 *p2,const NxF64 *p3,NxF64 epsilon=0.00001);
-
-}; // end of namespace
-
-#endif

+ 0 - 5607
Engine/lib/convexDecomp/NvFloatMath.inl

@@ -1,5607 +0,0 @@
-// a set of routines that let you do common 3d math
-// operations without any vector, matrix, or quaternion
-// classes or templates.
-//
-// a vector (or point) is a 'NxF32 *' to 3 floating point numbers.
-// a matrix is a 'NxF32 *' to an array of 16 floating point numbers representing a 4x4 transformation matrix compatible with D3D or OGL
-// a quaternion is a 'NxF32 *' to 4 floats representing a quaternion x,y,z,w
-//
-//
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#pragma warning(disable:4996)
-
-#include "NvUserMemAlloc.h"
-#include "NvHashMap.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Inverse rotate/translate a point: the translation stored in matrix elements
-// 12..14 is subtracted from 'pos', then each output component is the dot product
-// of that offset with matrix elements [4*i+0 .. 4*i+2].
-void fm_inverseRT(const REAL matrix[16],const REAL pos[3],REAL t[3])
-{
-  const REAL dx = pos[0] - matrix[3*4+0];
-  const REAL dy = pos[1] - matrix[3*4+1];
-  const REAL dz = pos[2] - matrix[3*4+2];
-
-  for (int i = 0; i < 3; i++)
-  {
-    t[i] = (matrix[i*4+0] * dx) + (matrix[i*4+1] * dy) + (matrix[i*4+2] * dz);
-  }
-}
-
-// Returns the determinant of the upper-left 3x3 block of a 4x4 matrix, evaluated
-// as the triple product r0 . (r1 x r2) where rN holds elements [4*N+0 .. 4*N+2].
-// The remaining elements of the matrix are not read.
-REAL fm_getDeterminant(const REAL matrix[16])
-{
-  REAL r0[3] = { matrix[0*4+0], matrix[0*4+1], matrix[0*4+2] };
-  REAL r1[3] = { matrix[1*4+0], matrix[1*4+1], matrix[1*4+2] };
-  REAL r2[3] = { matrix[2*4+0], matrix[2*4+1], matrix[2*4+2] };
-
-  REAL r1xr2[3];
-  fm_cross(r1xr2,r1,r2);
-
-  return fm_dot(r0,r1xr2);
-}
-
-// Returns x multiplied by itself.
-REAL fm_squared(REAL x)
-{
-  return x*x;
-}
-
-// Splits a 4x4 transform into translation, rotation and scale.
-//   trans : copied from matrix elements 12..14.
-//   scale : Euclidean length of elements [4*i+0 .. 4*i+2] for i = 0,1,2.
-//   rot   : quaternion from fm_matrixToQuat() after each of those three element
-//           groups has been divided by its scale.
-// No guard is applied for a zero scale; such input divides by zero.
-void fm_decomposeTransform(const REAL local_transform[16],REAL trans[3],REAL rot[4],REAL scale[3])
-{
-  for (int i = 0; i < 3; i++)
-  {
-    trans[i] = local_transform[12+i];
-  }
-
-  for (int axis = 0; axis < 3; axis++)
-  {
-    const REAL *basis = &local_transform[axis*4];
-    scale[axis] = sqrt(fm_squared(basis[0]) + fm_squared(basis[1]) + fm_squared(basis[2]));
-  }
-
-  // Work on a copy so the caller's matrix is left untouched.
-  REAL unscaled[16];
-  memcpy(unscaled,local_transform,sizeof(REAL)*16);
-
-  for (int axis = 0; axis < 3; axis++)
-  {
-    const REAL inv = 1.0f / scale[axis];
-    unscaled[axis*4+0]*=inv;
-    unscaled[axis*4+1]*=inv;
-    unscaled[axis*4+2]*=inv;
-  }
-
-  fm_matrixToQuat(unscaled,rot);
-}
-
-// Copies a 3x3 minor of 'matrix' into the first three entries of the first
-// three 4-element groups of 'pDst'. Elements are addressed as [outer*4+inner];
-// the minor omits outer index 'kj' and inner index 'ki'. All other entries of
-// 'pDst' are left as they were.
-void fm_getSubMatrix(NxI32 ki,NxI32 kj,REAL pDst[16],const REAL matrix[16])
-{
-  NxI32 outOuter = 0;
-
-  for (NxI32 srcOuter = 0; srcOuter < 4; srcOuter++)
-  {
-    if ( srcOuter != kj )
-    {
-      NxI32 outInner = 0;
-      for (NxI32 srcInner = 0; srcInner < 4; srcInner++)
-      {
-        if ( srcInner != ki )
-        {
-          pDst[outOuter*4+outInner] = matrix[srcOuter*4+srcInner];
-          outInner++;
-        }
-      }
-      outOuter++;
-    }
-  }
-}
-
-// Fills 'inverse_matrix' element by element: entry [i*4+j] is the signed 3x3
-// minor selected by fm_getSubMatrix(i,j,...) divided by fm_getDeterminant(matrix).
-// fm_getDeterminant() only looks at the upper-left 3x3 block, and no check is
-// made for a zero determinant.
-void  fm_inverseTransform(const REAL matrix[16],REAL inverse_matrix[16])
-{
-  const REAL invDet = 1.0f / fm_getDeterminant(matrix);
-
-  for (NxI32 idx = 0; idx < 16; idx++)
-  {
-    const NxI32 i = idx / 4;
-    const NxI32 j = idx % 4;
-
-    REAL minor[16];
-    fm_identity(minor);
-    fm_getSubMatrix( i, j, minor, matrix );
-
-    // +1 when (i+j) is even, -1 when it is odd.
-    const NxI32 sign = ( ( i + j ) % 2 ) ? -1 : 1;
-    const REAL minorDet = fm_getDeterminant(minor);
-    inverse_matrix[idx] = ( minorDet * sign ) * invDet;
-  }
-}
-
-// Sets a 4x4 matrix to identity: the diagonal entries (indices 0, 5, 10, 15)
-// become 1 and every other entry becomes 0.
-void fm_identity(REAL matrix[16])
-{
-  for (int i = 0; i < 16; i++)
-  {
-    matrix[i] = 0;
-  }
-  for (int d = 0; d < 4; d++)
-  {
-    matrix[d*4+d] = 1;
-  }
-}
-
-// Converts a quaternion (x,y,z,w) to three Euler angles in radians.
-//   sint = 2wy - 2xz is treated as the sine of the middle angle (ay) and
-//   cost = sqrt(1 - sint^2) as its cosine.
-// When the cosine is usable the other two angles are recovered from terms
-// scaled by 1/cost; otherwise a fallback is used with az fixed to 0.
-//
-// Fixes relative to the previous version:
-//   * The cosine used to be overwritten with its reciprocal and then passed to
-//     atan2(sint, cost), so ay was computed from 1/cos instead of cos.  The
-//     reciprocal now lives in its own variable.
-//   * The square root is only taken when 1 - sint^2 is positive; testing the
-//     absolute value allowed sqrt of a negative number (NaN) for quaternions
-//     that are not unit length.
-void  fm_quatToEuler(const REAL quat[4],REAL &ax,REAL &ay,REAL &az)
-{
-  const REAL x = quat[0];
-  const REAL y = quat[1];
-  const REAL z = quat[2];
-  const REAL w = quat[3];
-
-  const REAL sint      = (2.0f * w * y) - (2.0f * x * z);
-  const REAL cost_temp = 1.0f - (sint * sint);
-
-  REAL cost = 0;
-  if ( cost_temp > 0.001f )
-  {
-    cost = sqrt( cost_temp );
-  }
-
-  REAL sinv, cosv, sinf, cosf;
-  if ( cost > 0.001f )
-  {
-    const REAL invCost = 1.0f / cost;
-    sinv = ((2.0f * y * z) + (2.0f * w * x)) * invCost;
-    cosv = (1.0f - (2.0f * x * x) - (2.0f * y * y)) * invCost;
-    sinf = ((2.0f * x * y) + (2.0f * w * z)) * invCost;
-    cosf = (1.0f - (2.0f * y * y) - (2.0f * z * z)) * invCost;
-  }
-  else
-  {
-    // Cosine of the middle angle is (near) zero: fall back to az = 0.
-    sinv = (2.0f * w * x) - (2.0f * y * z);
-    cosv = 1.0f - (2.0f * x * x) - (2.0f * z * z);
-    sinf = 0;
-    cosf = 1.0f;
-  }
-
-  // compute output rotations
-  ax = atan2( sinv, cosv );
-  ay = atan2( sint, cost );
-  az = atan2( sinf, cosf );
-}
-
-// Converts Euler angles (radians) to a 4x4 matrix by going through a quaternion:
-// fm_eulerToQuat() followed by fm_quatToMatrix(), which zeroes the translation.
-void fm_eulerToMatrix(REAL ax,REAL ay,REAL az,REAL *matrix)
-{
-  REAL rotation[4];
-
-  fm_eulerToQuat(ax,ay,az,rotation);
-  fm_quatToMatrix(rotation,matrix);
-}
-
-// Computes the axis-aligned bounds of 'vcount' points.
-//   points  : first point; each subsequent point is 'pstride' BYTES further on.
-//   bmin    : receives the per-component minimum.
-//   bmax    : receives the per-component maximum.
-// If 'vcount' is zero or 'points' is null there is nothing to measure; the
-// function returns without touching bmin/bmax instead of reading points[0..2]
-// as it previously did.
-void fm_getAABB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *bmin,REAL *bmax)
-{
-  if ( vcount == 0 || points == 0 )
-  {
-    return;
-  }
-
-  // Seed both corners with the first point.
-  for (int axis = 0; axis < 3; axis++)
-  {
-    bmin[axis] = points[axis];
-    bmax[axis] = points[axis];
-  }
-
-  const NxU8 *cursor = (const NxU8 *) points;
-  for (NxU32 i=1; i<vcount; i++)
-  {
-    cursor+=pstride;
-    const REAL *p = (const REAL *) cursor;
-
-    for (int axis = 0; axis < 3; axis++)
-    {
-      if ( p[axis] < bmin[axis] ) bmin[axis] = p[axis];
-      if ( p[axis] > bmax[axis] ) bmax[axis] = p[axis];
-    }
-  }
-}
-
-// Array form of fm_eulerToQuat(): euler[0], euler[1] and euler[2] are forwarded
-// as the three angle arguments of the scalar overload.
-void  fm_eulerToQuat(const REAL *euler,REAL *quat)
-{
-  const REAL a0 = euler[0];
-  const REAL a1 = euler[1];
-  const REAL a2 = euler[2];
-  fm_eulerToQuat(a0,a1,a2,quat);
-}
-
-// Converts roll/pitch/yaw Euler angles to a quaternion. Each angle is halved,
-// and the four outputs are built from products of the half-angle sines and
-// cosines; quat[3] receives cr*cp*cy + sr*sp*sy.
-void fm_eulerToQuat(REAL roll,REAL pitch,REAL yaw,REAL *quat)
-{
-  const REAL halfRoll  = roll  * 0.5f;
-  const REAL halfPitch = pitch * 0.5f;
-  const REAL halfYaw   = yaw   * 0.5f;
-
-  const REAL cr = cos(halfRoll);
-  const REAL cp = cos(halfPitch);
-  const REAL cy = cos(halfYaw);
-
-  const REAL sr = sin(halfRoll);
-  const REAL sp = sin(halfPitch);
-  const REAL sy = sin(halfYaw);
-
-  const REAL cosPitchCosYaw = cp * cy;
-  const REAL sinPitchSinYaw = sp * sy;
-  const REAL sinPitchCosYaw = sp * cy;
-  const REAL cosPitchSinYaw = cp * sy;
-
-  quat[0] = ( sr * cosPitchCosYaw - cr * sinPitchSinYaw);
-  quat[1] = ( cr * sinPitchCosYaw + sr * cosPitchSinYaw);
-  quat[2] = ( cr * cosPitchSinYaw - sr * sinPitchCosYaw);
-  quat[3] = cr * cosPitchCosYaw + sr * sinPitchSinYaw;
-}
-
-// Converts a quaternion (x,y,z,w) into a 4x4 matrix. All sixteen elements are
-// written: the upper-left 3x3 block holds the rotation, elements 12..14 and
-// 3, 7, 11 are zeroed, and element 15 is set to 1.
-void fm_quatToMatrix(const REAL *quat,REAL *matrix)
-{
-  // Pairwise products of the quaternion components.
-  const REAL xx = quat[0]*quat[0];
-  const REAL yy = quat[1]*quat[1];
-  const REAL zz = quat[2]*quat[2];
-  const REAL xy = quat[0]*quat[1];
-  const REAL xz = quat[0]*quat[2];
-  const REAL yz = quat[1]*quat[2];
-  const REAL wx = quat[3]*quat[0];
-  const REAL wy = quat[3]*quat[1];
-  const REAL wz = quat[3]*quat[2];
-
-  // Elements 0..3
-  matrix[0]  = 1 - 2 * ( yy + zz );
-  matrix[1]  =     2 * ( xy + wz );
-  matrix[2]  =     2 * ( xz - wy );
-  matrix[3]  = (REAL) 0.0f;
-
-  // Elements 4..7
-  matrix[4]  =     2 * ( xy - wz );
-  matrix[5]  = 1 - 2 * ( xx + zz );
-  matrix[6]  =     2 * ( yz + wx );
-  matrix[7]  = (REAL) 0.0f;
-
-  // Elements 8..11
-  matrix[8]  =     2 * ( xz + wy );
-  matrix[9]  =     2 * ( yz - wx );
-  matrix[10] = 1 - 2 * ( xx + yy );
-  matrix[11] = (REAL) 0.0f;
-
-  // Elements 12..15: zero translation, unit w.
-  matrix[12] = (REAL) 0.0f;
-  matrix[13] = (REAL) 0.0f;
-  matrix[14] = (REAL) 0.0f;
-  matrix[15] = (REAL) 1.0f;
-}
-
-
-// Rotates vector 'v' by quaternion 'quat' (x,y,z,w) and stores the result in 'r'.
-// An intermediate 4-component product of the quaternion with 'v' is formed
-// first and then combined with the negated x,y,z parts of the quaternion.
-// 'v' is fully consumed before 'r' is written.
-void fm_quatRotate(const REAL *quat,const REAL *v,REAL *r)
-{
-  const REAL qx = quat[0];
-  const REAL qy = quat[1];
-  const REAL qz = quat[2];
-  const REAL qw = quat[3];
-
-  const REAL tx =   qw*v[0] + qy*v[2] - v[1]*qz;
-  const REAL ty =   qw*v[1] + qz*v[0] - v[2]*qx;
-  const REAL tz =   qw*v[2] + qx*v[1] - v[0]*qy;
-  const REAL tw = - qx*v[0] - qy*v[1] - qz*v[2];
-
-  r[0] = (tw*-qx) + (qw*tx) + (ty*-qz) - (-qy*tz);
-  r[1] = (tw*-qy) + (qw*ty) + (tz*-qx) - (-qz*tx);
-  r[2] = (tw*-qz) + (qw*tz) + (tx*-qy) - (-qx*ty);
-}
-
-
-// Copies the translation of a 4x4 matrix (elements 12, 13 and 14) into 't'.
-void fm_getTranslation(const REAL *matrix,REAL *t)
-{
-  const REAL *translation = &matrix[3*4];
-  for (int i = 0; i < 3; i++)
-  {
-    t[i] = translation[i];
-  }
-}
-
-// Converts the 3x3 portion of a 4x4 matrix into a quaternion stored as x,y,z,w.
-// When the trace of the 3x3 block is positive the quaternion is derived from
-// it directly; otherwise the largest diagonal element is used as the pivot.
-void fm_matrixToQuat(const REAL *matrix,REAL *quat)
-{
-  const REAL trace = matrix[0*4+0] + matrix[1*4+1] + matrix[2*4+2];
-
-  if ( trace > 0.0f )
-  {
-    REAL root = (REAL) sqrt ( (NxF64) (trace + 1.0f) );
-    quat[3] = root * 0.5f;
-    root = 0.5f / root;
-    quat[0] = (matrix[1*4+2] - matrix[2*4+1]) * root;
-    quat[1] = (matrix[2*4+0] - matrix[0*4+2]) * root;
-    quat[2] = (matrix[0*4+1] - matrix[1*4+0]) * root;
-    return;
-  }
-
-  // Trace is not positive: pivot on the largest diagonal entry.
-  NxI32 major = 0;
-  if (matrix[1*4+1] > matrix[0*4+0]) major = 1;
-  if (matrix[2*4+2] > matrix[major*4+major]) major = 2;
-
-  // The other two axes, in cyclic order after the pivot.
-  const NxI32 second = (major + 1) % 3;
-  const NxI32 third  = (second + 1) % 3;
-
-  REAL root = sqrt ( ((matrix[major*4+major] - (matrix[second*4+second] + matrix[third*4+third])) + 1.0f) );
-
-  REAL parts[4];
-  parts[major] = root * 0.5f;
-
-  if (root != 0.0f ) root = 0.5f / root;
-
-  parts[3]      = (matrix[second*4+third] - matrix[third*4+second]) * root;
-  parts[second] = (matrix[major*4+second] + matrix[second*4+major]) * root;
-  parts[third]  = (matrix[major*4+third] + matrix[third*4+major]) * root;
-
-  for (NxI32 c = 0; c < 4; c++)
-  {
-    quat[c] = parts[c];
-  }
-}
-
-
-// Volume of a sphere of the given radius: (4/3) * PI * radius^3.
-REAL fm_sphereVolume(REAL radius)
-{
-  return (4.0f / 3.0f ) * FM_PI * radius * radius * radius;
-}
-
-
-// Volume of a cylinder with the given radius and height h: PI * radius^2 * h.
-REAL fm_cylinderVolume(REAL radius,REAL h)
-{
-  return FM_PI * radius * radius *h;
-}
-
-// Volume of a capsule whose overall height is 'h': one full sphere of the given
-// radius plus a cylinder of length h - 2*radius. The cylinder term is only added
-// when that length is positive.
-REAL fm_capsuleVolume(REAL radius,REAL h)
-{
-  const REAL sphere = fm_sphereVolume(radius);
-  const REAL cylinderLength = h-radius*2;
-
-  if ( cylinderLength > 0 )
-  {
-    return sphere + fm_cylinderVolume(radius,cylinderLength);
-  }
-  return sphere;
-}
-
-// Rotates and translates point 'v' by a 4x4 matrix and writes the result to 't'.
-// A null 'matrix' means "no transform": 'v' is copied to 't' unchanged.
-// The result is computed into temporaries first, so 't' may be the same buffer as 'v'.
-void  fm_transform(const REAL matrix[16],const REAL v[3],REAL t[3])
-{
-  if ( !matrix )
-  {
-    t[0] = v[0];
-    t[1] = v[1];
-    t[2] = v[2];
-    return;
-  }
-
-  REAL out[3];
-  for (int c = 0; c < 3; c++)
-  {
-    out[c] = (matrix[0*4+c] * v[0]) +  (matrix[1*4+c] * v[1]) + (matrix[2*4+c] * v[2]) + matrix[3*4+c];
-  }
-
-  t[0] = out[0];
-  t[1] = out[1];
-  t[2] = out[2];
-}
-
-// Rotates vector 'v' by the upper-left 3x3 block of a 4x4 matrix and writes the
-// result to 't'. Unlike fm_transform(), the translation elements are not added.
-// A null 'matrix' means "no rotation": 'v' is copied to 't' unchanged.
-// The result is computed into temporaries first, so 't' may be the same buffer as 'v'.
-void  fm_rotate(const REAL matrix[16],const REAL v[3],REAL t[3])
-{
-  if ( !matrix )
-  {
-    t[0] = v[0];
-    t[1] = v[1];
-    t[2] = v[2];
-    return;
-  }
-
-  REAL out[3];
-  for (int c = 0; c < 3; c++)
-  {
-    out[c] = (matrix[0*4+c] * v[0]) +  (matrix[1*4+c] * v[1]) + (matrix[2*4+c] * v[2]);
-  }
-
-  t[0] = out[0];
-  t[1] = out[1];
-  t[2] = out[2];
-}
-
-
-// Euclidean distance between two 3D points.
-REAL fm_distance(const REAL *p1,const REAL *p2)
-{
-  const REAL deltaX = p1[0] - p2[0];
-  const REAL deltaY = p1[1] - p2[1];
-  const REAL deltaZ = p1[2] - p2[2];
-
-  return sqrt( deltaX*deltaX + deltaY*deltaY + deltaZ*deltaZ );
-}
-
-// Squared Euclidean distance between two 3D points (no square root taken).
-REAL fm_distanceSquared(const REAL *p1,const REAL *p2)
-{
-  const REAL deltaX = p1[0] - p2[0];
-  const REAL deltaY = p1[1] - p2[1];
-  const REAL deltaZ = p1[2] - p2[2];
-
-  return deltaX*deltaX + deltaY*deltaY + deltaZ*deltaZ;
-}
-
-
-// Squared distance between two points using only components 0 and 2 (X and Z);
-// component 1 is ignored.
-REAL fm_distanceSquaredXZ(const REAL *p1,const REAL *p2)
-{
-  const REAL deltaX = p1[0] - p2[0];
-  const REAL deltaZ = p1[2] - p2[2];
-
-  return deltaX*deltaX +  deltaZ*deltaZ;
-}
-
-
-// Computes the plane through triangle A,B,C. The normal is the normalized cross
-// product (B-C) x (A-B) and is written to 'n'; the return value is the plane
-// constant D = -(n . A).
-// If the cross product is shorter than 0.000001 the normal is set to zero, which
-// also makes D zero.
-REAL fm_computePlane(const REAL *A,const REAL *B,const REAL *C,REAL *n)
-{
-  const REAL edgeBC[3] = { B[0] - C[0], B[1] - C[1], B[2] - C[2] };
-  const REAL edgeAB[3] = { A[0] - B[0], A[1] - B[1], A[2] - B[2] };
-
-  const REAL crossX = edgeBC[1] * edgeAB[2] - edgeBC[2] * edgeAB[1];
-  const REAL crossY = edgeBC[2] * edgeAB[0] - edgeBC[0] * edgeAB[2];
-  const REAL crossZ = edgeBC[0] * edgeAB[1] - edgeBC[1] * edgeAB[0];
-
-  const REAL length = sqrt((crossX * crossX) + (crossY * crossY) + (crossZ * crossZ));
-
-  // Reciprocal length, or zero for a degenerate triangle.
-  REAL invLength = 0;
-  if ( !( length < 0.000001f ) )
-  {
-    invLength = 1.0f/length;
-  }
-
-  const REAL nx = crossX * invLength;
-  const REAL ny = crossY * invLength;
-  const REAL nz = crossZ * invLength;
-
-  // D is evaluated before 'n' is written.
-  const REAL planeD = 0.0f - ((nx*A[0])+(ny*A[1])+(nz*A[2]));
-
-  n[0] = nx;
-  n[1] = ny;
-  n[2] = nz;
-
-  return planeD;
-}
-
-// Evaluates the plane equation at point 'p':
-// plane[0]*x + plane[1]*y + plane[2]*z + plane[3].
-REAL fm_distToPlane(const REAL *plane,const REAL *p)
-{
-  const REAL signedDistance = p[0]*plane[0]+p[1]*plane[1]+p[2]*plane[2]+plane[3];
-  return signedDistance;
-}
-
-// Dot product of two 3-component vectors.
-REAL fm_dot(const REAL *p1,const REAL *p2)
-{
-  const REAL sum = p1[0]*p2[0]+p1[1]*p2[1]+p1[2]*p2[2];
-  return sum;
-}
-
-// Cross product: cross = a x b.
-// All three components are computed before any of them is stored, so 'cross'
-// may be the same buffer as 'a' or 'b'. Previously cross[0] was written before
-// the inputs for cross[1] and cross[2] were read, which corrupted the result
-// for such in-place calls.
-void fm_cross(REAL *cross,const REAL *a,const REAL *b)
-{
-  const REAL cx = a[1]*b[2] - a[2]*b[1];
-  const REAL cy = a[2]*b[0] - a[0]*b[2];
-  const REAL cz = a[0]*b[1] - a[1]*b[0];
-
-  cross[0] = cx;
-  cross[1] = cy;
-  cross[2] = cz;
-}
-
-// Writes to 'n' the direction from p1 to p2 (p2 - p1), then passes it through
-// fm_normalize().
-void fm_computeNormalVector(REAL *n,const REAL *p1,const REAL *p2)
-{
-  for (int i = 0; i < 3; i++)
-  {
-    n[i] = p2[i] - p1[i];
-  }
-  fm_normalize(n);
-}
-
-// Winding test for triangle p1,p2,p3. The normalized edge directions p1->p2 and
-// p1->p3 are crossed, and the result is dotted with the fixed reference axis
-// (1,0,0). Returns true when that dot product is strictly positive; the original
-// comment describes this case as "the triangle is clockwise".
-bool  fm_computeWindingOrder(const REAL *p1,const REAL *p2,const REAL *p3)
-{
-  REAL edge12[3];
-  REAL edge13[3];
-
-  fm_computeNormalVector(edge12,p1,p2); // p2-p1 (as vector) and then normalized
-  fm_computeNormalVector(edge13,p1,p3); // p3-p1 (as vector) and then normalized
-
-  REAL normal[3];
-  fm_cross(normal, edge12, edge13 );
-
-  REAL reference[3] = { 1, 0, 0 };
-  const REAL facing = fm_dot( normal, reference );
-
-  return !( facing <= 0 );
-}
-
-// Normalizes 'n' in place and returns its length before normalization.
-// If that length is not greater than 0.0000001 the vector is replaced with
-// (1,0,0); the returned value is still the original length.
-REAL fm_normalize(REAL *n)
-{
-  const REAL length = (REAL)sqrt(n[0]*n[0] + n[1]*n[1] + n[2]*n[2]);
-
-  if ( length > 0.0000001f )
-  {
-    const REAL inv = 1.0f / length;
-    for (int i = 0; i < 3; i++)
-    {
-      n[i]*=inv;
-    }
-    return length;
-  }
-
-  // Degenerate input: fall back to the X axis.
-  n[0] = 1;
-  n[1] = 0;
-  n[2] = 0;
-  return length;
-}
-
-
-// 4x4 matrix product: pM[4*r+c] = sum over k of pA[4*r+k] * pB[4*k+c].
-// The full product is accumulated into a local buffer before anything is stored
-// in 'pM', so 'pM' may be the same buffer as 'pA' or 'pB'.
-void  fm_matrixMultiply(const REAL *pA,const REAL *pB,REAL *pM)
-{
-  REAL product[16];
-
-  for (int r = 0; r < 4; r++)
-  {
-    for (int c = 0; c < 4; c++)
-    {
-      // Terms are added in k = 0,1,2,3 order.
-      REAL acc = pA[r*4+0] * pB[0*4+c];
-      for (int k = 1; k < 4; k++)
-      {
-        acc = acc + pA[r*4+k] * pB[k*4+c];
-      }
-      product[r*4+c] = acc;
-    }
-  }
-
-  for (int i = 0; i < 16; i++)
-  {
-    pM[i] = product[i];
-  }
-}
-
-
-// Converts Euler angles to a quaternion by first building a matrix with
-// fm_eulerToMatrix() and then converting that matrix with fm_matrixToQuat().
-// The "DX" suffix refers to the DirectX convention per the original comment.
-void  fm_eulerToQuatDX(REAL x,REAL y,REAL z,REAL *quat)
-{
-  REAL rotation[16];
-
-  fm_eulerToMatrix(x,y,z,rotation);
-  fm_matrixToQuat(rotation,quat);
-}
-
-// implementation copied from: http://blogs.msdn.com/mikepelton/archive/2004/10/29/249501.aspx
-// Converts Euler angles to a 4x4 matrix using the DirectX convention. The matrix
-// is first reset to identity, then its upper-left 3x3 block is overwritten.
-void  fm_eulerToMatrixDX(REAL x,REAL y,REAL z,REAL *matrix)
-{
-  fm_identity(matrix);
-
-  REAL *m0 = &matrix[0*4];
-  REAL *m1 = &matrix[1*4];
-  REAL *m2 = &matrix[2*4];
-
-  m0[0] = cos(z)*cos(y) + sin(z)*sin(x)*sin(y);
-  m0[1] = sin(z)*cos(x);
-  m0[2] = cos(z)*-sin(y) + sin(z)*sin(x)*cos(y);
-
-  m1[0] = -sin(z)*cos(y)+cos(z)*sin(x)*sin(y);
-  m1[1] = cos(z)*cos(x);
-  m1[2] = sin(z)*sin(y) +cos(z)*sin(x)*cos(y);
-
-  m2[0] = cos(x)*sin(y);
-  m2[1] = -sin(x);
-  m2[2] = cos(x)*cos(y);
-}
-
-
-// Writes x, y and z onto the first three diagonal elements of a 4x4 matrix.
-// No other element of 'fscale' is touched, so the caller must initialise it.
-void  fm_scale(REAL x,REAL y,REAL z,REAL *fscale)
-{
-  const REAL diagonal[3] = { x, y, z };
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    fscale[axis*4+axis] = diagonal[axis];
-  }
-}
-
-
-// Composes a 4x4 transform from a translation, a rotation quaternion and an
-// optional per-axis scale.
-//   position : 3 values stored into matrix[12..14]
-//   quat     : rotation, expanded with fm_quatToMatrix
-//   scale    : may be null; a null or all-ones scale skips the scale multiply
-//   matrix   : receives the 16-element result
-void  fm_composeTransform(const REAL *position,const REAL *quat,const REAL *scale,REAL *matrix)
-{
-  fm_identity(matrix);
-  fm_quatToMatrix(quat,matrix);
-
-  const bool unitScale = !scale || ( scale[0] == 1 && scale[1] == 1 && scale[2] == 1 );
-
-  if ( !unitScale )
-  {
-    // matrix = rotation * scaleMatrix; the multiply needs a copy of the
-    // rotation because the result is written back into 'matrix'.
-    REAL rotation[16];
-    memcpy(rotation,matrix,sizeof(REAL)*16);
-
-    REAL scaleMatrix[16];
-    fm_identity(scaleMatrix);
-    fm_scale(scale[0],scale[1],scale[2],scaleMatrix);
-
-    fm_matrixMultiply(rotation,scaleMatrix,matrix);
-  }
-
-  // translation goes in last so the scale multiply cannot disturb it
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    matrix[12+axis] = position[axis];
-  }
-}
-
-
-// Stores the 3-component translation into elements 12, 13 and 14 of a
-// 16-element matrix; all other elements are left alone.
-void  fm_setTranslation(const REAL *translation,REAL *matrix)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    matrix[12+axis] = translation[axis];
-  }
-}
-
-// Euclidean norm of (P1-P0) in 3D, i.e. the straight-line distance between
-// P0 = (x0,y0,z0) and P1 = (x1,y1,z1).
-// Based on ENORM0_3D by John Burkardt (modified 18 April 1999).
-static REAL enorm0_3d ( REAL x0, REAL y0, REAL z0, REAL x1, REAL y1, REAL z1 )
-{
-  const REAL dx = x1 - x0;
-  const REAL dy = y1 - y0;
-  const REAL dz = z1 - z0;
-
-  const REAL length = sqrt ( dx * dx + dy * dy + dz * dz );
-
-  return length;
-}
-
-
-// Area of the 3D triangle with corners P1, P2 and P3, computed as
-// 0.5 * base * height with the edge P1->P2 taken as the base.
-// Returns 0 when P1 and P2 coincide (zero-length base).
-// Based on TRIANGLE_AREA_3D by John Burkardt (modified 22 April 1999).
-static REAL triangle_area_3d ( REAL x1, REAL y1, REAL z1, REAL x2,REAL y2, REAL z2, REAL x3, REAL y3, REAL z3 )
-{
-  // Edge vectors leaving P1: u = P2-P1, w = P3-P1.
-  const REAL ux = x2 - x1;
-  const REAL uy = y2 - y1;
-  const REAL uz = z2 - z1;
-
-  const REAL wx = x3 - x1;
-  const REAL wy = y3 - y1;
-  const REAL wz = z3 - z1;
-
-  // Projection of w onto u (not yet divided by |u|^2).
-  const REAL dot = ux * wx + uy * wy + uz * wz;
-
-  const REAL base = enorm0_3d ( x1, y1, z1, x2, y2, z2 );
-
-  // The height is the length of w once its projection onto u is removed.
-  REAL height = 0.0;
-
-  if ( base != 0.0 )
-  {
-    const REAL alpha = dot / ( base * base );
-
-    const REAL a = wx - alpha * ux;
-    const REAL b = wy - alpha * uy;
-    const REAL c = wz - alpha * uz;
-
-    height = sqrt ( a * a + b * b + c * c );
-  }
-
-  return 0.5f * base * height;
-}
-
-
-// Returns the area of the triangle whose corners are the 3-component
-// points p1, p2 and p3 (thin wrapper over triangle_area_3d).
-REAL fm_computeArea(const REAL *p1,const REAL *p2,const REAL *p3)
-{
-  return triangle_area_3d(p1[0],p1[1],p1[2],
-                          p2[0],p2[1],p2[2],
-                          p3[0],p3[1],p3[2]);
-}
-
-
-// Linear interpolation between two 3D points:
-// dest = p1 + (p2 - p1) * lerpValue, evaluated per component.
-void  fm_lerp(const REAL *p1,const REAL *p2,REAL *dest,REAL lerpValue)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    dest[axis] = ((p2[axis] - p1[axis])*lerpValue) + p1[axis];
-  }
-}
-
-// Edge-crossing test in the XZ plane (components 0 and 2 only).
-// Returns true when the edge i->j spans p's Z value (half-open interval) and
-// p's X is smaller than the edge's X at that Z.
-bool fm_pointTestXZ(const REAL *p,const REAL *i,const REAL *j)
-{
-  const bool spansZ = ( ( i[2] <= p[2] ) && ( p[2]  < j[2] ) ) ||
-                      ( ( j[2] <= p[2] ) && ( p[2]  < i[2] ) );
-
-  // spansZ guarantees i[2] != j[2], so the division below is safe.
-  if ( !spansZ )
-    return false;
-
-  return p[0] < (j[0] - i[0]) * (p[2] - i[2]) / (j[2] - i[2]) + i[0];
-}
-
-
-// Point-in-triangle test in the XZ plane using crossing parity: each edge
-// that passes fm_pointTestXZ toggles the result, so an odd number of
-// crossings means 'inside'.
-bool  fm_insideTriangleXZ(const REAL *p,const REAL *p1,const REAL *p2,const REAL *p3)
-{
-  bool inside = false;
-
-  if ( fm_pointTestXZ(p,p1,p2) ) inside = !inside;
-  if ( fm_pointTestXZ(p,p2,p3) ) inside = !inside;
-  if ( fm_pointTestXZ(p,p3,p1) ) inside = !inside;
-
-  return inside;
-}
-
-// Returns true when the point 'pos' lies inside the box [bmin,bmax] on all
-// three axes; points exactly on a face count as inside.
-bool  fm_insideAABB(const REAL *pos,const REAL *bmin,const REAL *bmax)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    const bool within = pos[axis] >= bmin[axis] && pos[axis] <= bmax[axis];
-    if ( !within )
-      return false;
-  }
-
-  return true;
-}
-
-
-// Computes the clip code of 'pos' against the box [bmin,bmax]: for each axis
-// the matching FMCS_*MIN bit is set when pos is below the box, the FMCS_*MAX
-// bit when it is above. A return value of 0 means no bit was set.
-NxU32 fm_clipTestPoint(const REAL *bmin,const REAL *bmax,const REAL *pos)
-{
-  const NxU32 belowBit[3] = { FMCS_XMIN, FMCS_YMIN, FMCS_ZMIN };
-  const NxU32 aboveBit[3] = { FMCS_XMAX, FMCS_YMAX, FMCS_ZMAX };
-
-  NxU32 code = 0;
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    if ( pos[axis] < bmin[axis] )
-      code|=belowBit[axis];
-    else if ( pos[axis] > bmax[axis] )
-      code|=aboveBit[axis];
-  }
-
-  return code;
-}
-
-// Clip code of 'pos' against the box [bmin,bmax] using only the X and Z
-// components; the Y component is never read and no Y bit is ever set.
-NxU32 fm_clipTestPointXZ(const REAL *bmin,const REAL *bmax,const REAL *pos)
-{
-  const NxI32 component[2] = { 0, 2 };
-  const NxU32 belowBit[2]  = { FMCS_XMIN, FMCS_ZMIN };
-  const NxU32 aboveBit[2]  = { FMCS_XMAX, FMCS_ZMAX };
-
-  NxU32 code = 0;
-
-  for (NxI32 n = 0; n < 2; n++)
-  {
-    const NxI32 axis = component[n];
-
-    if ( pos[axis] < bmin[axis] )
-      code|=belowBit[n];
-    else if ( pos[axis] > bmax[axis] )
-      code|=aboveBit[n];
-  }
-
-  return code;
-}
-
-// Clip-tests the three triangle corners p1, p2, p3 against the box.
-// Returns the OR of the three per-point clip codes; 'andCode' receives their
-// AND (bits that are set for every corner).
-NxU32 fm_clipTestAABB(const REAL *bmin,const REAL *bmax,const REAL *p1,const REAL *p2,const REAL *p3,NxU32 &andCode)
-{
-  const REAL *corner[3] = { p1, p2, p3 };
-
-  NxU32 orCode = 0;
-
-  // start with every bit set so the AND only keeps bits common to all corners
-  andCode = FMCS_XMIN | FMCS_XMAX | FMCS_YMIN | FMCS_YMAX | FMCS_ZMIN | FMCS_ZMAX;
-
-  for (NxI32 n = 0; n < 3; n++)
-  {
-    const NxU32 code = fm_clipTestPoint(bmin,bmax,corner[n]);
-    orCode|=code;
-    andCode&=code;
-  }
-
-  return orCode;
-}
-
-// Slab test of the segment si->ei against the box [bmin,bmax].
-// For each axis the parametric entry/exit times of the segment are computed
-// and intersected with the running interval [enter,leave] (initially [0,1]).
-// Returns false as soon as the interval becomes empty; on success '*time'
-// receives the entry time. '*time' is not written when false is returned.
-bool intersect(const REAL *si,const REAL *ei,const REAL *bmin,const REAL *bmax,REAL *time)
-{
-  REAL enter = 0;
-  REAL leave = 1;
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    const REAL s  = si[axis];
-    const REAL e  = ei[axis];
-    const REAL lo = bmin[axis];
-    const REAL hi = bmax[axis];
-    const REAL di = e - s;
-
-    REAL st;
-    REAL et;
-
-    if ( s < e )
-    {
-      // travelling in the positive direction along this axis
-      if ( s > hi || e < lo )
-        return false;
-      st = ( s < lo ) ? ( lo - s ) / di : 0;
-      et = ( e > hi ) ? ( hi - s ) / di : 1;
-    }
-    else
-    {
-      // travelling in the negative direction (or not moving) along this axis
-      if ( e > hi || s < lo )
-        return false;
-      st = ( s > hi ) ? ( hi - s ) / di : 0;
-      et = ( e < lo ) ? ( lo - s ) / di : 1;
-    }
-
-    if ( st > enter ) enter = st;
-    if ( et < leave ) leave = et;
-    if ( leave < enter )
-      return false;
-  }
-
-  *time = enter;
-  return true;
-}
-
-
-
-// Tests the segment p1->p2 against the box [bmin,bmax]. On a hit 'time'
-// receives the parametric entry time computed by intersect().
-bool fm_lineTestAABB(const REAL *p1,const REAL *p2,const REAL *bmin,const REAL *bmax,REAL &time)
-{
-  return intersect(p1,p2,bmin,bmax,&time);
-}
-
-
-// Same as fm_lineTestAABB but the box's Y extent is replaced by
-// [-1e9, 1e9], so only X and Z constrain the test for coordinates within
-// that range.
-bool fm_lineTestAABBXZ(const REAL *p1,const REAL *p2,const REAL *bmin,const REAL *bmax,REAL &time)
-{
-  REAL flatMin[3];
-  REAL flatMax[3];
-
-  flatMin[0] = bmin[0];
-  flatMax[0] = bmax[0];
-
-  flatMin[1] = -1e9;
-  flatMax[1] = 1e9;
-
-  flatMin[2] = bmin[2];
-  flatMax[2] = bmax[2];
-
-  return intersect(p1,p2,flatMin,flatMax,&time);
-}
-
-// Accumulates the point 'p' into a running bounding box: each component of
-// bmin/bmax is lowered/raised when p lies outside the current range.
-void  fm_minmax(const REAL *p,REAL *bmin,REAL *bmax)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    if ( p[axis] < bmin[axis] ) bmin[axis] = p[axis];
-    if ( p[axis] > bmax[axis] ) bmax[axis] = p[axis];
-  }
-}
-
-// Solves the plane equation plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0
-// for x, given y and z. No check is made for plane[0] == 0.
-REAL fm_solveX(const REAL *plane,REAL y,REAL z)
-{
-  const REAL rest = y*plane[1]+z*plane[2]+plane[3];
-  return rest / -plane[0];
-}
-
-// Solves the plane equation plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0
-// for y, given x and z. No check is made for plane[1] == 0.
-REAL fm_solveY(const REAL *plane,REAL x,REAL z)
-{
-  const REAL rest = x*plane[0]+z*plane[2]+plane[3];
-  return rest / -plane[1];
-}
-
-
-// Solves the plane equation plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0
-// for z, given x and y. No check is made for plane[2] == 0.
-REAL fm_solveZ(const REAL *plane,REAL x,REAL y)
-{
-  const REAL rest = x*plane[0]+y*plane[1]+plane[3];
-  return rest / -plane[2];
-}
-
-
-// Writes the midpoint of the box [bmin,bmax] into 'center'.
-void  fm_getAABBCenter(const REAL *bmin,const REAL *bmax,REAL *center)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    center[axis] = (bmax[axis]-bmin[axis])*0.5f+bmin[axis];
-  }
-}
-
-// Returns the axis along which 'normal' has the strictly largest absolute
-// component. Ties (and anything not strictly dominated by Y or Z) fall back
-// to FM_XAXIS.
-FM_Axis fm_getDominantAxis(const REAL normal[3])
-{
-  const REAL ax = fabs(normal[0]);
-  const REAL ay = fabs(normal[1]);
-  const REAL az = fabs(normal[2]);
-
-  if ( ay > ax && ay > az )
-    return FM_YAXIS;
-
-  if ( az > ax && az > ay )
-    return FM_ZAXIS;
-
-  return FM_XAXIS;
-}
-
-
-// Tests the segment p1->p2 against a sphere.
-// A non-degenerate segment is normalised into a ray and forwarded to
-// fm_raySphereIntersect with the segment length as the distance limit.
-// A zero-length segment is treated as a point: it hits when p1 is strictly
-// inside the sphere, and p1 is reported as the intersection.
-// 'intersect' may be null in the zero-length case.
-bool fm_lineSphereIntersect(const REAL *center,REAL radius,const REAL *p1,const REAL *p2,REAL *intersect)
-{
-  REAL dir[3];
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    dir[axis] = p2[axis]-p1[axis];
-  }
-
-  const REAL distance = sqrt( dir[0]*dir[0]+dir[1]*dir[1]+dir[2]*dir[2]);
-
-  if ( distance > 0 )
-  {
-    const REAL recip = 1.0f / distance;
-    dir[0]*=recip;
-    dir[1]*=recip;
-    dir[2]*=recip;
-    return fm_raySphereIntersect(center,radius,p1,dir,distance,intersect);
-  }
-
-  // degenerate segment: reuse 'dir' for the offset from p1 to the centre
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    dir[axis] = center[axis]-p1[axis];
-  }
-
-  const REAL d2 = dir[0]*dir[0]+dir[1]*dir[1]+dir[2]*dir[2];
-  const REAL r2 = radius*radius;
-
-  if ( !( d2 < r2 ) )
-    return false;
-
-  if ( intersect )
-  {
-    intersect[0] = p1[0];
-    intersect[1] = p1[1];
-    intersect[2] = p1[2];
-  }
-
-  return true;
-}
-
-#define DOT(p1,p2) (p1[0]*p2[0]+p1[1]*p2[1]+p1[2]*p2[2])
-
-// Intersects a ray with a sphere.
-//   center, radius : the sphere
-//   pos, dir       : ray origin and direction (the maths assumes 'dir' is
-//                    unit length; it is not normalised here)
-//   distance       : hits whose ray parameter is >= distance are rejected
-//   intersect      : optional; when non-null it receives the hit location
-// Returns true on a hit. Previously a hit was only reported when 'intersect'
-// was non-null, so callers passing null always got false; the hit test is
-// now independent of whether the location is requested.
-bool fm_raySphereIntersect(const REAL *center,REAL radius,const REAL *pos,const REAL *dir,REAL distance,REAL *intersect)
-{
-  bool ret = false;
-
-  // vector from the ray origin to the sphere centre
-  REAL E0[3];
-
-  E0[0] = center[0] - pos[0];
-  E0[1] = center[1] - pos[1];
-  E0[2] = center[2] - pos[2];
-
-  REAL V[3];
-
-  V[0]  = dir[0];
-  V[1]  = dir[1];
-  V[2]  = dir[2];
-
-  REAL dist2   = E0[0]*E0[0] + E0[1]*E0[1] + E0[2] * E0[2];
-  REAL radius2 = radius*radius; // radius squared..
-
-  // Bug Fix For Gem, if origin is *inside* the sphere, invert the
-  // direction vector so that we get a valid intersection location.
-  if ( dist2 < radius2 )
-  {
-    V[0]*=-1;
-    V[1]*=-1;
-    V[2]*=-1;
-  }
-
-  // projection of the centre offset onto the (possibly inverted) direction
-  REAL v = E0[0]*V[0] + E0[1]*V[1] + E0[2]*V[2];
-
-  // squared half-chord length; positive only if the ray's line cuts the sphere
-  REAL disc = radius2 - (dist2 - v*v);
-
-  if (disc > 0.0f)
-  {
-    REAL d = sqrt(disc);
-    REAL diff = v-d;
-    if ( diff < distance )
-    {
-      ret = true;
-      if ( intersect )
-      {
-        intersect[0] = pos[0]+V[0]*diff;
-        intersect[1] = pos[1]+V[1]*diff;
-        intersect[2] = pos[2]+V[2]*diff;
-      }
-    }
-  }
-
-  return ret;
-}
-
-
-// Evaluates a Catmull-Rom spline through control points p1..p4 at parameter
-// 's' and writes the 3D result to 'out_vector'. Each output component is the
-// weighted sum of the four control points, halved.
-void fm_catmullRom(REAL *out_vector,const REAL *p1,const REAL *p2,const REAL *p3,const REAL *p4, const REAL s)
-{
-  const REAL s2 = s * s;
-  const REAL s3 = s2 * s;
-
-  // basis weights for the four control points
-  const REAL w1 = -s3 + 2*s2 - s;
-  const REAL w2 = 3 * s3 - 5 * s2 + 2;
-  const REAL w3 = -3 * s3 +4 * s2 + s;
-  const REAL w4 = s3 - s2;
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    out_vector[axis] = (w1 * p1[axis] + w2 * p2[axis] + w3 * p3[axis] + w4 * p4[axis])*0.5f;
-  }
-}
-
-// Returns true when the two boxes overlap (touching faces count as overlap):
-// they are disjoint as soon as one box starts beyond the other's end on any
-// axis.
-bool fm_intersectAABB(const REAL *bmin1,const REAL *bmax1,const REAL *bmin2,const REAL *bmax2)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    const bool separated = (bmin1[axis] > bmax2[axis]) || (bmin2[axis] > bmax1[axis]);
-    if ( separated )
-      return false;
-  }
-
-  return true;
-}
-
-// Returns true when the box [tbmin,tbmax] is fully contained in the box
-// [obmin,obmax]; shared faces count as contained.
-bool  fm_insideAABB(const REAL *obmin,const REAL *obmax,const REAL *tbmin,const REAL *tbmax)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    const bool contained = tbmax[axis] <= obmax[axis] && tbmin[axis] >= obmin[axis];
-    if ( !contained )
-      return false;
-  }
-
-  return true;
-}
-
-
-// Computes the quaternion (x,y,z,w) that rotates v0 onto v1 along the
-// shortest arc.
-//
-// Reference, from Stan Melax in Game Gems I
-//  Quaternion q;
-//  vector3 c = CrossProduct(v0,v1);
-//  REAL   d = DotProduct(v0,v1);
-//  REAL   s = (REAL)sqrt((1+d)*2);
-//  q.x = c.x / s;
-//  q.y = c.y / s;
-//  q.z = c.z / s;
-//  q.w = s /2.0f;
-//  return q;
-//
-// The formula assumes v0 and v1 are unit length; that is not checked here.
-// When the vectors point in opposite directions (1+d <= 0) the formula
-// divides by zero, so that case is handled separately: the result is a
-// half-turn about an axis perpendicular to v0.
-void fm_rotationArc(const REAL *v0,const REAL *v1,REAL *quat)
-{
-  REAL cross[3];
-
-  fm_cross(cross,v0,v1);
-  REAL d = fm_dot(v0,v1);
-  REAL sSquared = (1+d)*2;
-
-  if ( sSquared > 0 )
-  {
-    REAL s = sqrt(sSquared);
-    REAL recip = 1.0f / s;
-
-    quat[0] = cross[0] * recip;
-    quat[1] = cross[1] * recip;
-    quat[2] = cross[2] * recip;
-    quat[3] = s * 0.5f;
-  }
-  else
-  {
-    // Opposite vectors: every axis perpendicular to v0 gives a valid 180
-    // degree rotation. Cross v0 with whichever of the X/Y world axes it is
-    // less aligned with, so the cross product cannot vanish for a unit v0.
-    REAL reference[3] = { 0, 0, 0 };
-    if ( fabs(v0[0]) < fabs(v0[1]) )
-      reference[0] = 1;
-    else
-      reference[1] = 1;
-
-    REAL axis[3];
-    fm_cross(axis,v0,reference);
-    fm_normalize(axis);
-
-    quat[0] = axis[0];
-    quat[1] = axis[1];
-    quat[2] = axis[2];
-    quat[3] = 0; // cos(180/2)
-  }
-}
-
-
-// Returns the distance from 'Point' to the segment LineStart->LineEnd.
-//   intersection : receives the closest point on the segment
-//   type         : LS_START / LS_END when the closest point is (or is within
-//                  2*epsilon of) an end point, LS_MIDDLE otherwise
-//   epsilon      : tolerance used to snap an interior closest point to an end
-// For a zero-length segment the closest point is LineEnd and the distance
-// from Point to it is returned (previously 0 was returned in that case,
-// regardless of where Point was).
-REAL fm_distancePointLineSegment(const REAL *Point,const REAL *LineStart,const REAL *LineEnd,REAL *intersection,LineSegmentType &type,REAL epsilon)
-{
-  REAL ret;
-
-  REAL LineMag = fm_distance( LineEnd, LineStart );
-
-  if ( LineMag > 0 )
-  {
-    // parametric position of the projection of Point onto the line
-    REAL U = ( ( ( Point[0] - LineStart[0] ) * ( LineEnd[0] - LineStart[0] ) ) + ( ( Point[1] - LineStart[1] ) * ( LineEnd[1] - LineStart[1] ) ) + ( ( Point[2] - LineStart[2] ) * ( LineEnd[2] - LineStart[2] ) ) ) / ( LineMag * LineMag );
-    if( U < 0.0f || U > 1.0f )
-    {
-      // projection falls outside the segment: the nearer end point wins
-      REAL d1 = fm_distanceSquared(Point,LineStart);
-      REAL d2 = fm_distanceSquared(Point,LineEnd);
-      if ( d1 <= d2 )
-      {
-        ret = sqrt(d1);
-        intersection[0] = LineStart[0];
-        intersection[1] = LineStart[1];
-        intersection[2] = LineStart[2];
-        type = LS_START;
-      }
-      else
-      {
-        ret = sqrt(d2);
-        intersection[0] = LineEnd[0];
-        intersection[1] = LineEnd[1];
-        intersection[2] = LineEnd[2];
-        type = LS_END;
-      }
-    }
-    else
-    {
-      intersection[0] = LineStart[0] + U * ( LineEnd[0] - LineStart[0] );
-      intersection[1] = LineStart[1] + U * ( LineEnd[1] - LineStart[1] );
-      intersection[2] = LineStart[2] + U * ( LineEnd[2] - LineStart[2] );
-
-      ret = fm_distance(Point,intersection);
-
-      REAL d1 = fm_distanceSquared(intersection,LineStart);
-      REAL d2 = fm_distanceSquared(intersection,LineEnd);
-      REAL mag = (epsilon*2)*(epsilon*2);
-
-      if ( d1 < mag ) // closest point is within 2*epsilon of the start: treat it as the 'start'
-      {
-        type = LS_START;
-      }
-      else if ( d2 < mag ) // likewise for the end
-      {
-        type = LS_END;
-      }
-      else
-      {
-        type = LS_MIDDLE;
-      }
-
-    }
-  }
-  else
-  {
-    // degenerate (zero-length) segment: measure to its single point
-    intersection[0] = LineEnd[0];
-    intersection[1] = LineEnd[1];
-    intersection[2] = LineEnd[2];
-    type = LS_END;
-    ret = fm_distance(Point,LineEnd);
-  }
-
-  return ret;
-}
-
-
-#ifndef BEST_FIT_PLANE_H
-
-#define BEST_FIT_PLANE_H
-
-// Eigen solver for a symmetric 3x3 matrix.
-// Usage as seen in this file: fill mElement with the matrix, call
-// DecrSortEigenStuff(), then read the eigenvalues from m_afDiag (largest
-// first) and the matching eigenvectors from the columns of mElement.
-// Only the upper triangle of mElement is read as input.
-template <class Type> class Eigen
-{
-public:
-
-
-  // Runs the full pipeline: tridiagonal reduction, QL iteration, sort by
-  // decreasing eigenvalue, then force the eigenvector matrix to be a rotation.
-  // The bool result of QLAlgorithm() (convergence) is ignored here.
-  void DecrSortEigenStuff(void)
-  {
-    Tridiagonal(); //reduce the matrix to tridiagonal form.
-    QLAlgorithm(); //iterate until the sub-diagonal is negligible
-    DecreasingSort();
-    GuaranteeRotation();
-  }
-
-  // Reduces the symmetric matrix in mElement to tridiagonal form.
-  // On exit m_afDiag holds the diagonal, m_afSubd[0..1] the sub-diagonal
-  // (m_afSubd[2] is set to 0) and mElement holds the transform that was
-  // applied. m_bIsRotation records whether that transform is the identity
-  // (true) or the sign-flipping matrix built in the first branch (false).
-  void Tridiagonal(void)
-  {
-    Type fM00 = mElement[0][0];
-    Type fM01 = mElement[0][1];
-    Type fM02 = mElement[0][2];
-    Type fM11 = mElement[1][1];
-    Type fM12 = mElement[1][2];
-    Type fM22 = mElement[2][2];
-
-    m_afDiag[0] = fM00;
-    m_afSubd[2] = 0;
-    if (fM02 != (Type)0.0)
-    {
-      // the [0][2] entry is non-zero and has to be eliminated
-      Type fLength = sqrt(fM01*fM01+fM02*fM02);
-      Type fInvLength = ((Type)1.0)/fLength;
-      fM01 *= fInvLength;
-      fM02 *= fInvLength;
-      Type fQ = ((Type)2.0)*fM01*fM12+fM02*(fM22-fM11);
-      m_afDiag[1] = fM11+fM02*fQ;
-      m_afDiag[2] = fM22-fM02*fQ;
-      m_afSubd[0] = fLength;
-      m_afSubd[1] = fM12-fM01*fQ;
-      mElement[0][0] = (Type)1.0;
-      mElement[0][1] = (Type)0.0;
-      mElement[0][2] = (Type)0.0;
-      mElement[1][0] = (Type)0.0;
-      mElement[1][1] = fM01;
-      mElement[1][2] = fM02;
-      mElement[2][0] = (Type)0.0;
-      mElement[2][1] = fM02;
-      mElement[2][2] = -fM01;
-      m_bIsRotation = false;
-    }
-    else
-    {
-      // already tridiagonal: copy the entries and use the identity transform
-      m_afDiag[1] = fM11;
-      m_afDiag[2] = fM22;
-      m_afSubd[0] = fM01;
-      m_afSubd[1] = fM12;
-      mElement[0][0] = (Type)1.0;
-      mElement[0][1] = (Type)0.0;
-      mElement[0][2] = (Type)0.0;
-      mElement[1][0] = (Type)0.0;
-      mElement[1][1] = (Type)1.0;
-      mElement[1][2] = (Type)0.0;
-      mElement[2][0] = (Type)0.0;
-      mElement[2][1] = (Type)0.0;
-      mElement[2][2] = (Type)1.0;
-      m_bIsRotation = true;
-    }
-  }
-
-  // QL iteration on the tridiagonal matrix held in m_afDiag / m_afSubd.
-  // The rotations applied are accumulated into the columns of mElement.
-  // Returns false if any of the three eigenvalues needs iMaxIter (32)
-  // iterations without the sub-diagonal becoming negligible, true otherwise.
-  bool QLAlgorithm(void)
-  {
-    const NxI32 iMaxIter = 32;
-
-    for (NxI32 i0 = 0; i0 <3; i0++)
-    {
-      NxI32 i1;
-      for (i1 = 0; i1 < iMaxIter; i1++)
-      {
-        // find the first sub-diagonal entry at or after i0 that is negligible
-        // relative to its neighbouring diagonal entries
-        NxI32 i2;
-        for (i2 = i0; i2 <= (3-2); i2++)
-        {
-          Type fTmp = fabs(m_afDiag[i2]) + fabs(m_afDiag[i2+1]);
-          if ( fabs(m_afSubd[i2]) + fTmp == fTmp )
-            break;
-        }
-        // nothing left to eliminate for this eigenvalue
-        if (i2 == i0)
-        {
-          break;
-        }
-
-        Type fG = (m_afDiag[i0+1] - m_afDiag[i0])/(((Type)2.0) * m_afSubd[i0]);
-        Type fR = sqrt(fG*fG+(Type)1.0);
-        if (fG < (Type)0.0)
-        {
-          fG = m_afDiag[i2]-m_afDiag[i0]+m_afSubd[i0]/(fG-fR);
-        }
-        else
-        {
-          fG = m_afDiag[i2]-m_afDiag[i0]+m_afSubd[i0]/(fG+fR);
-        }
-        Type fSin = (Type)1.0, fCos = (Type)1.0, fP = (Type)0.0;
-        for (NxI32 i3 = i2-1; i3 >= i0; i3--)
-        {
-          Type fF = fSin*m_afSubd[i3];
-          Type fB = fCos*m_afSubd[i3];
-          // divide by the larger magnitude of fF / fG when forming sin/cos
-          if (fabs(fF) >= fabs(fG))
-          {
-            fCos = fG/fF;
-            fR = sqrt(fCos*fCos+(Type)1.0);
-            m_afSubd[i3+1] = fF*fR;
-            fSin = ((Type)1.0)/fR;
-            fCos *= fSin;
-          }
-          else
-          {
-            fSin = fF/fG;
-            fR = sqrt(fSin*fSin+(Type)1.0);
-            m_afSubd[i3+1] = fG*fR;
-            fCos = ((Type)1.0)/fR;
-            fSin *= fCos;
-          }
-          fG = m_afDiag[i3+1]-fP;
-          fR = (m_afDiag[i3]-fG)*fSin+((Type)2.0)*fB*fCos;
-          fP = fSin*fR;
-          m_afDiag[i3+1] = fG+fP;
-          fG = fCos*fR-fB;
-          // apply the same rotation to columns i3 and i3+1 of mElement
-          for (NxI32 i4 = 0; i4 < 3; i4++)
-          {
-            fF = mElement[i4][i3+1];
-            mElement[i4][i3+1] = fSin*mElement[i4][i3]+fCos*fF;
-            mElement[i4][i3] = fCos*mElement[i4][i3]-fSin*fF;
-          }
-        }
-        m_afDiag[i0] -= fP;
-        m_afSubd[i0] = fG;
-        m_afSubd[i2] = (Type)0.0;
-      }
-      // iteration budget exhausted without convergence
-      if (i1 == iMaxIter)
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Selection sort of the eigenvalues into decreasing order, swapping the
-  // matching columns of mElement along with them.
-  void DecreasingSort(void)
-  {
-    //sort eigenvalues in decreasing order, e[0] >= ... >= e[iSize-1]
-    for (NxI32 i0 = 0, i1; i0 <= 3-2; i0++)
-    {
-      // locate maximum eigenvalue
-      i1 = i0;
-      Type fMax = m_afDiag[i1];
-      NxI32 i2;
-      for (i2 = i0+1; i2 < 3; i2++)
-      {
-        if (m_afDiag[i2] > fMax)
-        {
-          i1 = i2;
-          fMax = m_afDiag[i1];
-        }
-      }
-
-      if (i1 != i0)
-      {
-        // swap eigenvalues
-        m_afDiag[i1] = m_afDiag[i0];
-        m_afDiag[i0] = fMax;
-        // swap eigenvectors
-        for (i2 = 0; i2 < 3; i2++)
-        {
-          Type fTmp = mElement[i2][i0];
-          mElement[i2][i0] = mElement[i2][i1];
-          mElement[i2][i1] = fTmp;
-          // toggled once per row, i.e. three times per column swap, which
-          // nets out to a single flip of the flag per swap
-          m_bIsRotation = !m_bIsRotation;
-        }
-      }
-    }
-  }
-
-
-  // If the accumulated transform is flagged as not being a rotation, negate
-  // its first column. The flag itself is not updated here.
-  void GuaranteeRotation(void)
-  {
-    if (!m_bIsRotation)
-    {
-      // change sign on the first column
-      for (NxI32 iRow = 0; iRow <3; iRow++)
-      {
-        mElement[iRow][0] = -mElement[iRow][0];
-      }
-    }
-  }
-
-  Type mElement[3][3]; // in: symmetric matrix; out: eigenvectors stored as columns
-  Type m_afDiag[3];    // diagonal of the tridiagonal form; eigenvalues after QLAlgorithm
-  Type m_afSubd[3];    // sub-diagonal of the tridiagonal form
-  bool m_bIsRotation;  // tracks whether mElement is flagged as a rotation
-};
-
-#endif
-
-// Fits a plane to a set of (optionally weighted) points.
-//   vcount  : number of points
-//   points  : first point; consecutive points are 'vstride' BYTES apart
-//   vstride : byte stride between points
-//   weights : optional (may be null) per-point weight, 'wstride' bytes apart
-//   wstride : byte stride between weights
-//   plane   : receives the plane as (nx, ny, nz, d) with d = -dot(n, centroid)
-// The normal is the eigenvector with the smallest eigenvalue of the
-// weighted covariance matrix of the points about their weighted centroid.
-// Returns false, leaving 'plane' untouched, when there is nothing to fit
-// (no points, or weights summing to zero); previously that case divided by
-// zero and still reported success.
-bool fm_computeBestFitPlane(NxU32 vcount,
-                     const REAL *points,
-                     NxU32 vstride,
-                     const REAL *weights,
-                     NxU32 wstride,
-                     REAL *plane)
-{
-  if ( vcount == 0 || points == 0 )
-    return false;
-
-  REAL kOrigin[3] = { 0, 0, 0 };
-
-  REAL wtotal = 0;
-
-  // pass 1: weighted sum of the points, for the centroid
-  {
-    const char *source  = (const char *) points;
-    const char *wsource = (const char *) weights;
-
-    for (NxU32 i=0; i<vcount; i++)
-    {
-
-      const REAL *p = (const REAL *) source;
-
-      REAL w = 1;
-
-      if ( wsource )
-      {
-        const REAL *ws = (const REAL *) wsource;
-        w = *ws; //
-        wsource+=wstride;
-      }
-
-      kOrigin[0]+=p[0]*w;
-      kOrigin[1]+=p[1]*w;
-      kOrigin[2]+=p[2]*w;
-
-      wtotal+=w;
-
-      source+=vstride;
-    }
-  }
-
-  // a zero total weight would make the reciprocal below infinite
-  if ( wtotal == 0 )
-    return false;
-
-  REAL recip = 1.0f / wtotal; // reciprocal of total weighting
-
-  kOrigin[0]*=recip;
-  kOrigin[1]*=recip;
-  kOrigin[2]*=recip;
-
-
-  REAL fSumXX=0;
-  REAL fSumXY=0;
-  REAL fSumXZ=0;
-
-  REAL fSumYY=0;
-  REAL fSumYZ=0;
-  REAL fSumZZ=0;
-
-
-  // pass 2: accumulate the covariance terms about the centroid
-  {
-    const char *source  = (const char *) points;
-    const char *wsource = (const char *) weights;
-
-    for (NxU32 i=0; i<vcount; i++)
-    {
-
-      const REAL *p = (const REAL *) source;
-
-      REAL w = 1;
-
-      if ( wsource )
-      {
-        const REAL *ws = (const REAL *) wsource;
-        w = *ws; //
-        wsource+=wstride;
-      }
-
-      REAL kDiff[3];
-
-      kDiff[0] = w*(p[0] - kOrigin[0]); // apply vertex weighting!
-      kDiff[1] = w*(p[1] - kOrigin[1]);
-      kDiff[2] = w*(p[2] - kOrigin[2]);
-
-      fSumXX+= kDiff[0] * kDiff[0]; // sum of the products of the differences.
-      fSumXY+= kDiff[0] * kDiff[1];
-      fSumXZ+= kDiff[0] * kDiff[2];
-
-      fSumYY+= kDiff[1] * kDiff[1];
-      fSumYZ+= kDiff[1] * kDiff[2];
-      fSumZZ+= kDiff[2] * kDiff[2];
-
-
-      source+=vstride;
-    }
-  }
-
-  fSumXX *= recip;
-  fSumXY *= recip;
-  fSumXZ *= recip;
-  fSumYY *= recip;
-  fSumYZ *= recip;
-  fSumZZ *= recip;
-
-  // setup the eigensolver with the symmetric covariance matrix
-  Eigen<REAL> kES;
-
-  kES.mElement[0][0] = fSumXX;
-  kES.mElement[0][1] = fSumXY;
-  kES.mElement[0][2] = fSumXZ;
-
-  kES.mElement[1][0] = fSumXY;
-  kES.mElement[1][1] = fSumYY;
-  kES.mElement[1][2] = fSumYZ;
-
-  kES.mElement[2][0] = fSumXZ;
-  kES.mElement[2][1] = fSumYZ;
-  kES.mElement[2][2] = fSumZZ;
-
-  // compute eigenstuff, smallest eigenvalue is in last position
-  kES.DecrSortEigenStuff();
-
-  // the last column is the direction of least variance: the plane normal
-  REAL kNormal[3];
-
-  kNormal[0] = kES.mElement[0][2];
-  kNormal[1] = kES.mElement[1][2];
-  kNormal[2] = kES.mElement[2][2];
-
-  // the minimum energy
-  plane[0] = kNormal[0];
-  plane[1] = kNormal[1];
-  plane[2] = kNormal[2];
-
-  // the plane passes through the centroid
-  plane[3] = 0 - fm_dot(kNormal,kOrigin);
-
-  return true;
-}
-
-
-// Compares the directions of the segments a1->a2 and b1->b2.
-// Returns true when the dot product of the two normalised directions is at
-// least 'epsilon' (so 'epsilon' is a cosine threshold, e.g. close to 1).
-// NOTE(review): only the directions are compared; the test does not check
-// that the two segments lie on the same line - confirm callers expect that.
-//
-// The second direction used to be formed as (b2-a1)-(b1-a1) for x/y and
-// (b2-a2)-(b1-a2) for z. The a1/a2 offsets cancel, so it is simply b2-b1;
-// computing it directly removes the inconsistent offset and the rounding
-// error it introduced.
-bool fm_colinear(const REAL a1[3],const REAL a2[3],const REAL b1[3],const REAL b2[3],REAL epsilon)  // true if these two line segments are co-linear.
-{
-  REAL dir1[3];
-  REAL dir2[3];
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    dir1[axis] = a2[axis] - a1[axis];
-    dir2[axis] = b2[axis] - b1[axis];
-  }
-
-  fm_normalize(dir1);
-  fm_normalize(dir2);
-
-  REAL dot = fm_dot(dir1,dir2);
-
-  return dot >= epsilon;
-}
-
-// Tests whether the polyline p1->p2->p3 continues in (nearly) the same
-// direction: returns true when the dot product of the normalised directions
-// p1->p2 and p2->p3 is at least 'epsilon'.
-bool fm_colinear(const REAL *p1,const REAL *p2,const REAL *p3,REAL epsilon)
-{
-  REAL first[3];
-  REAL second[3];
-
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    first[axis]  = p2[axis] - p1[axis];
-    second[axis] = p3[axis] - p2[axis];
-  }
-
-  fm_normalize(first);
-  fm_normalize(second);
-
-  const REAL cosine = fm_dot(first,second);
-
-  return cosine >= epsilon;
-}
-
-// Starts a bounding box at a single point: both bmin and bmax are set to p.
-void  fm_initMinMax(const REAL *p,REAL *bmin,REAL *bmax)
-{
-  for (NxI32 axis = 0; axis < 3; axis++)
-  {
-    bmin[axis] = p[axis];
-    bmax[axis] = bmin[axis];
-  }
-}
-
-// Intersects the 2D segments a1->a2 and b1->b2 (components 0 and 1 only).
-// Returns IR_COINCIDENT / IR_PARALLEL when the direction determinant is
-// exactly zero, IR_DO_INTERSECT when both parameters fall in [0,1] (the hit
-// location is then written to intersection[0..1]) and IR_DONT_INTERSECT
-// otherwise. 'intersection' is only written on IR_DO_INTERSECT.
-IntersectResult fm_intersectLineSegments2d(const REAL *a1,const REAL *a2,const REAL *b1,const REAL *b2,REAL *intersection)
-{
-  // direction of each segment and the offset between their start points
-  const REAL ax = a2[0] - a1[0];
-  const REAL ay = a2[1] - a1[1];
-  const REAL bx = b2[0] - b1[0];
-  const REAL by = b2[1] - b1[1];
-  const REAL ox = a1[0] - b1[0];
-  const REAL oy = a1[1] - b1[1];
-
-  const REAL denom  = (by*ax) - (bx*ay);
-  const REAL nume_a = (bx*oy) - (by*ox);
-  const REAL nume_b = (ax*oy) - (ay*ox);
-
-  if ( denom == 0 )
-  {
-    return ( nume_a == 0 && nume_b == 0 ) ? IR_COINCIDENT : IR_PARALLEL;
-  }
-
-  const REAL recip = 1 / denom;
-  const REAL ua = nume_a * recip; // parameter along a1->a2
-  const REAL ub = nume_b * recip; // parameter along b1->b2
-
-  const bool onBoth = ua >= 0 && ua <= 1 && ub >= 0 && ub <= 1;
-  if ( !onBoth )
-    return IR_DONT_INTERSECT;
-
-  // Get the intersection point.
-  intersection[0] = a1[0] + ua*(a2[0] - a1[0]);
-  intersection[1] = a1[1] + ua*(a2[1] - a1[1]);
-  return IR_DO_INTERSECT;
-}
-
-// Same test as fm_intersectLineSegments2d, but instead of the hit location
-// it reports the parameters of the hit: t1 along a1->a2 and t2 along b1->b2.
-// t1 and t2 are only written when IR_DO_INTERSECT is returned.
-IntersectResult fm_intersectLineSegments2dTime(const REAL *a1,const REAL *a2,const REAL *b1,const REAL *b2,REAL &t1,REAL &t2)
-{
-  // direction of each segment and the offset between their start points
-  const REAL ax = a2[0] - a1[0];
-  const REAL ay = a2[1] - a1[1];
-  const REAL bx = b2[0] - b1[0];
-  const REAL by = b2[1] - b1[1];
-  const REAL ox = a1[0] - b1[0];
-  const REAL oy = a1[1] - b1[1];
-
-  const REAL denom  = (by*ax) - (bx*ay);
-  const REAL nume_a = (bx*oy) - (by*ox);
-  const REAL nume_b = (ax*oy) - (ay*ox);
-
-  if ( denom == 0 )
-  {
-    return ( nume_a == 0 && nume_b == 0 ) ? IR_COINCIDENT : IR_PARALLEL;
-  }
-
-  const REAL recip = 1 / denom;
-  const REAL ua = nume_a * recip;
-  const REAL ub = nume_b * recip;
-
-  const bool onBoth = ua >= 0 && ua <= 1 && ub >= 0 && ub <= 1;
-  if ( !onBoth )
-    return IR_DONT_INTERSECT;
-
-  t1 = ua;
-  t2 = ub;
-  return IR_DO_INTERSECT;
-}
-
-//**** Plane Triangle Intersection
-
-
-
-
-
-// Computes where the line through p1 and p2 crosses 'plane' and writes the
-// location to 'split'.
-// Assumes that the points are on opposite sides of the plane! No check is
-// made for a segment parallel to the plane (zero denominator).
-void fm_intersectPointPlane(const REAL *p1,const REAL *p2,REAL *split,const REAL *plane)
-{
-  const REAL startDist = fm_distToPlane(plane,p1);
-
-  const REAL dx = p2[0] - p1[0];
-  const REAL dy = p2[1] - p1[1];
-  const REAL dz = p2[2] - p1[2];
-
-  // rate at which the plane distance changes along the segment
-  const REAL rate   = dx*plane[0] + dy*plane[1] + dz*plane[2];
-  const REAL offset = startDist - plane[3];
-
-  // parametric position of the crossing, measured from p1
-  const REAL t = -(plane[3] + offset ) / rate;
-
-  split[0] = (dx*t)+p1[0];
-  split[1] = (dy*t)+p1[1];
-  split[2] = (dz*t)+p1[2];
-}
-
-// Classifies point 'p' against 'plane' using its distance from the plane:
-// PTR_ON_PLANE when the distance is within [-epsilon, epsilon], otherwise
-// PTR_FRONT for a positive distance and PTR_BACK for a non-positive one.
-PlaneTriResult fm_getSidePlane(const REAL *p,const REAL *plane,REAL epsilon)
-{
-  const REAL d = fm_distToPlane(plane,p);
-
-  const bool offPlane = d < -epsilon || d > epsilon;
-  if ( !offPlane )
-    return PTR_ON_PLANE;
-
-  return ( d > 0 ) ? PTR_FRONT : PTR_BACK;
-}
-
-
-
-#ifndef PLANE_TRIANGLE_INTERSECTION_H
-
-#define PLANE_TRIANGLE_INTERSECTION_H
-
-#define MAXPTS 256
-
-// Minimal 3D point. The members are declared x, y, z in that order and other
-// code in this file passes &x where an array of three values is expected,
-// so the declaration order must not change.
-template <class Type> class point
-{
-public:
-
-  // Copies three consecutive values starting at 'xyz' into x, y and z.
-  void set(const Type *xyz)
-  {
-    x = xyz[0];
-    y = xyz[1];
-    z = xyz[2];
-  }
-
-  Type x;
-  Type y;
-  Type z;
-};
-
-// Plane stored as a normal plus the constant term D of the plane equation.
-// 'normal' is declared before 'D'; polygon::Split_Polygon passes
-// &normal.x where a 4-value plane is expected, so the member order must
-// not change.
-template <class Type> class plane
-{
-public:
-  // Builds the plane from four consecutive values: normal x, y, z and D.
-  plane(const Type *p)
-  {
-    normal.set(p);
-    D = p[3];
-  }
-
-  // Evaluates the plane equation at 'p': positive, negative or zero
-  // depending on which side of the plane the point lies.
-  Type Classify_Point(const point<Type> &p)
-  {
-    return p.x*normal.x + p.y*normal.y + p.z*normal.z + D;
-  }
-
-  point<Type> normal;
-  Type  D;
-};
-
-// Fixed-capacity polygon (at most MAXPTS vertices) used as scratch storage
-// when splitting a triangle by a plane. None of the methods bounds-check
-// against MAXPTS.
-template <class Type> class polygon
-{
-public:
-  // Creates an empty polygon.
-  polygon(void)
-  {
-    mVcount = 0;
-  }
-
-  // Creates a triangle from three points, each given as 3 consecutive values.
-  polygon(const Type *p1,const Type *p2,const Type *p3)
-  {
-    mVcount = 3;
-    mVertices[0].set(p1);
-    mVertices[1].set(p2);
-    mVertices[2].set(p3);
-  }
-
-
-  // Number of vertices currently stored.
-  NxI32 NumVertices(void) const { return mVcount; };
-
-  // Returns vertex 'index'. A negative index has mVcount added once, so -1
-  // refers to the last vertex; no other range checking is done.
-  const point<Type>& Vertex(NxI32 index)
-  {
-    if ( index < 0 ) index+=mVcount;
-    return mVertices[index];
-  };
-
-
-  // Replaces the contents with the first 'count' entries of 'pts'.
-  void set(const point<Type> *pts,NxI32 count)
-  {
-    for (NxI32 i=0; i<count; i++)
-    {
-      mVertices[i] = pts[i];
-    }
-    mVcount = count;
-  }
-
-
-  // Splits 'poly' by the plane 'part'.
-  // Walks the edges (previous vertex ptA -> current vertex ptB), classifying
-  // each vertex with Classify_Point:
-  //   > 0 : vertex goes to 'front'
-  //   < 0 : vertex goes to 'back'
-  //   = 0 : vertex goes to both
-  // When an edge goes from one strict side to the other, the crossing point
-  // is added to both outputs before the current vertex.
-  // 'this' is not read; the polygon being split is the 'poly' argument.
-  void Split_Polygon(polygon<Type> *poly,plane<Type> *part, polygon<Type> &front, polygon<Type> &back)
-  {
-    NxI32   count = poly->NumVertices ();
-    NxI32   out_c = 0, in_c = 0;
-    point<Type> ptA, ptB,outpts[MAXPTS],inpts[MAXPTS];
-    Type sideA, sideB;
-    // start with the last vertex so the first edge examined is last->first
-    ptA = poly->Vertex (count - 1);
-    sideA = part->Classify_Point (ptA);
-    for (NxI32 i = -1; ++i < count;)
-    {
-      ptB = poly->Vertex(i);
-      sideB = part->Classify_Point(ptB);
-      if (sideB > 0)
-      {
-        if (sideA < 0)
-        {
-  			  point<Type> v;
-          // &part->normal.x is handed over as a 4-value plane; this relies on
-          // plane<Type> storing normal.x, normal.y, normal.z and D contiguously
-          fm_intersectPointPlane(&ptB.x, &ptA.x, &v.x, &part->normal.x );
-          outpts[out_c++] = inpts[in_c++] = v;
-        }
-        outpts[out_c++] = ptB;
-      }
-      else if (sideB < 0)
-      {
-        if (sideA > 0)
-        {
-          point<Type> v;
-          fm_intersectPointPlane(&ptB.x, &ptA.x, &v.x, &part->normal.x );
-          outpts[out_c++] = inpts[in_c++] = v;
-        }
-        inpts[in_c++] = ptB;
-      }
-      else
-         outpts[out_c++] = inpts[in_c++] = ptB; // exactly on the plane: belongs to both halves
-      ptA = ptB;
-      sideA = sideB;
-    }
-
-    front.set(&outpts[0], out_c);
-    back.set(&inpts[0], in_c);
-  }
-
-  NxI32           mVcount;            // number of valid entries in mVertices
-  point<Type>   mVertices[MAXPTS];  // vertex storage
-};
-
-
-
-#endif
-
-// Appends the 3D point 'p' to an output vertex array.
-//   dest    : start of the output array
-//   tstride : byte stride between output vertices
-//   pcount  : current vertex count; selects the slot and is incremented
-// Asserts that the resulting count does not exceed 4.
-static inline void add(const REAL *p,REAL *dest,NxU32 tstride,NxU32 &pcount)
-{
-  // locate slot 'pcount' using byte arithmetic, since tstride is in bytes
-  REAL *slot = (REAL *) ( (char *) dest + pcount*tstride );
-
-  slot[0] = p[0];
-  slot[1] = p[1];
-  slot[2] = p[2];
-
-  pcount++;
-  assert( pcount <= 4 );
-}
-
-
-// Splits a triangle by a plane.
-// Vertices on the front side are written to 'front' (fcount of them) and
-// vertices on the back side to 'back' (bcount of them); each side receives
-// up to 4 vertices (the add() helper asserts this limit).
-// Returns PTR_FRONT or PTR_BACK when the whole triangle ends up on one side,
-// PTR_SPLIT when both outputs hold a polygon.
-// Vertices within 'epsilon' of the plane are treated as lying on the same
-// side as the triangle's other vertices; a fully co-planar triangle is
-// reported as PTR_FRONT.
-PlaneTriResult fm_planeTriIntersection(const REAL *_plane,    // the plane equation in Ax+By+Cz+D format
-                                    const REAL *triangle, // the source triangle.
-                                    NxU32 tstride,  // stride in bytes of the input and output *vertices*
-                                    REAL        epsilon,  // the co-planar epsilon value.
-                                    REAL       *front,    // the polygon in front of the plane
-                                    NxU32 &fcount,  // number of vertices in the 'front' polygon
-                                    REAL       *back,     // the polygon in back of the plane
-                                    NxU32 &bcount) // the number of vertices in the 'back' polygon.
-{
-
-  fcount = 0;
-  bcount = 0;
-
-  const char *tsource = (const char *) triangle;
-
-  // get the three vertices of the triangle.
-  const REAL *p1     = (const REAL *) (tsource);
-  const REAL *p2     = (const REAL *) (tsource+tstride);
-  const REAL *p3     = (const REAL *) (tsource+tstride*2);
-
-
-  PlaneTriResult r1   = fm_getSidePlane(p1,_plane,epsilon); // compute the side of the plane each vertex is on
-  PlaneTriResult r2   = fm_getSidePlane(p2,_plane,epsilon);
-  PlaneTriResult r3   = fm_getSidePlane(p3,_plane,epsilon);
-
-  // If any of the points lay right *on* the plane....
-  if ( r1 == PTR_ON_PLANE || r2 == PTR_ON_PLANE || r3 == PTR_ON_PLANE )
-  {
-    // If the triangle is completely co-planar, then just treat it as 'front' and return!
-    if ( r1 == PTR_ON_PLANE && r2 == PTR_ON_PLANE && r3 == PTR_ON_PLANE )
-    {
-      add(p1,front,tstride,fcount);
-      add(p2,front,tstride,fcount);
-      add(p3,front,tstride,fcount);
-      return PTR_FRONT;
-    }
-    // Decide to place the co-planar points on the same side as the first
-    // vertex that is NOT co-planar.
-    PlaneTriResult r= PTR_ON_PLANE;
-    if ( r1 != PTR_ON_PLANE )
-      r = r1;
-    else if ( r2 != PTR_ON_PLANE )
-      r = r2;
-    else if ( r3 != PTR_ON_PLANE )
-      r = r3;
-
-    if ( r1 == PTR_ON_PLANE ) r1 = r;
-    if ( r2 == PTR_ON_PLANE ) r2 = r;
-    if ( r3 == PTR_ON_PLANE ) r3 = r;
-
-  }
-
-  if ( r1 == r2 && r1 == r3 ) // if all three vertices are on the same side of the plane.
-  {
-    if ( r1 == PTR_FRONT ) // if all three are in front of the plane, then copy to the 'front' output triangle.
-    {
-      add(p1,front,tstride,fcount);
-      add(p2,front,tstride,fcount);
-      add(p3,front,tstride,fcount);
-    }
-    else
-    {
-      add(p1,back,tstride,bcount); // if all three are in 'back' then copy to the 'back' output triangle.
-      add(p2,back,tstride,bcount);
-      add(p3,back,tstride,bcount);
-    }
-    return r1; // if all three points are on the same side of the plane return result
-  }
-
-
-  // The triangle genuinely straddles the plane: split it as a polygon.
-  polygon<REAL> pi(p1,p2,p3);
-  polygon<REAL>  pfront,pback;
-
-  plane<REAL>    part(_plane);
-
-  pi.Split_Polygon(&pi,&part,pfront,pback);
-
-  // copy both halves into the caller's strided output arrays
-  for (NxI32 i=0; i<pfront.mVcount; i++)
-  {
-    add( &pfront.mVertices[i].x, front, tstride, fcount );
-  }
-
-  for (NxI32 i=0; i<pback.mVcount; i++)
-  {
-    add( &pback.mVertices[i].x, back, tstride, bcount );
-  }
-
-  PlaneTriResult ret = PTR_SPLIT;
-
-  // a half with fewer than 3 vertices is not a polygon: discard it
-  if ( fcount < 3 ) fcount = 0;
-  if ( bcount < 3 ) bcount = 0;
-
-  if ( fcount == 0 && bcount )
-    ret = PTR_BACK;
-
-  if ( bcount == 0 && fcount )
-    ret = PTR_FRONT;
-
-
-  return ret;
-}
-
-// Measures a box around a strided point set as seen through `matrix`.
-//
-//   vcount  - number of points to visit.
-//   points  - first point; each point is read as three consecutive REALs.
-//   pstride - distance in bytes between successive points.
-//   sides   - out: box extent (max - min) along each of the three axes.
-//   matrix  - in/out: 16-element transform. Every point is passed through
-//             fm_inverseRT(matrix, ...) before being measured; afterwards the
-//             box centre, passed through fm_rotate(matrix, ...), is added to
-//             elements 12..14.
-void computeOBB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *sides,REAL *matrix)
-{
-  REAL lo[3] = { 1e9, 1e9, 1e9 };
-  REAL hi[3] = { -1e9, -1e9, -1e9 };
-
-  // Step through the buffer byte-wise so the caller's stride is honoured.
-  const char *cursor = (const char *) points;
-  for (NxU32 n=0; n<vcount; n++, cursor+=pstride)
-  {
-    REAL local[3];
-    fm_inverseRT(matrix, (const REAL *) cursor, local ); // inverse rotate translate
-
-    for (NxU32 k=0; k<3; k++)
-    {
-      if ( local[k] < lo[k] ) lo[k] = local[k];
-      if ( local[k] > hi[k] ) hi[k] = local[k];
-    }
-  }
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    sides[k] = hi[k]-lo[k];
-  }
-
-  REAL mid[3];
-  for (NxU32 k=0; k<3; k++)
-  {
-    mid[k] = sides[k]*0.5f+lo[k];
-  }
-
-  REAL shift[3];
-  fm_rotate(matrix,mid,shift);
-
-  matrix[12]+=shift[0];
-  matrix[13]+=shift[1];
-  matrix[14]+=shift[2];
-}
-
-// Fits a box to a strided point set and returns it as extents plus a
-// 16-element matrix.
-//
-// The initial frame is built from fm_computeBestFitPlane + fm_planeToMatrix
-// and measured with computeOBB.  When `bruteForce` is set, that frame is
-// additionally combined with rotations of 10, 20, ... 170 degrees (second
-// euler argument only) and the candidate with the smallest
-// sides[0]*sides[1]*sides[2] product wins.
-void fm_computeBestFitOBB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *sides,REAL *matrix,bool bruteForce)
-{
-  REAL fitPlane[4];
-  fm_computeBestFitPlane(vcount,points,pstride,0,0,fitPlane);
-  fm_planeToMatrix(fitPlane,matrix);
-  computeOBB( vcount, points, pstride, sides, matrix );
-
-  if ( !bruteForce )
-  {
-    return;
-  }
-
-  // Every candidate is derived from the frame found above, not from the
-  // running best, so keep a private copy of it.
-  REAL baseMatrix[16];
-  memcpy(baseMatrix,matrix,16*sizeof(REAL));
-
-  REAL bestVolume = sides[0]*sides[1]*sides[2];
-
-  REAL angle = 10;
-  while ( angle < 180 )
-  {
-    REAL spin[4];
-    fm_eulerToQuat(0,angle*FM_DEG_TO_RAD,0,spin);
-
-    REAL spinMatrix[16];
-    fm_quatToMatrix(spin,spinMatrix);
-
-    REAL candidate[16];
-    fm_matrixMultiply(spinMatrix,baseMatrix,candidate);
-
-    REAL extent[3];
-    computeOBB( vcount, points, pstride, extent, candidate );
-
-    const REAL candidateVolume = extent[0]*extent[1]*extent[2];
-    if ( candidateVolume < bestVolume )
-    {
-      bestVolume = candidateVolume;
-      memcpy(matrix,candidate,sizeof(REAL)*16);
-      sides[0] = extent[0];
-      sides[1] = extent[1];
-      sides[2] = extent[2];
-    }
-
-    angle+=10;
-  }
-}
-
-// Overload that reports the fitted box as extents + position + quaternion.
-// It runs the matrix overload and then splits the resulting matrix with
-// fm_getTranslation (into `pos`) and fm_matrixToQuat (into `quat`).
-void fm_computeBestFitOBB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *sides,REAL *pos,REAL *quat,bool bruteForce)
-{
-  REAL fitted[16];
-  fm_computeBestFitOBB(vcount,points,pstride,sides,fitted,bruteForce);
-
-  fm_getTranslation(fitted,pos);
-  fm_matrixToQuat(fitted,quat);
-}
-
-// Computes the axis-aligned bounding box of a strided point set.
-//
-//   vcount  - number of points to visit.
-//   points  - first point; it seeds the bounds, so it is read even before the
-//             loop starts.
-//   pstride - distance in bytes between successive points.
-//   sides   - out: extent (max - min) on each axis.
-//   pos     - out: centre of the box (min + half the extent) on each axis.
-void fm_computeBestFitABB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *sides,REAL *pos)
-{
-  REAL lo[3];
-  REAL hi[3];
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    lo[k] = points[k];
-    hi[k] = points[k];
-  }
-
-  const char *cursor = (const char *) points;
-  for (NxU32 n=0; n<vcount; n++, cursor+=pstride)
-  {
-    const REAL *p = (const REAL *) cursor;
-
-    for (NxU32 k=0; k<3; k++)
-    {
-      if ( p[k] < lo[k] ) lo[k] = p[k];
-      if ( p[k] > hi[k] ) hi[k] = p[k];
-    }
-  }
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    sides[k] = hi[k] - lo[k];
-  }
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    pos[k] = lo[k]+sides[k]*0.5f;
-  }
-}
-
-
-void fm_planeToMatrix(const REAL *plane,REAL *matrix) // convert a plane equation to a 4x4 rotation matrix
-{
-  // Orientation: fm_rotationArc is handed the +Y axis and the plane; the
-  // resulting quaternion is expanded into `matrix`.
-  REAL yAxis[3] = { 0, 1, 0 };
-  REAL rotation[4];
-  fm_rotationArc(yAxis,plane,rotation);
-  fm_quatToMatrix(rotation,matrix);
-
-  // Translation: the point (0,-plane[3],0) pushed through that matrix.
-  REAL offset[3] = { 0, -plane[3], 0 };
-  REAL translation[3];
-  fm_transform(matrix,offset,translation);
-  fm_setTranslation(translation,matrix);
-}
-
-void fm_planeToQuat(const REAL *plane,REAL *quat,REAL *pos) // convert a plane equation to a quaternion and translation
-{
-  // Orientation: fm_rotationArc is handed the +Y axis and the plane.
-  REAL yAxis[3] = { 0, 1, 0 };
-  fm_rotationArc(yAxis,plane,quat);
-
-  // Position: the point (0,plane[3],0) pushed through the matrix form of the
-  // same rotation.
-  // NOTE(review): fm_planeToMatrix uses -plane[3] for this offset while this
-  // function uses +plane[3]; confirm which sign matches the plane convention.
-  REAL rotationMatrix[16];
-  fm_quatToMatrix(quat,rotationMatrix);
-  REAL offset[3] = { 0, plane[3], 0 };
-  fm_transform(rotationMatrix,offset,pos);
-}
-
-void fm_eulerMatrix(REAL ax,REAL ay,REAL az,REAL *matrix) // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
-{
-  // Two-step conversion: euler angles -> quaternion -> matrix.
-  REAL rotation[4];
-  fm_eulerToQuat(ax,ay,az,rotation);
-  fm_quatToMatrix(rotation,matrix);
-}
-
-
-//**********************************************************
-//**********************************************************
-//**** Vertex Welding
-//**********************************************************
-//**********************************************************
-
-#ifndef VERTEX_INDEX_H
-
-#define VERTEX_INDEX_H
-
-namespace VERTEX_INDEX
-{
-
-class KdTreeNode;
-
-typedef CONVEX_DECOMPOSITION::Array< KdTreeNode * > KdTreeNodeVector;
-
-enum Axes // names the coordinate a kd-tree level compares on (see KdTreeNode::addDouble/addFloat/search)
-{
-  X_AXIS = 0, // compare element [0] of the position
-  Y_AXIS = 1, // compare element [1] of the position
-  Z_AXIS = 2  // compare element [2] of the position
-};
-
-// One result slot written by KdTreeNode::search: the node that matched and
-// the squared distance (dx*dx+dy*dy+dz*dz) between it and the query position.
-class KdTreeFindNode
-{
-public:
-  KdTreeFindNode(void) : mNode(0), mDistance(0)
-  {
-  }
-
-  KdTreeNode  *mNode;      // matching node; 0 until a search fills the slot
-  NxF64        mDistance;  // squared distance stored by the search
-};
-
-class KdTreeInterface // position lookup used by KdTreeNode; nodes store only a vertex index and ask this interface for coordinates
-{
-public:
-  virtual const NxF64 * getPositionDouble(NxU32 index) const = 0; // pointer to the NxF64 coordinates of vertex `index` (callers read elements [0..2])
-  virtual const NxF32  * getPositionFloat(NxU32 index) const = 0; // pointer to the NxF32 coordinates of vertex `index` (callers read elements [0..2])
-};
-
-class KdTreeNode // one node of a 3-D kd-tree; holds a vertex index plus left/right children, coordinates come from a KdTreeInterface
-{
-public:
-  KdTreeNode(void) // default: index 0, no children
-  {
-    mIndex = 0;
-    mLeft = 0;
-    mRight = 0;
-  }
-
-  KdTreeNode(NxU32 index) // leaf node referring to vertex `index`
-  {
-    mIndex = index;
-    mLeft = 0;
-    mRight = 0;
-  };
-
-	~KdTreeNode(void) // children are not deleted here
-  {
-  }
-
-
-  void addDouble(KdTreeNode *node,Axes dim,const KdTreeInterface *iface) // insert `node` below this node, comparing NxF64 positions on axis `dim`; the axis cycles X -> Y -> Z -> X per level
-  {
-    const NxF64 *nodePosition = iface->getPositionDouble( node->mIndex );
-    const NxF64 *position     = iface->getPositionDouble( mIndex );
-    switch ( dim )
-    {
-      case X_AXIS:
-        if ( nodePosition[0] <= position[0] ) // less-or-equal goes to the left child
-        {
-          if ( mLeft )
-            mLeft->addDouble(node,Y_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addDouble(node,Y_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-      case Y_AXIS:
-        if ( nodePosition[1] <= position[1] )
-        {
-          if ( mLeft )
-            mLeft->addDouble(node,Z_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addDouble(node,Z_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-      case Z_AXIS:
-        if ( nodePosition[2] <= position[2] )
-        {
-          if ( mLeft )
-            mLeft->addDouble(node,X_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addDouble(node,X_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-    }
-
-  }
-
-
-  void addFloat(KdTreeNode *node,Axes dim,const KdTreeInterface *iface) // NxF32 twin of addDouble: same insertion rule, positions read through getPositionFloat
-  {
-    const NxF32 *nodePosition = iface->getPositionFloat( node->mIndex );
-    const NxF32 *position     = iface->getPositionFloat( mIndex );
-    switch ( dim )
-    {
-      case X_AXIS:
-        if ( nodePosition[0] <= position[0] )
-        {
-          if ( mLeft )
-            mLeft->addFloat(node,Y_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addFloat(node,Y_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-      case Y_AXIS:
-        if ( nodePosition[1] <= position[1] )
-        {
-          if ( mLeft )
-            mLeft->addFloat(node,Z_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addFloat(node,Z_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-      case Z_AXIS:
-        if ( nodePosition[2] <= position[2] )
-        {
-          if ( mLeft )
-            mLeft->addFloat(node,X_AXIS,iface);
-          else
-            mLeft = node;
-        }
-        else
-        {
-          if ( mRight )
-            mRight->addFloat(node,X_AXIS,iface);
-          else
-            mRight = node;
-        }
-        break;
-    }
-
-  }
-
-
-  NxU32 getIndex(void) const { return mIndex; }; // vertex index this node refers to
-
-  void search(Axes axis,const NxF64 *pos,NxF64 radius,NxU32 &count,NxU32 maxObjects,KdTreeFindNode *found,const KdTreeInterface *iface) // collect nodes strictly closer than `radius` to `pos` into found[], nearest first, keeping at most maxObjects; `count` is the number of valid slots
-  {
-
-    const NxF64 *position = iface->getPositionDouble(mIndex);
-
-    NxF64 dx = pos[0] - position[0];
-    NxF64 dy = pos[1] - position[1];
-    NxF64 dz = pos[2] - position[2];
-
-    KdTreeNode *search1 = 0; // child on the query's side of this node, always visited when present
-    KdTreeNode *search2 = 0; // opposite child, visited only when the split lies within `radius` on this axis
-
-    switch ( axis )
-    {
-      case X_AXIS:
-       if ( dx <= 0 )     // JWR  if we are to the left
-       {
-        search1 = mLeft; // JWR  then search to the left
-        if ( -dx < radius )  // JWR  if distance to the right is less than our search radius, continue on the right as well.
-          search2 = mRight;
-       }
-       else
-       {
-         search1 = mRight; // JWR  otherwise we go down the right tree
-         if ( dx < radius ) // JWR  if the distance to the left is less than our search radius, continue on the left as well
-	  			search2 = mLeft;
-        }
-        axis = Y_AXIS;
-        break;
-      case Y_AXIS:
-        if ( dy <= 0 )
-        {
-          search1 = mLeft;
-          if ( -dy < radius )
-    				search2 = mRight;
-        }
-        else
-        {
-          search1 = mRight;
-          if ( dy < radius )
-    				search2 = mLeft;
-        }
-        axis = Z_AXIS;
-        break;
-      case Z_AXIS:
-        if ( dz <= 0 )
-        {
-          search1 = mLeft;
-          if ( -dz < radius )
-    				search2 = mRight;
-        }
-        else
-        {
-          search1 = mRight;
-          if ( dz < radius )
-    				search2 = mLeft;
-        }
-        axis = X_AXIS;
-        break;
-    }
-
-    NxF64 r2 = radius*radius;
-    NxF64 m  = dx*dx+dy*dy+dz*dz; // squared distance from the query to this node
-
-    if ( m < r2 )
-    {
-      switch ( count )
-      {
-        case 0: // first hit: takes slot 0
-          found[count].mNode = this;
-          found[count].mDistance = m;
-          break;
-        case 1: // second hit: order the two entries, or replace slot 0 when only one result is wanted
-          if ( m < found[0].mDistance )
-          {
-            if ( maxObjects == 1 )
-            {
-              found[0].mNode = this;
-              found[0].mDistance = m;
-            }
-            else
-            {
-              found[1] = found[0];
-              found[0].mNode = this;
-              found[0].mDistance = m;
-            }
-          }
-          else if ( maxObjects > 1)
-          {
-            found[1].mNode = this;
-            found[1].mDistance = m;
-          }
-          break;
-        default:
-          {
-            bool inserted = false;
-
-            for (NxU32 i=0; i<count; i++)
-            {
-              if ( m < found[i].mDistance ) // if this one is closer than a pre-existing one...
-              {
-                // insertion sort...
-                NxU32 scan = count;
-                if ( scan >= maxObjects ) scan=maxObjects-1; // list is full: the last entry is dropped by the shift below
-                for (NxU32 j=scan; j>i; j--)
-                {
-                  found[j] = found[j-1];
-                }
-                found[i].mNode = this;
-                found[i].mDistance = m;
-                inserted = true;
-                break;
-              }
-            }
-
-            if ( !inserted && count < maxObjects ) // farther than everything kept so far: append if there is room
-            {
-              found[count].mNode = this;
-              found[count].mDistance = m;
-            }
-          }
-          break;
-      }
-      count++;
-      if ( count > maxObjects ) // clamp: count never exceeds the capacity of found[]
-      {
-        count = maxObjects;
-      }
-    }
-
-
-    if ( search1 )
-  		search1->search( axis, pos,radius, count, maxObjects, found, iface);
-
-    if ( search2 )
-	  	search2->search( axis, pos,radius, count, maxObjects, found, iface);
-
-  }
-
-  void search(Axes axis,const NxF32 *pos,NxF32 radius,NxU32 &count,NxU32 maxObjects,KdTreeFindNode *found,const KdTreeInterface *iface) // NxF32 twin of the search above: same traversal and result handling, positions read through getPositionFloat
-  {
-
-    const NxF32 *position = iface->getPositionFloat(mIndex);
-
-    NxF32 dx = pos[0] - position[0];
-    NxF32 dy = pos[1] - position[1];
-    NxF32 dz = pos[2] - position[2];
-
-    KdTreeNode *search1 = 0; // child on the query's side of this node, always visited when present
-    KdTreeNode *search2 = 0; // opposite child, visited only when the split lies within `radius` on this axis
-
-    switch ( axis )
-    {
-      case X_AXIS:
-       if ( dx <= 0 )     // JWR  if we are to the left
-       {
-        search1 = mLeft; // JWR  then search to the left
-        if ( -dx < radius )  // JWR  if distance to the right is less than our search radius, continue on the right as well.
-          search2 = mRight;
-       }
-       else
-       {
-         search1 = mRight; // JWR  otherwise we go down the right tree
-         if ( dx < radius ) // JWR  if the distance to the left is less than our search radius, continue on the left as well
-	  			search2 = mLeft;
-        }
-        axis = Y_AXIS;
-        break;
-      case Y_AXIS:
-        if ( dy <= 0 )
-        {
-          search1 = mLeft;
-          if ( -dy < radius )
-    				search2 = mRight;
-        }
-        else
-        {
-          search1 = mRight;
-          if ( dy < radius )
-    				search2 = mLeft;
-        }
-        axis = Z_AXIS;
-        break;
-      case Z_AXIS:
-        if ( dz <= 0 )
-        {
-          search1 = mLeft;
-          if ( -dz < radius )
-    				search2 = mRight;
-        }
-        else
-        {
-          search1 = mRight;
-          if ( dz < radius )
-    				search2 = mLeft;
-        }
-        axis = X_AXIS;
-        break;
-    }
-
-    NxF32 r2 = radius*radius;
-    NxF32 m  = dx*dx+dy*dy+dz*dz; // squared distance from the query to this node
-
-    if ( m < r2 )
-    {
-      switch ( count )
-      {
-        case 0: // first hit: takes slot 0
-          found[count].mNode = this;
-          found[count].mDistance = m;
-          break;
-        case 1: // second hit: order the two entries, or replace slot 0 when only one result is wanted
-          if ( m < found[0].mDistance )
-          {
-            if ( maxObjects == 1 )
-            {
-              found[0].mNode = this;
-              found[0].mDistance = m;
-            }
-            else
-            {
-              found[1] = found[0];
-              found[0].mNode = this;
-              found[0].mDistance = m;
-            }
-          }
-          else if ( maxObjects > 1)
-          {
-            found[1].mNode = this;
-            found[1].mDistance = m;
-          }
-          break;
-        default:
-          {
-            bool inserted = false;
-
-            for (NxU32 i=0; i<count; i++)
-            {
-              if ( m < found[i].mDistance ) // if this one is closer than a pre-existing one...
-              {
-                // insertion sort...
-                NxU32 scan = count;
-                if ( scan >= maxObjects ) scan=maxObjects-1; // list is full: the last entry is dropped by the shift below
-                for (NxU32 j=scan; j>i; j--)
-                {
-                  found[j] = found[j-1];
-                }
-                found[i].mNode = this;
-                found[i].mDistance = m;
-                inserted = true;
-                break;
-              }
-            }
-
-            if ( !inserted && count < maxObjects ) // farther than everything kept so far: append if there is room
-            {
-              found[count].mNode = this;
-              found[count].mDistance = m;
-            }
-          }
-          break;
-      }
-      count++;
-      if ( count > maxObjects ) // clamp: count never exceeds the capacity of found[]
-      {
-        count = maxObjects;
-      }
-    }
-
-
-    if ( search1 )
-  		search1->search( axis, pos,radius, count, maxObjects, found, iface);
-
-    if ( search2 )
-	  	search2->search( axis, pos,radius, count, maxObjects, found, iface);
-
-  }
-
-private:
-
-  void setLeft(KdTreeNode *left) { mLeft = left; };
-  void setRight(KdTreeNode *right) { mRight = right; };
-
-	KdTreeNode *getLeft(void)         { return mLeft; }
-	KdTreeNode *getRight(void)        { return mRight; }
-
-  NxU32          mIndex; // vertex index handed to the KdTreeInterface to fetch this node's position
-  KdTreeNode     *mLeft;  // subtree of nodes whose split coordinate was <= this node's at insertion
-  KdTreeNode     *mRight; // subtree of nodes whose split coordinate was > this node's at insertion
-};
-
-
-#define MAX_BUNDLE_SIZE 1024  // 1024 nodes at a time, to minimize memory allocation and guarantee that pointers are persistent.
-
-// A fixed block of MAX_BUNDLE_SIZE kd-tree nodes that are handed out one at a
-// time.  Bundles are chained through mNext so their owner can walk and free
-// them.
-class KdTreeNodeBundle : public Memalloc
-{
-public:
-
-  KdTreeNodeBundle(void) : mNext(0), mIndex(0)
-  {
-  }
-
-  // True once every node of this bundle has been handed out.
-  bool isFull(void) const
-  {
-    return mIndex == MAX_BUNDLE_SIZE;
-  }
-
-  // Hands out the next unused node; the bundle must not be full.
-  KdTreeNode * getNextNode(void)
-  {
-    assert(mIndex<MAX_BUNDLE_SIZE);
-    return &mNodes[mIndex++];
-  }
-
-  KdTreeNodeBundle  *mNext;                    // next bundle in the owner's chain, 0 at the end
-  NxU32             mIndex;                    // number of nodes already handed out
-  KdTreeNode         mNodes[MAX_BUNDLE_SIZE];  // node storage
-};
-
-
-typedef CONVEX_DECOMPOSITION::Array< NxF64 > DoubleVector;
-typedef CONVEX_DECOMPOSITION::Array< NxF32 >  FloatVector;
-
-// Kd-tree over a growing list of 3-D vertices.
-//
-// Vertices are stored as flat x,y,z triples, either NxF64 or NxF32 depending
-// on setUseDouble(); the matching add/search/getNearest overload must be used
-// (each one asserts the mode).  Nodes are carved out of KdTreeNodeBundle
-// blocks, so node pointers stay valid while the vertex arrays grow.
-class KdTree : public KdTreeInterface, public Memalloc
-{
-public:
-  KdTree(void)
-  {
-    mRoot = 0;
-    mBundle = 0;
-    mVcount = 0;
-    mUseDouble = false;
-  }
-
-  virtual ~KdTree(void)
-  {
-    reset();
-  }
-
-  // KdTreeInterface: pointer to the x,y,z triple of vertex `index` (double mode).
-  const NxF64 * getPositionDouble(NxU32 index) const
-  {
-    assert( mUseDouble );
-    assert ( index < mVcount );
-    return  &mVerticesDouble[index*3];
-  }
-
-  // KdTreeInterface: pointer to the x,y,z triple of vertex `index` (float mode).
-  const NxF32 * getPositionFloat(NxU32 index) const
-  {
-    assert( !mUseDouble );
-    assert ( index < mVcount );
-    return  &mVerticesFloat[index*3];
-  }
-
-  // Collects up to maxObjects vertices closer than `radius` to `pos` into
-  // found[] and returns how many slots were filled (0 for an empty tree).
-  NxU32 search(const NxF64 *pos,NxF64 radius,NxU32 maxObjects,KdTreeFindNode *found) const
-  {
-    assert( mUseDouble );
-    if ( !mRoot )	return 0;
-    NxU32 count = 0;
-    mRoot->search(X_AXIS,pos,radius,count,maxObjects,found,this);
-    return count;
-  }
-
-  // Float-mode counterpart of the search above.
-  NxU32 search(const NxF32 *pos,NxF32 radius,NxU32 maxObjects,KdTreeFindNode *found) const
-  {
-    assert( !mUseDouble );
-    if ( !mRoot )	return 0;
-    NxU32 count = 0;
-    mRoot->search(X_AXIS,pos,radius,count,maxObjects,found,this);
-    return count;
-  }
-
-  // Drops all vertices and frees every node bundle.  The precision mode set
-  // with setUseDouble() is left untouched.
-  void reset(void)
-  {
-    mRoot = 0;
-    mVerticesDouble.clear();
-    mVerticesFloat.clear();
-    // mBundle is the head of the bundle chain (see getNewNode), so this walk
-    // reaches every bundle that was ever allocated.
-    KdTreeNodeBundle *bundle = mBundle;
-    while ( bundle )
-    {
-      KdTreeNodeBundle *next = bundle->mNext;
-      delete bundle;
-      bundle = next;
-    }
-    mBundle = 0;
-    mVcount = 0;
-  }
-
-  // Appends a vertex (double mode), links a node for it into the tree and
-  // returns the new vertex's index.
-  NxU32 add(NxF64 x,NxF64 y,NxF64 z)
-  {
-    assert(mUseDouble);
-    NxU32 ret = mVcount;
-    mVerticesDouble.pushBack(x);
-    mVerticesDouble.pushBack(y);
-    mVerticesDouble.pushBack(z);
-    mVcount++;
-    KdTreeNode *node = getNewNode(ret);
-    if ( mRoot )
-    {
-      mRoot->addDouble(node,X_AXIS,this);
-    }
-    else
-    {
-      mRoot = node;
-    }
-    return ret;
-  }
-
-  // Float-mode counterpart of the add above.
-  NxU32 add(NxF32 x,NxF32 y,NxF32 z)
-  {
-    assert(!mUseDouble);
-    NxU32 ret = mVcount;
-    mVerticesFloat.pushBack(x);
-    mVerticesFloat.pushBack(y);
-    mVerticesFloat.pushBack(z);
-    mVcount++;
-    KdTreeNode *node = getNewNode(ret);
-    if ( mRoot )
-    {
-      mRoot->addFloat(node,X_AXIS,this);
-    }
-    else
-    {
-      mRoot = node;
-    }
-    return ret;
-  }
-
-  // Returns a freshly constructed node for vertex `index`, allocating another
-  // bundle when there is none yet or the current one is exhausted.
-  KdTreeNode * getNewNode(NxU32 index)
-  {
-    if ( mBundle == 0 || mBundle->isFull() )
-    {
-      // Push the new bundle at the HEAD of the chain.  Appending it behind
-      // the current bundle and then moving mBundle forward (as this code used
-      // to do) lost the only pointer to the older bundles, so reset() freed
-      // just the newest one and leaked the rest.
-      KdTreeNodeBundle *bundle = MEMALLOC_NEW(KdTreeNodeBundle);
-      bundle->mNext = mBundle;
-      mBundle = bundle;
-    }
-    KdTreeNode *node = mBundle->getNextNode();
-    new ( node ) KdTreeNode(index);
-    return node;
-  }
-
-  NxU32 getNearest(const NxF64 *pos,NxF64 radius,bool &_found) const // returns the nearest possible neighbor's index.
-  {
-    assert( mUseDouble );
-    NxU32 ret = 0;
-
-    _found = false;
-    KdTreeFindNode found[1];
-    NxU32 count = search(pos,radius,1,found);
-    if ( count )
-    {
-      KdTreeNode *node = found[0].mNode;
-      ret = node->getIndex();
-      _found = true;
-    }
-    return ret;
-  }
-
-  NxU32 getNearest(const NxF32 *pos,NxF32 radius,bool &_found) const // returns the nearest possible neighbor's index.
-  {
-    assert( !mUseDouble );
-    NxU32 ret = 0;
-
-    _found = false;
-    KdTreeFindNode found[1];
-    NxU32 count = search(pos,radius,1,found);
-    if ( count )
-    {
-      KdTreeNode *node = found[0].mNode;
-      ret = node->getIndex();
-      _found = true;
-    }
-    return ret;
-  }
-
-  // Start of the flat x,y,z array (double mode), or 0 when no vertex is stored.
-  const NxF64 * getVerticesDouble(void) const
-  {
-    assert( mUseDouble );
-    const NxF64 *ret = 0;
-    if ( !mVerticesDouble.empty() )
-    {
-      ret = &mVerticesDouble[0];
-    }
-    return ret;
-  }
-
-  // Start of the flat x,y,z array (float mode), or 0 when no vertex is stored.
-  const NxF32 * getVerticesFloat(void) const
-  {
-    assert( !mUseDouble );
-    const NxF32 * ret = 0;
-    if ( !mVerticesFloat.empty() )
-    {
-      ret = &mVerticesFloat[0];
-    }
-    return ret;
-  }
-
-  NxU32 getVcount(void) const { return mVcount; };
-
-  // Selects which precision the add/search/getNearest overloads accept.
-  void setUseDouble(bool useDouble)
-  {
-    mUseDouble = useDouble;
-  }
-
-private:
-  bool                    mUseDouble;       // true: NxF64 storage, false: NxF32 storage
-  KdTreeNode             *mRoot;            // root node, 0 while the tree is empty
-  KdTreeNodeBundle       *mBundle;          // head of the bundle chain; also the bundle new nodes come from
-  NxU32                  mVcount;           // number of vertices added so far
-  DoubleVector            mVerticesDouble;  // x,y,z triples (double mode)
-  FloatVector             mVerticesFloat;   // x,y,z triples (float mode)
-};
-
-}; // end of namespace VERTEX_INDEX
-
-// fm_VertexIndex implementation that welds vertices: getIndex() returns the
-// index of an already stored vertex lying within `granularity` of the query,
-// or stores the query as a new vertex.  Storage and lookup go through a
-// VERTEX_INDEX::KdTree that runs in double or float mode, chosen by which
-// constructor is used.
-class MyVertexIndex : public fm_VertexIndex, public Memalloc
-{
-public:
-  // Double-precision index.
-  MyVertexIndex(NxF64 granularity,bool snapToGrid)
-  {
-    mDoubleGranularity = granularity;
-    mFloatGranularity  = (NxF32)granularity;
-    mSnapToGrid        = snapToGrid;
-    mUseDouble         = true;
-    mKdTree.setUseDouble(true);
-  }
-
-  // Single-precision index.
-  MyVertexIndex(NxF32 granularity,bool snapToGrid)
-  {
-    mDoubleGranularity = granularity;
-    mFloatGranularity  = (NxF32)granularity;
-    mSnapToGrid        = snapToGrid;
-    mUseDouble         = false;
-    mKdTree.setUseDouble(false);
-  }
-
-  virtual ~MyVertexIndex(void)
-  {
-
-  }
-
-
-  // Removes the fmod remainder of p with respect to the granularity.
-  NxF64 snapToGrid(NxF64 p)
-  {
-    NxF64 m = fmod(p,mDoubleGranularity);
-    p-=m;
-    return p;
-  }
-
-  // Float counterpart of snapToGrid above.
-  NxF32 snapToGrid(NxF32 p)
-  {
-    NxF32 m = fmodf(p,mFloatGranularity);
-    p-=m;
-    return p;
-  }
-
-  NxU32    getIndex(const NxF32 *_p,bool &newPos)  // get index for a vector NxF32
-  {
-    NxU32 ret;
-
-    // A double-mode index widens the query and forwards to the NxF64 overload.
-    if ( mUseDouble )
-    {
-      NxF64 p[3];
-      p[0] = _p[0];
-      p[1] = _p[1];
-      p[2] = _p[2];
-      return getIndex(p,newPos);
-    }
-
-    newPos = false;
-
-    NxF32 p[3];
-
-    if ( mSnapToGrid )
-    {
-      p[0] = snapToGrid(_p[0]);
-      p[1] = snapToGrid(_p[1]);
-      p[2] = snapToGrid(_p[2]);
-    }
-    else
-    {
-      p[0] = _p[0];
-      p[1] = _p[1];
-      p[2] = _p[2];
-    }
-
-    // Reuse a stored vertex within the granularity, otherwise add a new one.
-    bool found;
-    ret = mKdTree.getNearest(p,mFloatGranularity,found);
-    if ( !found )
-    {
-      newPos = true;
-      ret = mKdTree.add(p[0],p[1],p[2]);
-    }
-
-
-    return ret;
-  }
-
-  NxU32    getIndex(const NxF64 *_p,bool &newPos)  // get index for a vector NxF64
-  {
-    NxU32 ret;
-
-    // A float-mode index narrows the query and forwards to the NxF32 overload.
-    if ( !mUseDouble )
-    {
-      NxF32 p[3];
-      p[0] = (NxF32)_p[0];
-      p[1] = (NxF32)_p[1];
-      p[2] = (NxF32)_p[2];
-      return getIndex(p,newPos);
-    }
-
-    newPos = false;
-
-    NxF64 p[3];
-
-    if ( mSnapToGrid )
-    {
-      p[0] = snapToGrid(_p[0]);
-      p[1] = snapToGrid(_p[1]);
-      p[2] = snapToGrid(_p[2]);
-    }
-    else
-    {
-      p[0] = _p[0];
-      p[1] = _p[1];
-      p[2] = _p[2];
-    }
-
-    // Reuse a stored vertex within the granularity, otherwise add a new one.
-    bool found;
-    ret = mKdTree.getNearest(p,mDoubleGranularity,found);
-    if ( !found )
-    {
-      newPos = true;
-      ret = mKdTree.add(p[0],p[1],p[2]);
-    }
-
-
-    return ret;
-  }
-
-  // Flat x,y,z array of all stored vertices (float mode); 0 when empty.
-  const NxF32 *   getVerticesFloat(void) const
-  {
-    const NxF32 * ret = 0;
-
-    assert( !mUseDouble );
-
-    ret = mKdTree.getVerticesFloat();
-
-    return ret;
-  }
-
-  // Flat x,y,z array of all stored vertices (double mode); 0 when empty.
-  const NxF64 *  getVerticesDouble(void) const
-  {
-    const NxF64 * ret = 0;
-
-    assert( mUseDouble );
-
-    ret = mKdTree.getVerticesDouble();
-
-    return ret;
-  }
-
-  // Pointer to the x,y,z triple of vertex `index` (float mode); 0 when no
-  // vertex is stored.
-  const NxF32 *   getVertexFloat(NxU32 index) const
-  {
-    const NxF32 * ret  = 0;
-    assert( !mUseDouble );
-#ifdef _DEBUG
-    NxU32 vcount = mKdTree.getVcount();
-    assert( index < vcount );
-#endif
-    ret =  mKdTree.getVerticesFloat();
-    if ( ret ) // an empty tree yields a null base pointer; do not index it
-    {
-      ret = &ret[index*3];
-    }
-    return ret;
-  }
-
-  // Pointer to the x,y,z triple of vertex `index` (double mode); 0 when no
-  // vertex is stored.
-  const NxF64 *   getVertexDouble(NxU32 index) const
-  {
-    const NxF64 * ret = 0;
-    assert( mUseDouble );
-#ifdef _DEBUG
-    NxU32 vcount = mKdTree.getVcount();
-    assert( index < vcount );
-#endif
-    ret =  mKdTree.getVerticesDouble();
-    if ( ret ) // an empty tree yields a null base pointer; do not index it
-    {
-      ret = &ret[index*3];
-    }
-
-    return ret;
-  }
-
-  NxU32    getVcount(void) const
-  {
-    return mKdTree.getVcount();
-  }
-
-  bool isDouble(void) const
-  {
-    return mUseDouble;
-  }
-
-
-  // Writes the stored vertices as "v x y z" lines followed by `tcount`
-  // triangles as "f a b c" lines (indices written 1-based) to `fname`.
-  // `indices` must hold 3*tcount entries.  Returns false when the file cannot
-  // be opened.
-  bool            saveAsObj(const char *fname,NxU32 tcount,NxU32 *indices)
-  {
-    bool ret = false;
-
-
-    FILE *fph = fopen(fname,"wb");
-    if ( fph )
-    {
-      ret = true;
-
-      NxU32 vcount    = getVcount();
-      if ( mUseDouble )
-      {
-        const NxF64 *v  = getVerticesDouble();
-        for (NxU32 i=0; i<vcount; i++)
-        {
-          fprintf(fph,"v %0.9f %0.9f %0.9f\r\n", (NxF32)v[0], (NxF32)v[1], (NxF32)v[2] );
-          v+=3;
-        }
-      }
-      else
-      {
-        const NxF32 *v  = getVerticesFloat();
-        for (NxU32 i=0; i<vcount; i++)
-        {
-          fprintf(fph,"v %0.9f %0.9f %0.9f\r\n", v[0], v[1], v[2] );
-          v+=3;
-        }
-      }
-
-      for (NxU32 i=0; i<tcount; i++)
-      {
-        NxU32 i1 = *indices++;
-        NxU32 i2 = *indices++;
-        NxU32 i3 = *indices++;
-        // The indices are unsigned, so print them with %u rather than %d.
-        fprintf(fph,"f %u %u %u\r\n", i1+1, i2+1, i3+1 );
-      }
-      fclose(fph);
-    }
-
-    return ret;
-  }
-
-private:
-  bool    mUseDouble:1;        // true: vertices are stored as NxF64
-  bool    mSnapToGrid:1;       // true: coordinates are snapped before lookup
-  NxF64  mDoubleGranularity;   // weld radius / grid size in double mode
-  NxF32   mFloatGranularity;   // weld radius / grid size in float mode
-  VERTEX_INDEX::KdTree  mKdTree;
-};
-
-fm_VertexIndex * fm_createVertexIndex(NxF64 granularity,bool snapToGrid) // create an indexed vertex system for doubles
-{
-  // The NxF64 granularity selects MyVertexIndex's double-precision constructor.
-  MyVertexIndex *index = MEMALLOC_NEW(MyVertexIndex)(granularity,snapToGrid);
-  fm_VertexIndex *asInterface = index;
-  return asInterface;
-}
-
-fm_VertexIndex * fm_createVertexIndex(NxF32 granularity,bool snapToGrid)  // create an indexed vertex system for floats
-{
-  // The NxF32 granularity selects MyVertexIndex's single-precision constructor.
-  MyVertexIndex *index = MEMALLOC_NEW(MyVertexIndex)(granularity,snapToGrid);
-  fm_VertexIndex *asInterface = index;
-  return asInterface;
-}
-
-// Destroys an index obtained from fm_createVertexIndex.  The pointer is cast
-// back to the concrete MyVertexIndex type before it is deleted.
-void          fm_releaseVertexIndex(fm_VertexIndex *vindex)
-{
-  delete static_cast< MyVertexIndex *>(vindex);
-}
-
-#endif   // END OF VERTEX WELDING CODE
-
-
-//**********************************************************
-//**********************************************************
-//**** LineSweep Line-Segment Intersection Code
-//**********************************************************
-//**********************************************************
-
-//#ifndef LINE_SWEEP_H
-#if 0
-
-#define LINE_SWEEP_H
-
-class fm_quickSort // quicksort over an array of pointers; subclasses supply the ordering through compare() (this whole section sits inside #if 0)
-{
-public:
-	void qsort(void **base,NxI32 num); // perform the qsort.
-protected:
-  // -1 less, 0 equal, +1 greater.
-	virtual NxI32 compare(void **p1,void **p2) = 0; // p1/p2 point at array slots, not at the elements themselves
-private:
-	void inline swap(char **a,char **b); // exchange the contents of two array slots
-};
-
-
-// Exchanges the pointers stored in slots `a` and `b`; a slot swapped with
-// itself is left alone.
-void fm_quickSort::swap(char **a,char **b)
-{
-	if ( a == b ) return;
-
-	char *held = *a;
-	*a = *b;
-	*b = held;
-}
-
-
-void fm_quickSort::qsort(void **b,NxI32 num) // sort the `num` pointers starting at `b` using compare(); iterative, with pending ranges kept on a fixed 30-entry stack
-{
-	char *lo,*hi; // byte addresses of the first and last slot of the range being partitioned
-	char *mid;
-	char *bottom, *top; // scan cursors moving up from lo and down from hi
-	NxI32 size;
-	char *lostk[30], *histk[30]; // explicit stack of [lo,hi] ranges still to be sorted
-	NxI32 stkptr;
-	char **base = (char **)b;
-
-	if (num < 2 ) return; // nothing to sort
-
-	stkptr = 0;
-
-	lo = (char *)base;
-	hi = (char *)base + sizeof(char **) * (num-1);
-
-nextone:
-
-	size = (NxI32)(hi - lo) / sizeof(char**) + 1; // number of slots in the current range
-
-	mid = lo + (size / 2) * sizeof(char **);
-	swap((char **)mid,(char **)lo); // move the middle element to the front; it acts as the pivot
-	bottom = lo;
-	top = hi + sizeof(char **);
-
-	for (;;)
-	{
-		do
-		{
-			bottom += sizeof(char **);
-		} while (bottom <= hi && compare((void **)bottom,(void **)lo) <= 0); // skip slots that are not greater than the pivot
-
-		do
-		{
-			top -= sizeof(char **);
-		} while (top > lo && compare((void **)top,(void **)lo) >= 0); // skip slots that are not less than the pivot
-
-		if (top < bottom) break; // cursors crossed: partition finished
-
-		swap((char **)bottom,(char **)top);
-
-	}
-
-	swap((char **)lo,(char **)top); // put the pivot between the two partitions
-
-	if ( top - 1 - lo >= hi - bottom ) // choose which side is stacked for later and which is processed now
-	{
-		if (lo + sizeof(char **) < top)
-		{
-			lostk[stkptr] = lo;
-			histk[stkptr] = top - sizeof(char **);
-			stkptr++;
-		}
-		if (bottom < hi)
-		{
-			lo = bottom;
-			goto nextone;
-		}
-	}
-	else
-	{
-		if ( bottom < hi )
-		{
-			lostk[stkptr] = bottom;
-			histk[stkptr] = hi;
-			stkptr++;
-		}
-		if (lo + sizeof(char **) < top)
-		{
-			hi = top - sizeof(char **);
-			goto nextone; 					/* do small recursion */
-		}
-	}
-
-	stkptr--;
-
-	if (stkptr >= 0) // resume with the most recently stacked range, if any
-	{
-		lo = lostk[stkptr];
-		hi = histk[stkptr];
-		goto nextone;
-	}
-	return;
-}
-
-
-typedef CONVEX_DECOMPOSITION::Array< fm_LineSegment > LineSegmentVector;
-
-// Stores the smaller of v1/v2 in vmin and the larger in vmax.
-static inline void setMinMax(NxF64 &vmin,NxF64 &vmax,NxF64 v1,NxF64 v2)
-{
-  const bool inOrder = ( v1 <= v2 );
-  vmin = inOrder ? v1 : v2;
-  vmax = inOrder ? v2 : v1;
-}
-
-
-// A point where a line segment is crossed: the parametric time along the
-// segment and the vertex-pool index of the interpolated position.
-class Intersection
-{
-public:
-  Intersection(void) : mIndex(0), mTime(0)
-  {
-  }
-
-  // Interpolates from + (to-from)*time and registers that position with
-  // `vpool`, remembering the index it returns.
-  Intersection(NxF64 time,const NxF64 *from,const NxF64 *to,fm_VertexIndex *vpool)
-  {
-    mTime = time;
-
-    NxF64 hit[3];
-    for (NxU32 k=0; k<3; k++)
-    {
-      hit[k] = (to[k]-from[k])*time+from[k];
-    }
-
-    bool isNew; // required by getIndex; the answer is not used here
-    mIndex = vpool->getIndex(hit,isNew);
-  }
-
-  NxU32    mIndex;  // vertex-pool index of the intersection position
-  NxF64    mTime;   // parametric time along the segment
-};
-
-
-typedef CONVEX_DECOMPOSITION::Array< Intersection > IntersectionList;
-
-class MyLineSegment : public fm_LineSegment, public Memalloc
-{
-public:
-
-  void init(const fm_LineSegment &s,fm_VertexIndex *vpool,NxU32 x)
-  {
-    fm_LineSegment *dest = static_cast< fm_LineSegment *>(this);
-    *dest = s;
-
-    mFlipped = false;
-
-    const NxF64 *p1 = vpool->getVertexDouble(mE1);
-    const NxF64 *p2 = vpool->getVertexDouble(mE2);
-
-    setMinMax(mMin[0],mMax[0],p1[0],p2[0]);
-    setMinMax(mMin[1],mMax[1],p1[1],p2[1]);
-    setMinMax(mMin[2],mMax[2],p1[2],p2[2]);
-
-    if ( p1[x] <= p2[x] )
-    {
-      mFrom[0] = p1[0];
-      mFrom[1] = p1[1];
-      mFrom[2] = p1[2];
-
-      mTo[0]   = p2[0];
-      mTo[1]   = p2[1];
-      mTo[2]   = p2[2];
-    }
-    else
-    {
-      mFrom[0] = p2[0];
-      mFrom[1] = p2[1];
-      mFrom[2] = p2[2];
-
-      mTo[0]   = p1[0];
-      mTo[1]   = p1[1];
-      mTo[2]   = p1[2];
-
-      mFlipped = true;
-
-      swap(mE1,mE2);
-    }
-
-  }
-
-  // The caller has already established that the x-extents overlap, otherwise
-  // this routine would not be reached. Tests this segment against 'segment' in
-  // the 2d plane spanned by axes x/y and records a split on both when they cross.
-  void intersect(MyLineSegment *segment,NxU32 x,NxU32 y,NxU32 /* z */,fm_VertexIndex *vpool)
-  {
-    // Segments that share a start/end vertex index are never split against each other.
-    const bool sharesFirst  = ( segment->mE1 == mE1 || segment->mE1 == mE2 );
-    const bool sharesSecond = ( segment->mE2 == mE1 || segment->mE2 == mE2 );
-    if ( sharesFirst || sharesSecond )
-    {
-      return;
-    }
-
-    // Disjoint y-extents: no intersection possible.
-    if ( mMax[y] < segment->mMin[y] || mMin[y] > segment->mMax[y] )
-    {
-      return;
-    }
-
-    // Project both segments onto the (x,y) plane.
-    NxF64 a1[2] = { mFrom[x], mFrom[y] };
-    NxF64 a2[2] = { mTo[x],   mTo[y]   };
-    NxF64 b1[2] = { segment->mFrom[x], segment->mFrom[y] };
-    NxF64 b2[2] = { segment->mTo[x],   segment->mTo[y]   };
-
-    NxF64 t1,t2;
-    if ( fm_intersectLineSegments2dTime(a1,a2,b1,b2,t1,t2) == IR_DO_INTERSECT )
-    {
-      addIntersect(t1,vpool);
-      segment->addIntersect(t2,vpool);
-    }
-  }
-
-  // Records a split point on this segment at parametric position 'time'.
-  // The split is ignored when its vertex index equals either endpoint index
-  // or matches an intersection that is already stored.
-  // NOTE(review): the Intersection constructor is not visible here; it
-  // presumably interpolates mFrom/mTo at 'time' and registers the point in
-  // 'vpool' to obtain mIndex - confirm against its definition.
-  void addIntersect(NxF64 time,fm_VertexIndex *vpool)
-  {
-    Intersection intersect(time,mFrom,mTo,vpool);
-
-    if ( mE1 == intersect.mIndex || mE2 == intersect.mIndex )
-    {
-      //printf("Split too close to the beginning or the end of the line segment.\r\n");
-    }
-    else
-    {
-      if ( mIntersections.empty() )
-      {
-        mIntersections.pushBack(intersect);
-      }
-      else
-      {
-        IntersectionList::Iterator i;
-        for (i=mIntersections.begin(); i!=mIntersections.end(); ++i)
-        {
-          Intersection &it = (*i);
-          if ( it.mIndex == intersect.mIndex )
-          {
-            //printf("Duplicate Intersection, throwing it away.\r\n");
-            break;
-          }
-          else
-          {
-            if ( it.mTime > time )
-            {
-              // KNOWN GAP: the ordered insert below is disabled, so a split that
-              // belongs before an existing entry is silently dropped (the loop
-              // exits early and the append after the loop is skipped).
-//*** TODO TODO TODO              mIntersections.insert(i,intersect);
-              break;
-            }
-          }
-        }
-        // Only reached with i at end() when no entry matched or exceeded 'time':
-        // the new split is the latest one, so append it.
-        if ( i==mIntersections.end() )
-        {
-          mIntersections.pushBack(intersect);
-        }
-      }
-    }
-  }
-
-  // Appends this segment to 'results', cut into consecutive pieces at every
-  // stored intersection. Each piece has its endpoints swapped back when the
-  // segment was flipped during init(), restoring the original edge direction.
-  void getResults(LineSegmentVector &results)
-  {
-    NxU32 start = mE1;
-
-    // With no intersections this loop is skipped and the single piece below
-    // covers the whole segment.
-    for (IntersectionList::Iterator it=mIntersections.begin(); it!=mIntersections.end(); ++it)
-    {
-      const NxU32 cut = (*it).mIndex;
-      fm_LineSegment piece(start,cut);
-      if ( mFlipped )
-      {
-        swap(piece.mE1,piece.mE2);
-      }
-      results.pushBack(piece);
-      start = cut;
-    }
-
-    fm_LineSegment tail(start,mE2);
-    if ( mFlipped )
-    {
-      swap(tail.mE1,tail.mE2);
-    }
-    results.pushBack(tail);
-  }
-
-  // Exchanges the two index values in place.
-  void swap(NxU32 &a,NxU32 &b)
-  {
-    const NxU32 previousA = a;
-    a = b;
-    b = previousA;
-  }
-
-  bool             mFlipped;
-  NxF64            mFrom[3];
-  NxF64            mTo[3];
-  NxF64            mMin[3];
-  NxF64            mMax[3];
-  IntersectionList mIntersections;
-};
-
-typedef CONVEX_DECOMPOSITION::Array< MyLineSegment > MyLineSegmentVector;
-
-// Line-sweep implementation of fm_LineSweep: splits a set of coplanar edges
-// wherever they cross each other. compare() supplies the ordering used by the
-// qsort() call below.
-class MyLineSweep : public fm_LineSweep, public fm_quickSort, public Memalloc
-{
-public:
-  virtual ~MyLineSweep(void)
-  {
-
-  }
-  // Splits 'icount' input segments at their mutual intersections.
-  //   segments      : input edges (vertex indices into 'pool')
-  //   planeEquation : plane the edges lie in; only its dominant axis is used,
-  //                   to choose which two coordinates form the 2d sweep plane
-  //   pool          : vertex pool used to look up and create vertices
-  //   scount        : [out] number of segments returned
-  // Returns a pointer into the internal mResults array (0 when it is empty);
-  // mResults is cleared at the start of every call, so the pointer is only
-  // meaningful until the next call on this object.
-  fm_LineSegment * performLineSweep(const fm_LineSegment *segments,NxU32 icount,const NxF64 *planeEquation,fm_VertexIndex *pool,NxU32 &scount)
-  {
-    fm_LineSegment *ret = 0;
-
-    // Drop the dominant axis of the plane normal (stored in mZ) and sweep
-    // along mX, using mY as the secondary axis.
-    // NOTE(review): there is no default case, so mX/mY/mZ keep their previous
-    // values if 'axis' is none of the three enumerators.
-    FM_Axis axis = fm_getDominantAxis(planeEquation);
-    switch ( axis )
-    {
-      case FM_XAXIS:
-        mX = 1;
-        mY = 2;
-        mZ = 0;
-        break;
-      case FM_YAXIS:
-        mX = 0;
-        mY = 2;
-        mZ = 1;
-        break;
-      case FM_ZAXIS:
-        mX = 0;
-        mY = 1;
-        mZ = 2;
-        break;
-    }
-
-
-    mResults.clear();
-    scount = 0;
-
-    // One working segment per input edge, plus a pointer table that gets sorted.
-    MyLineSegment *mls   = MEMALLOC_NEW(MyLineSegment)[icount];
-    MyLineSegment **mptr = (MyLineSegment **)MEMALLOC_MALLOC(sizeof(MyLineSegment *)*icount);
-
-    for (NxU32 i=0; i<icount; i++)
-    {
-      mls[i].init(segments[i],pool,mX);
-      mptr[i] = &mls[i];
-    }
-
-    // Sort by start position along the sweep axis (see compare()).
-    qsort((void **)mptr,(NxI32)icount);
-
-    // For each segment, test only the later segments whose start lies before
-    // this segment's end on the sweep axis; the sorted order lets the inner
-    // loop stop at the first one that starts at or beyond it.
-    for (NxU32 i=0; i<icount; i++)
-    {
-      MyLineSegment *segment = mptr[i];
-      NxF64 esegment = segment->mTo[mX];
-      for (NxU32 j=i+1; j<icount; j++)
-      {
-        MyLineSegment *test = mptr[j];
-        if ( test->mFrom[mX] >= esegment )
-        {
-          break;
-        }
-        else
-        {
-          test->intersect(segment,mX,mY,mZ,pool);
-        }
-      }
-    }
-
-    // Emit every segment, split at the intersections recorded above.
-    for (NxU32 i=0; i<icount; i++)
-    {
-      MyLineSegment *segment = mptr[i];
-      segment->getResults(mResults);
-    }
-
-
-    delete []mls;
-    MEMALLOC_FREE(mptr);
-
-    if ( !mResults.empty() )
-    {
-      scount = (NxU32)mResults.size();
-      ret = &mResults[0];
-    }
-
-    return ret;
-  }
-
-  // Sort predicate: orders two MyLineSegment pointers by their start point,
-  // first along mX and then along mY. Returns -1, 0 or 1.
-	NxI32 compare(void **p1,void **p2)
-  {
-    NxI32 ret = 0;
-
-    MyLineSegment **m1 = (MyLineSegment **) p1;
-    MyLineSegment **m2 = (MyLineSegment **) p2;
-
-    MyLineSegment *s1 = *m1;
-    MyLineSegment *s2 = *m2;
-
-    if ( s1->mFrom[mX] < s2->mFrom[mX] )
-      ret = -1;
-    else if ( s1->mFrom[mX] > s2->mFrom[mX] )
-      ret = 1;
-    else if ( s1->mFrom[mY] < s2->mFrom[mY] )
-      ret = -1;
-    else if ( s1->mFrom[mY] > s2->mFrom[mY] )
-      ret = 1;
-
-    return ret;
-  }
-
-  NxU32              mX;  // index for the x-axis
-  NxU32              mY;  // index for the y-axis
-  NxU32              mZ;  // index of the dropped (dominant) axis
-  fm_VertexIndex        *mfm_VertexIndex; // not referenced anywhere in this class
-  LineSegmentVector  mResults; // output of the most recent performLineSweep() call
-};
-
-
-// Allocates a line-sweep object; release it with fm_releaseLineSweep().
-fm_LineSweep * fm_createLineSweep(void)
-{
-  return static_cast< fm_LineSweep *>( MEMALLOC_NEW(MyLineSweep) );
-}
-
-// Destroys a line-sweep object obtained from fm_createLineSweep().
-void        fm_releaseLineSweep(fm_LineSweep *sweep)
-{
-  delete static_cast< MyLineSweep *>(sweep);
-}
-
-
-
-#endif  // End of LineSweep code
-
-
-
-
-// Computes the axis-aligned bounding box of 'vcount' points laid out 'pstride'
-// bytes apart, writing the corners to bmin/bmax. The first point is read
-// unconditionally, so at least one point must be supplied.
-REAL fm_computeBestFitAABB(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *bmin,REAL *bmax) // returns the diagonal distance
-{
-  // Seed both corners with the first point.
-  for (NxU32 k=0; k<3; k++)
-  {
-    bmin[k] = points[k];
-  }
-  for (NxU32 k=0; k<3; k++)
-  {
-    bmax[k] = points[k];
-  }
-
-  // Walk the remaining points byte-wise so arbitrary strides are honoured.
-  const NxU8 *cursor = (const NxU8 *) points;
-  for (NxU32 i=1; i<vcount; i++)
-  {
-    cursor+=pstride;
-    const REAL *v = (const REAL *) cursor;
-
-    for (NxU32 k=0; k<3; k++)
-    {
-      if ( v[k] < bmin[k] ) bmin[k] = v[k];
-      if ( v[k] > bmax[k] ) bmax[k] = v[k];
-    }
-  }
-
-  const REAL dx = bmax[0] - bmin[0];
-  const REAL dy = bmax[1] - bmin[1];
-  const REAL dz = bmax[2] - bmin[2];
-
-  return (REAL) sqrt( dx*dx + dy*dy + dz*dz );
-}
-
-
-
-/* a = b - c */
-#define vector(a,b,c) \
-	(a)[0] = (b)[0] - (c)[0];	\
-	(a)[1] = (b)[1] - (c)[1];	\
-	(a)[2] = (b)[2] - (c)[2];
-
-
-
-#define innerProduct(v,q) \
-		((v)[0] * (q)[0] + \
-		(v)[1] * (q)[1] + \
-		(v)[2] * (q)[2])
-
-#define crossProduct(a,b,c) \
-	(a)[0] = (b)[1] * (c)[2] - (c)[1] * (b)[2]; \
-	(a)[1] = (b)[2] * (c)[0] - (c)[2] * (b)[0]; \
-	(a)[2] = (b)[0] * (c)[1] - (c)[0] * (b)[1];
-
-
-// Tests the line segment rayStart->rayEnd against the triangle (p1,p2,p3).
-// Returns true and writes the hit point to 'sect' when the segment crosses
-// the triangle; returns false (leaving 'sect' untouched) otherwise.
-bool fm_lineIntersectsTriangle(const REAL *rayStart,const REAL *rayEnd,const REAL *p1,const REAL *p2,const REAL *p3,REAL *sect)
-{
-  REAL dir[3];
-
-  dir[0] = rayEnd[0] - rayStart[0];
-  dir[1] = rayEnd[1] - rayStart[1];
-  dir[2] = rayEnd[2] - rayStart[2];
-
-  // Segment length; also the largest ray distance that still lies on the segment.
-  const REAL d = (REAL)sqrt(dir[0]*dir[0] + dir[1]*dir[1] + dir[2]*dir[2]);
-
-  // A zero-length segment has no direction to normalize; treat it as a miss
-  // instead of dividing by zero.
-  if ( !(d > 0) )
-  {
-    return false;
-  }
-
-  const REAL r = 1.0f / d;
-
-  dir[0]*=r;
-  dir[1]*=r;
-  dir[2]*=r;
-
-  REAL t;
-
-  if ( !fm_rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t ) )
-  {
-    return false;
-  }
-
-  // 't' is the distance along the normalized direction. The hit is on the
-  // segment only when it does not lie past rayEnd. (The previous test was
-  // inverted: it accepted t > d and rejected hits inside the segment.)
-  if ( t > d )
-  {
-    return false;
-  }
-
-  sect[0] = rayStart[0] + dir[0]*t;
-  sect[1] = rayStart[1] + dir[1]*t;
-  sect[2] = rayStart[2] + dir[2]*t;
-
-  return true;
-}
-
-
-
-// Ray/triangle test in the Moller-Trumbore form.
-//   p, d       : ray origin and direction
-//   v0, v1, v2 : triangle vertices
-//   t          : [out] ray parameter of the hit; written only when the
-//                barycentric tests pass, even if the hit is behind the origin
-// Returns true only for hits with t > 0.
-bool fm_rayIntersectsTriangle(const REAL *p,const REAL *d,const REAL *v0,const REAL *v1,const REAL *v2,REAL &t)
-{
-	REAL edge1[3],edge2[3],pvec[3],tvec[3],qvec[3];
-
-	vector(edge1,v1,v0);
-	vector(edge2,v2,v0);
-	crossProduct(pvec,d,edge2);
-
-	// Near-zero determinant: the ray is (almost) parallel to the triangle plane.
-	const REAL determinant = innerProduct(edge1,pvec);
-	if (determinant > -0.00001 && determinant < 0.00001)
-	{
-		return false;
-	}
-
-	const REAL invDet = 1/determinant;
-
-	// First barycentric coordinate.
-	vector(tvec,p,v0);
-	const REAL baryU = invDet * (innerProduct(tvec,pvec));
-	if (baryU < 0.0 || baryU > 1.0)
-	{
-		return false;
-	}
-
-	// Second barycentric coordinate.
-	crossProduct(qvec,tvec,edge1);
-	const REAL baryV = invDet * innerProduct(d,qvec);
-	if (baryV < 0.0 || baryU + baryV > 1.0)
-	{
-		return false;
-	}
-
-	// The infinite line hits the triangle; it is a ray hit only when the
-	// intersection lies in front of the origin.
-	t = invDet * innerProduct(edge2,qvec);
-	return t > 0;
-}
-
-
-// Determinant of the 3x3 matrix whose rows are p1, p2 and p3 (equivalently the
-// scalar triple product p1 . (p2 x p3)), written out term by term.
-inline REAL det(const REAL *p1,const REAL *p2,const REAL *p3)
-{
-  return  p1[0]*p2[1]*p3[2] + p2[0]*p3[1]*p1[2] + p3[0]*p1[1]*p2[2] -p1[0]*p3[1]*p2[2] - p2[0]*p1[1]*p3[2] - p3[0]*p2[1]*p1[2];
-}
-
-
-// Returns the absolute volume enclosed by an indexed triangle mesh.
-//   vertices : tightly packed x,y,z triples
-//   tcount   : number of triangles
-//   indices  : three vertex indices per triangle
-REAL  fm_computeMeshVolume(const REAL *vertices,NxU32 tcount,const NxU32 *indices)
-{
-	REAL signedSum = 0;
-
-	const NxU32 *tri = indices;
-	for (NxU32 t=0; t<tcount; ++t)
-	{
-		const REAL *a = &vertices[ tri[0]*3 ];
-		const REAL *b = &vertices[ tri[1]*3 ];
-		const REAL *c = &vertices[ tri[2]*3 ];
-		// Signed volume (times six) of the tetrahedron formed with the origin.
-		signedSum+=det(a,b,c);
-		tri+=3;
-	}
-
-	signedSum*=(1.0f/6.0f);
-
-	// The sign only reflects the winding order; report a magnitude.
-	if ( signedSum < 0 )
-	{
-		signedSum*=-1;
-	}
-	return signedSum;
-}
-
-
-// Returns the address of point 'index' in an array whose consecutive points
-// are 'pstride' bytes apart.
-const REAL * fm_getPoint(const REAL *points,NxU32 pstride,NxU32 index)
-{
-  const NxU8 *base = (const NxU8 *)points;
-  return (const REAL *)( base + (index*pstride) );
-}
-
-
-// 2d point-in-triangle test: P is inside (or on the boundary of) triangle ABC
-// when all three edge cross products are non-negative.
-bool fm_insideTriangle(REAL Ax, REAL Ay,
-                      REAL Bx, REAL By,
-                      REAL Cx, REAL Cy,
-                      REAL Px, REAL Py)
-
-{
-  // Edge B->C against the vector B->P.
-  const REAL ax  = Cx - Bx;
-  const REAL ay  = Cy - By;
-  const REAL bpx = Px - Bx;
-  const REAL bpy = Py - By;
-  const REAL aCROSSbp = ax*bpy - ay*bpx;
-  if ( !(aCROSSbp >= 0.0f) )
-  {
-    return false;
-  }
-
-  // Edge C->A against the vector C->P.
-  const REAL bx  = Ax - Cx;
-  const REAL by  = Ay - Cy;
-  const REAL cpx = Px - Cx;
-  const REAL cpy = Py - Cy;
-  const REAL bCROSScp = bx*cpy - by*cpx;
-  if ( !(bCROSScp >= 0.0f) )
-  {
-    return false;
-  }
-
-  // Edge A->B against the vector A->P.
-  const REAL cx  = Bx - Ax;
-  const REAL cy  = By - Ay;
-  const REAL apx = Px - Ax;
-  const REAL apy = Py - Ay;
-  const REAL cCROSSap = cx*apy - cy*apx;
-
-  return (cCROSSap >= 0.0f);
-}
-
-
-// Signed area of a 2d polygon (shoelace formula) using the first two
-// components of each point; points are 'pstride' bytes apart.
-REAL fm_areaPolygon2d(NxU32 pcount,const REAL *points,NxU32 pstride)
-{
-  const NxI32 n = (NxI32)pcount;
-
-  REAL twiceArea=0.0f;
-  NxI32 prev = n-1;   // start with the closing edge: last point -> first point
-  for (NxI32 cur=0; cur<n; cur++)
-  {
-    const REAL *a = fm_getPoint(points,pstride,prev);
-    const REAL *b = fm_getPoint(points,pstride,cur);
-    twiceArea+= a[0]*b[1] - b[0]*a[1];
-    prev = cur;
-  }
-  return twiceArea*0.5f;
-}
-
-
-// Even-odd (crossing number) point-in-polygon test. 'xindex'/'yindex' select
-// which two components of each point form the 2d plane.
-bool  fm_pointInsidePolygon2d(NxU32 pcount,const REAL *points,NxU32 pstride,const REAL *point,NxU32 xindex,NxU32 yindex)
-{
-  const REAL x = point[xindex];
-  const REAL y = point[yindex];
-
-  bool inside = false;
-  NxU32 prev = pcount-1;
-
-  for (NxU32 cur=0; cur<pcount; cur++)
-  {
-    const REAL *p1 = fm_getPoint(points,pstride,cur);
-    const REAL *p2 = fm_getPoint(points,pstride,prev);
-
-    const REAL x1 = p1[xindex];
-    const REAL y1 = p1[yindex];
-
-    const REAL x2 = p2[xindex];
-    const REAL y2 = p2[yindex];
-
-    // Does this edge straddle the horizontal line through the query point?
-    const bool straddles = ( y1 < y && y2 >= y ) || ( y2 < y && y1 >= y );
-
-    // Count the edge when its crossing lies to the left of the query point.
-    if ( straddles && x1+(y-y1)/(y2-y1)*(x2-x1)<x )
-    {
-      inside = !inside;
-    }
-
-    prev = cur;
-  }
-
-  return inside;
-}
-
-
-// Copies the polygon to '_dest' while dropping every vertex that is co-linear
-// (within 'epsilon') with its two neighbours.
-//   pcount  : number of input points; fewer than 3 yields no output
-//   points  : input points, 'pstride' bytes apart
-//   _dest   : output, tightly packed x,y,z triples; needs room for pcount points
-// Returns the number of points written.
-NxU32 fm_consolidatePolygon(NxU32 pcount,const REAL *points,NxU32 pstride,REAL *_dest,REAL epsilon) // collapses co-linear edges.
-{
-  NxU32 ret = 0;
-
-  if ( pcount >= 3 )
-  {
-    REAL *dest = _dest;
-    const REAL *prev = fm_getPoint(points,pstride,pcount-1);
-
-    for (NxU32 i=0; i<pcount; i++)
-    {
-      // Fetch both neighbours through fm_getPoint so 'pstride' is honoured for
-      // every vertex (previously only 'prev' used it; 'current'/'next' were
-      // advanced by a hard-coded three REALs).
-      const REAL *current = fm_getPoint(points,pstride,i);
-      const REAL *next    = fm_getPoint(points,pstride,(i+1)==pcount ? 0 : i+1);
-
-      if ( !fm_colinear(prev,current,next,epsilon) )
-      {
-        dest[0] = current[0];
-        dest[1] = current[1];
-        dest[2] = current[2];
-
-        dest+=3;
-        ret++;
-      }
-
-      // The predecessor always advances, even when 'current' was dropped.
-      prev = current;
-    }
-  }
-
-  return ret;
-}
-
-
-#ifndef RECT3D_TEMPLATE
-
-#define RECT3D_TEMPLATE
-
-// Minimal axis-aligned box: a min corner and a max corner, three components each.
-template <class T> class Rect3d
-{
-public:
-  // Leaves both corners uninitialized.
-  Rect3d(void) { }
-
-  // Builds the box from two 3-component arrays.
-  Rect3d(const T *bmin,const T *bmax)
-  {
-    SetMin(bmin);
-    SetMax(bmax);
-  }
-
-  // Corner setters taking a 3-component array.
-  void SetMin(const T *bmin) { SetMin(bmin[0],bmin[1],bmin[2]); }
-  void SetMax(const T *bmax) { SetMax(bmax[0],bmax[1],bmax[2]); }
-
-  // Corner setters taking individual components.
-	void SetMin(T x,T y,T z)
-	{
-		mMin[0] = x;
-		mMin[1] = y;
-		mMin[2] = z;
-	}
-
-	void SetMax(T x,T y,T z)
-	{
-		mMax[0] = x;
-		mMax[1] = y;
-		mMax[2] = z;
-	}
-
-  T mMin[3];
-  T mMax[3];
-};
-
-#endif
-
-// Splits 'source' into two boxes at midpoint[axis]: b1 receives the lower half
-// and b2 the upper half. An axis other than 0, 1 or 2 leaves b1/b2 untouched.
-void splitRect(NxU32 axis,
-						   const Rect3d<REAL> &source,
-							 Rect3d<REAL> &b1,
-							 Rect3d<REAL> &b2,
-							 const REAL *midpoint)
-{
-	if ( axis > 2 )
-	{
-		return;
-	}
-
-	// Lower half: same min corner, max clipped to the split plane.
-	REAL lowerMax[3] = { source.mMax[0], source.mMax[1], source.mMax[2] };
-	lowerMax[axis] = midpoint[axis];
-	b1.SetMin(source.mMin);
-	b1.SetMax(lowerMax);
-
-	// Upper half: min raised to the split plane, same max corner.
-	REAL upperMin[3] = { source.mMin[0], source.mMin[1], source.mMin[2] };
-	upperMin[axis] = midpoint[axis];
-	b2.SetMin(upperMin);
-	b2.SetMax(source.mMax);
-}
-
-// Computes a plane that cuts the point cloud across the longest axis of its
-// best-fit oriented bounding box.
-//   vcount, vertices : tightly packed x,y,z triples
-//   tcount, indices  : unused
-//   plane            : [out] plane equation; the first three components and
-//                      plane[3] are both filled via fm_computePlane
-// Always returns true.
-bool fm_computeSplitPlane(NxU32 vcount,
-                          const REAL *vertices,
-                          NxU32 /* tcount */,
-                          const NxU32 * /* indices */,
-                          REAL *plane)
-{
-
-  REAL sides[3];
-  REAL matrix[16];
-
-  fm_computeBestFitOBB( vcount, vertices, sizeof(REAL)*3, sides, matrix );
-
-  // The OBB in its own local frame, centered on the origin.
-  REAL bmax[3];
-  REAL bmin[3];
-  for (NxU32 k=0; k<3; k++)
-  {
-    bmax[k] = sides[k]*0.5f;
-    bmin[k] = -bmax[k];
-  }
-
-  const REAL dx = sides[0];
-  const REAL dy = sides[1];
-  const REAL dz = sides[2];
-
-  // Pick the longest side as the axis to cut across.
-  NxU32 axis = 0;
-  if ( dy > dx )
-  {
-    axis = 1;
-  }
-  if ( dz > dx && dz > dy )
-  {
-    axis = 2;
-  }
-
-  // Three points on the cutting plane, all starting at the box center.
-  REAL p1[3];
-  REAL p2[3];
-  REAL p3[3];
-  for (NxU32 k=0; k<3; k++)
-  {
-    p3[k] = p2[k] = p1[k] = bmin[k] + sides[k]*0.5f;
-  }
-
-  // u/v are the two axes spanning the cutting plane (u < v). 'raiseU' keeps the
-  // per-axis tie-break of the original code: note the comparison for axis 0 is
-  // oriented differently from axes 1 and 2.
-  NxU32 u = 0;
-  NxU32 v = 1;
-  bool raiseU = false;
-  switch ( axis )
-  {
-    case 0:
-      u = 1;  v = 2;  raiseU = ( dz > dy );
-      break;
-    case 1:
-      u = 0;  v = 2;  raiseU = ( dx > dz );
-      break;
-    case 2:
-      u = 0;  v = 1;  raiseU = ( dx > dy );
-      break;
-  }
-
-  // p2 moves to the min corner of the cutting rectangle; p3 moves to one of
-  // the two corners adjacent to it.
-  p2[u] = bmin[u];
-  p2[v] = bmin[v];
-
-  if ( raiseU )
-  {
-    p3[u] = bmax[u];
-    p3[v] = bmin[v];
-  }
-  else
-  {
-    p3[u] = bmin[u];
-    p3[v] = bmax[v];
-  }
-
-  // Bring the three points back through the OBB matrix and fit the plane.
-  REAL tp1[3];
-  REAL tp2[3];
-  REAL tp3[3];
-
-  fm_transform(matrix,p1,tp1);
-  fm_transform(matrix,p2,tp2);
-  fm_transform(matrix,p3,tp3);
-
-	plane[3] = fm_computePlane(tp1,tp2,tp3,plane);
-
-  return true;
-
-}
-
-#pragma warning(disable:4100)
-
-// NOT IMPLEMENTED: the body is empty, so 'nearest' is never written and all
-// inputs are ignored. Callers must not rely on any output from this function.
-void fm_nearestPointInTriangle(const REAL *nearestPoint,const REAL *p1,const REAL *p2,const REAL *p3,REAL *nearest)
-{
-
-}
-
-// 2d cross product of a and p (x/y components only).
-static REAL Partial(const REAL *a,const REAL *p)
-{
-	const REAL forward  = (a[0]*p[1]);
-	const REAL backward = (p[0]*a[1]);
-	return forward - backward;
-}
-
-// Signed area of the triangle (p0,p1,p2) projected onto the x/y plane.
-REAL  fm_areaTriangle(const REAL *p0,const REAL *p1,const REAL *p2)
-{
-	REAL twiceArea = Partial(p0,p1);
-	twiceArea += Partial(p1,p2);
-	twiceArea += Partial(p2,p0);
-	return twiceArea*0.5f;
-}
-
-// Component-wise 3-vector difference.
-void fm_subtract(const REAL *A,const REAL *B,REAL *diff) // compute A-B and store the result in 'diff'
-{
-  for (NxU32 k=0; k<3; k++)
-  {
-    diff[k] = A[k]-B[k];
-  }
-}
-
-
-// 4x4 matrix product pM = pA * pB, with all matrices stored as 16 consecutive
-// values indexed [row*4+column]. The result is assembled in a temporary, so
-// pM may alias pA or pB.
-void  fm_multiplyTransform(const REAL *pA,const REAL *pB,REAL *pM)
-{
-  REAL product[16];
-
-  for (NxU32 row=0; row<4; row++)
-  {
-    const REAL *lhs = &pA[row*4];
-    for (NxU32 col=0; col<4; col++)
-    {
-      product[row*4+col] = lhs[0] * pB[0*4+col] + lhs[1] * pB[1*4+col] + lhs[2] * pB[2*4+col] + lhs[3] * pB[3*4+col];
-    }
-  }
-
-  for (NxU32 k=0; k<16; k++)
-  {
-    pM[k] = product[k];
-  }
-}
-
-// Scales the 3-vector A in place by 'scaler'.
-void fm_multiply(REAL *A,REAL scaler)
-{
-  for (NxU32 k=0; k<3; k++)
-  {
-    A[k]*=scaler;
-  }
-}
-
-// Component-wise 3-vector sum: sum = A + B.
-void fm_add(const REAL *A,const REAL *B,REAL *sum)
-{
-  for (NxU32 k=0; k<3; k++)
-  {
-    sum[k] = A[k]+B[k];
-  }
-}
-
-// Copies three components from 'source' to 'dest'.
-void fm_copy3(const REAL *source,REAL *dest)
-{
-  for (NxU32 k=0; k<3; k++)
-  {
-    dest[k] = source[k];
-  }
-}
-
-
-// Compacts a mesh so that only vertices actually referenced by the triangles
-// are kept, numbered in order of first use.
-//   vcount, input_vertices : source vertices, packed x,y,z triples
-//   output_vertices        : [out] the referenced vertices, packed
-//   tcount, input_indices  : source triangles (three indices each)
-//   output_indices         : [out] triangles re-indexed into output_vertices
-// Returns the number of vertices written to output_vertices.
-NxU32  fm_copyUniqueVertices(NxU32 vcount,const REAL *input_vertices,REAL *output_vertices,NxU32 tcount,const NxU32 *input_indices,NxU32 *output_indices)
-{
-  NxU32 uniqueCount = 0;
-
-  // Work from a private copy of the input positions.
-  REAL *snapshot = (REAL *)MEMALLOC_MALLOC(sizeof(REAL)*vcount*3);
-  memcpy(snapshot,input_vertices,sizeof(REAL)*vcount*3);
-
-  // remap[old] is the new index of vertex 'old'; all bits set means unassigned.
-  NxU32 *remap = (NxU32 *)MEMALLOC_MALLOC(sizeof(NxU32)*vcount);
-  memset(remap,0xFF,sizeof(NxU32)*vcount);
-
-  REAL *out = output_vertices;
-  const NxU32 indexCount = tcount*3;
-
-  for (NxU32 i=0; i<indexCount; i++)
-  {
-    const NxU32 old = input_indices[i];
-
-    assert( old < vcount );
-
-    if ( remap[old] == 0xFFFFFFFF )
-    {
-      // First use of this vertex: give it the next slot and emit its position.
-      remap[old] = uniqueCount;
-      const REAL *pos = &snapshot[old*3];
-      out[0] = pos[0];
-      out[1] = pos[1];
-      out[2] = pos[2];
-      out+=3;
-      uniqueCount++;
-    }
-
-    output_indices[i] = remap[old];
-  }
-
-  MEMALLOC_FREE(snapshot);
-  MEMALLOC_FREE(remap);
-  return uniqueCount;
-}
-
-// Compares the plane of every triangle against the plane of the first one
-// (normal tolerance 0.01, distance tolerance 0.001). An empty mesh counts as
-// co-planar.
-bool    fm_isMeshCoplanar(NxU32 tcount,const NxU32 *indices,const REAL *vertices,bool doubleSided) // returns true if this collection of indexed triangles are co-planar!
-{
-  if ( tcount == 0 )
-  {
-    return true;
-  }
-
-  // Reference plane from the first triangle.
-  REAL reference[4];
-  reference[3] = fm_computePlane(&vertices[indices[0]*3],&vertices[indices[1]*3],&vertices[indices[2]*3],reference);
-
-  for (NxU32 t=1; t<tcount; t++)
-  {
-    const NxU32 *tri = &indices[t*3];
-    const REAL *a = &vertices[tri[0]*3];
-    const REAL *b = &vertices[tri[1]*3];
-    const REAL *c = &vertices[tri[2]*3];
-
-    REAL candidate[4];
-    candidate[3] = fm_computePlane(a,b,c,candidate);
-
-    if ( !fm_samePlane(reference,candidate,0.01f,0.001f,doubleSided) )
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-
-// Two plane equations match when their d terms differ by less than 'dEpsilon'
-// and the dot product of their normals lies within 'normalEpsilon' of 1.
-// With 'doubleSided' set, opposite-facing normals also match.
-bool fm_samePlane(const REAL p1[4],const REAL p2[4],REAL normalEpsilon,REAL dEpsilon,bool doubleSided)
-{
-  const REAL dGap = (REAL) fabs(p1[3]-p2[3]);
-  if ( !(dGap < dEpsilon) )
-  {
-    return false;
-  }
-
-  REAL alignment = fm_dot(p1,p2);
-  if ( doubleSided )
-  {
-    alignment = (REAL)fabs(alignment);
-  }
-
-  const REAL lowest  = 1 - normalEpsilon;
-  const REAL highest = 1 + normalEpsilon;
-
-  return ( alignment >= lowest && alignment <= highest );
-}
-
-
-// Resets a bounding box to the "empty" state so that any point merged into it
-// afterwards replaces both corners: min starts at the largest finite value and
-// max at the most negative finite value.
-void  fm_initMinMax(REAL bmin[3],REAL bmax[3])
-{
-  bmin[0] = FLT_MAX;
-  bmin[1] = FLT_MAX;
-  bmin[2] = FLT_MAX;
-  // -FLT_MAX, not FLT_MIN: FLT_MIN is the smallest POSITIVE float, which would
-  // keep the max corner above zero for data lying entirely below zero.
-  bmax[0] = -FLT_MAX;
-  bmax[1] = -FLT_MAX;
-  bmax[2] = -FLT_MAX;
-}
-
-
-#ifndef TESSELATE_H
-
-#define TESSELATE_H
-
-typedef CONVEX_DECOMPOSITION::Array< NxU32 > UintVector;
-
-// fm_Tesselate implementation: recursively splits triangles along their
-// longest edge until no edge exceeds a length limit or a maximum recursion
-// depth is reached. Works on either the float or the double representation of
-// the vertex pool.
-class Myfm_Tesselate : public fm_Tesselate, public Memalloc
-{
-public:
-  virtual ~Myfm_Tesselate(void)
-  {
-
-  }
-
-  // Tesselates 'tcount' indexed triangles.
-  //   vindex   : vertex pool; new midpoints are added to it
-  //   indices  : three vertex indices per input triangle
-  //   longEdge : edges longer than this are split
-  //   maxDepth : maximum number of recursive splits per input triangle
-  //   outcount : [out] number of triangles stored in the internal index array
-  // Returns a pointer to the internal index array, or 0 when it is empty.
-  // The index array is not cleared here, so triangles accumulate across calls.
-  const NxU32 * tesselate(fm_VertexIndex *vindex,NxU32 tcount,const NxU32 *indices,NxF32 longEdge,NxU32 maxDepth,NxU32 &outcount)
-  {
-    mMaxDepth = maxDepth;
-    mLongEdge  = longEdge*longEdge;   // compared against squared edge lengths
-    mLongEdgeD = mLongEdge;
-    mVertices = vindex;
-
-    // Each branch works from a private copy of the pool's vertex data taken
-    // before any new vertices are requested from the pool.
-    if ( mVertices->isDouble() )
-    {
-      NxU32 vcount = mVertices->getVcount();
-      NxF64 *vertices = (NxF64 *)MEMALLOC_MALLOC(sizeof(NxF64)*vcount*3);
-      memcpy(vertices,mVertices->getVerticesDouble(),sizeof(NxF64)*vcount*3);
-      tesselateAll(vertices,tcount,indices);
-      MEMALLOC_FREE(vertices);
-    }
-    else
-    {
-      NxU32 vcount = mVertices->getVcount();
-      NxF32 *vertices = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*vcount*3);
-      memcpy(vertices,mVertices->getVerticesFloat(),sizeof(NxF32)*vcount*3);
-      tesselateAll(vertices,tcount,indices);
-      MEMALLOC_FREE(vertices);
-    }
-
-    outcount = (NxU32)(mIndices.size()/3);
-
-    // Do not index element 0 of an empty array.
-    return mIndices.size() ? &mIndices[0] : 0;
-  }
-
-  // Single-precision entry point for one triangle at recursion level 'recurse'.
-  void tesselate(const NxF32 *p1,const NxF32 *p2,const NxF32 *p3,NxU32 recurse)
-  {
-    subdivide(p1,p2,p3,recurse,mLongEdge);
-  }
-
-  // Double-precision entry point for one triangle at recursion level 'recurse'.
-  void tesselate(const NxF64 *p1,const NxF64 *p2,const NxF64 *p3,NxU32 recurse)
-  {
-    subdivide(p1,p2,p3,recurse,mLongEdgeD);
-  }
-
-private:
-  // Runs the recursive split on every input triangle, reading positions from
-  // the packed x,y,z array 'vertices'.
-  template <class T> void tesselateAll(const T *vertices,NxU32 tcount,const NxU32 *indices)
-  {
-    for (NxU32 i=0; i<tcount; i++,indices+=3)
-    {
-      const T *p1 = &vertices[indices[0]*3];
-      const T *p2 = &vertices[indices[1]*3];
-      const T *p3 = &vertices[indices[2]*3];
-
-      tesselate(p1,p2,p3,0);
-    }
-  }
-
-  // Shared float/double recursion. Splits the triangle at the midpoint of its
-  // longest edge while any squared edge length exceeds 'limit' and the depth
-  // budget allows; otherwise emits the triangle into mIndices.
-  template <class T> void subdivide(const T *p1,const T *p2,const T *p3,NxU32 recurse,T limit)
-  {
-    T l1 = 0;
-    T l2 = 0;
-    T l3 = 0;
-    bool split = false;
-
-    if ( recurse < mMaxDepth )
-    {
-      l1 = fm_distanceSquared(p1,p2);
-      l2 = fm_distanceSquared(p2,p3);
-      l3 = fm_distanceSquared(p3,p1);
-
-      split = ( l1 > limit || l2 > limit || l3 > limit );
-    }
-
-    if ( !split )
-    {
-      // Leaf: look up (or create) the three vertices and record the triangle.
-      bool newp;
-
-      NxU32 i1 = mVertices->getIndex(p1,newp);
-      NxU32 i2 = mVertices->getIndex(p2,newp);
-      NxU32 i3 = mVertices->getIndex(p3,newp);
-
-      mIndices.pushBack(i1);
-      mIndices.pushBack(i2);
-      mIndices.pushBack(i3);
-      return;
-    }
-
-    T mid[3];
-
-    if ( l1 >= l2 && l1 >= l3 )
-    {
-      // Edge p1-p2 is the longest.
-      fm_lerp(p1,p2,mid,(T)0.5);
-      subdivide(p1,mid,p3, recurse+1, limit );
-      subdivide(mid,p2,p3, recurse+1, limit );
-    }
-    else if ( l2 >= l1 && l2 >= l3 )
-    {
-      // Edge p2-p3 is the longest.
-      fm_lerp(p2,p3,mid,(T)0.5);
-      subdivide(p1,p2,mid, recurse+1, limit );
-      subdivide(p1,mid,p3, recurse+1, limit );
-    }
-    else
-    {
-      // Edge p3-p1 is the longest.
-      fm_lerp(p3,p1,mid,(T)0.5);
-      subdivide(p1,p2,mid, recurse+1, limit );
-      subdivide(mid,p2,p3, recurse+1, limit );
-    }
-  }
-
-  NxF32           mLongEdge;   // squared edge-length limit, float path
-  NxF64          mLongEdgeD;   // same limit for the double path
-  fm_VertexIndex *mVertices;   // vertex pool of the current tesselate() call
-  UintVector    mIndices;      // output triangles, three indices each
-  NxU32          mMaxDepth;    // recursion depth limit
-};
-
-// Allocates a tesselator; release it with fm_releaseTesselate().
-fm_Tesselate * fm_createTesselate(void)
-{
-  return static_cast< fm_Tesselate * >( MEMALLOC_NEW(Myfm_Tesselate) );
-}
-
-// Destroys a tesselator obtained from fm_createTesselate().
-void           fm_releaseTesselate(fm_Tesselate *t)
-{
-  delete static_cast< Myfm_Tesselate *>(t);
-}
-
-#endif
-
-
-#ifndef RAY_ABB_INTERSECT
-
-#define RAY_ABB_INTERSECT
-
-//! Integer representation of a floating-point value.
-#define IR(x)	((NxU32&)x)
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-/**
-*	A method to compute a ray-AABB intersection.
-*	Original code by Andrew Woo, from "Graphics Gems", Academic Press, 1990
-*	Optimized code by Pierre Terdiman, 2000 (~20-30% faster on my Celeron 500)
-*	Epsilon value added by Klaus Hartmann. (discarding it saves a few cycles only)
-*
-*	Hence this version is faster as well as more robust than the original one.
-*
-*	Should work provided:
-*	1) the integer representation of 0.0f is 0x00000000
-*	2) the sign bit of the NxF32 is the most significant one
-*
-*	Report bugs: [email protected]
-*
-*	\param		aabb		[in] the axis-aligned bounding box
-*	\param		origin		[in] ray origin
-*	\param		dir			[in] ray direction
-*	\param		coord		[out] impact coordinates
-*	\return		true if ray intersects AABB
-*/
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-#define RAYAABB_EPSILON 0.00001f
-// Ray vs. axis-aligned box. Returns true and writes the entry point to 'coord'
-// when the ray hits the box; an origin inside the box counts as a hit at the
-// origin itself. On a miss 'coord' may hold partial values.
-bool fm_intersectRayAABB(const NxF32 MinB[3],const NxF32 MaxB[3],const NxF32 origin[3],const NxF32 dir[3],NxF32 coord[3])
-{
-  bool Inside = true;
-  NxF32 MaxT[3];
-  // -1 marks "no candidate plane on this axis"; its sign bit is tested below.
-  MaxT[0]=MaxT[1]=MaxT[2]=-1.0f;
-
-  // Find candidate planes.
-  for(NxU32 i=0;i<3;i++)
-  {
-    if(origin[i] < MinB[i])
-    {
-      coord[i]	= MinB[i];
-      Inside		= false;
-
-      // Calculate T distances to candidate planes
-      // IR() reinterprets the float's bits as an integer, so this is a
-      // "direction component is not +0.0f" test.
-      // NOTE(review): -0.0f has a non-zero bit pattern and would pass this
-      // test and be used as a divisor - confirm callers never pass it.
-      if(IR(dir[i]))	MaxT[i] = (MinB[i] - origin[i]) / dir[i];
-    }
-    else if(origin[i] > MaxB[i])
-    {
-      coord[i]	= MaxB[i];
-      Inside		= false;
-
-      // Calculate T distances to candidate planes
-      if(IR(dir[i]))	MaxT[i] = (MaxB[i] - origin[i]) / dir[i];
-    }
-  }
-
-  // Ray origin inside bounding box
-  if(Inside)
-  {
-    coord[0] = origin[0];
-    coord[1] = origin[1];
-    coord[2] = origin[2];
-    return true;
-  }
-
-  // Get largest of the maxT's for final choice of intersection
-  NxU32 WhichPlane = 0;
-  if(MaxT[1] > MaxT[WhichPlane])	WhichPlane = 1;
-  if(MaxT[2] > MaxT[WhichPlane])	WhichPlane = 2;
-
-  // Check final candidate actually inside box
-  // Sign bit set means the largest T is negative: the box is behind the ray.
-  if(IR(MaxT[WhichPlane])&0x80000000) return false;
-
-  // coord[WhichPlane] was already snapped to the box face above; compute the
-  // other two components and verify they fall within the face.
-  for(NxU32 i=0;i<3;i++)
-  {
-    if(i!=WhichPlane)
-    {
-      coord[i] = origin[i] + MaxT[WhichPlane] * dir[i];
-#ifdef RAYAABB_EPSILON
-      if(coord[i] < MinB[i] - RAYAABB_EPSILON || coord[i] > MaxB[i] + RAYAABB_EPSILON)	return false;
-#else
-      if(coord[i] < MinB[i] || coord[i] > MaxB[i])	return false;
-#endif
-    }
-  }
-  return true;	// ray hits box
-}
-
-// Line segment p1->p2 vs. axis-aligned box. Returns true and leaves the hit
-// point in 'intersect' when the ray from p1 towards p2 reaches the box no
-// farther away than p2. Segments not longer than RAYAABB_EPSILON never hit.
-bool fm_intersectLineSegmentAABB(const NxF32 bmin[3],const NxF32 bmax[3],const NxF32 p1[3],const NxF32 p2[3],NxF32 intersect[3])
-{
-  NxF32 dir[3];
-  for (NxU32 k=0; k<3; k++)
-  {
-    dir[k] = p2[k] - p1[k];
-  }
-
-  // fm_normalize's return value is used as the segment length.
-  const NxF32 dist = fm_normalize(dir);
-  if ( !(dist > RAYAABB_EPSILON) )
-  {
-    return false;
-  }
-
-  if ( !fm_intersectRayAABB(bmin,bmax,p1,dir,intersect) )
-  {
-    return false;
-  }
-
-  // Reject hits that lie beyond the far end of the segment.
-  const NxF32 hitDistSq = fm_distanceSquared(p1,intersect);
-  return !( hitDistSq > (dist*dist) );
-}
-
-#endif
-
-#ifndef OBB_TO_AABB
-
-#define OBB_TO_AABB
-
-#pragma warning(disable:4100)
-// NOT IMPLEMENTED: asserts unconditionally and never writes abmin/abmax.
-// When assert() is compiled out the function silently does nothing.
-void    fm_OBBtoAABB(const NxF32 obmin[3],const NxF32 obmax[3],const NxF32 matrix[16],NxF32 abmin[3],NxF32 abmax[3])
-{
-  assert(0); // not yet implemented.
-}
-
-
-// Address of vertex 'index' in a position array with a byte stride of 'vstride'.
-const REAL * computePos(NxU32 index,const REAL *vertices,NxU32 vstride)
-{
-  const char *base = (const char *)vertices;
-  return (const REAL*)( base + (index*vstride) );
-}
-
-// Adds 'normal' onto the accumulated normal stored for vertex 'index'
-// ('nstride' is the byte stride between consecutive normals).
-void computeNormal(NxU32 index,REAL *normals,NxU32 nstride,const REAL *normal)
-{
-  REAL *slot = (REAL *)( (char *)normals + (index*nstride) );
-  for (NxU32 k=0; k<3; k++)
-  {
-    slot[k]+=normal[k];
-  }
-}
-
-// Computes per-vertex normals by summing the normals of all triangles that
-// use a vertex and normalizing the sum.
-void fm_computeMeanNormals(NxU32 vcount,       // the number of vertices
-                           const REAL *vertices,     // the base address of the vertex position data.
-                           NxU32 vstride,      // the stride between position data.
-                           REAL *normals,            // the base address  of the destination for mean vector normals
-                           NxU32 nstride,      // the stride between normals
-                           NxU32 tcount,       // the number of triangles
-                           const NxU32 *indices)     // the triangle indices
-{
-  // Pass 1: reset every destination normal to the zero vector.
-  char *cursor = (char *)normals;
-  for (NxU32 v=0; v<vcount; v++,cursor+=nstride)
-  {
-    REAL *n = (REAL *)cursor;
-    n[0] = 0;
-    n[1] = 0;
-    n[2] = 0;
-  }
-
-  // Pass 2: add each face normal onto the three vertices of its triangle.
-  for (NxU32 t=0; t<tcount; t++)
-  {
-    const NxU32 *tri = &indices[t*3];
-    const NxU32 i1 = tri[0];
-    const NxU32 i2 = tri[1];
-    const NxU32 i3 = tri[2];
-
-    const REAL *p1 = computePos(i1,vertices,vstride);
-    const REAL *p2 = computePos(i2,vertices,vstride);
-    const REAL *p3 = computePos(i3,vertices,vstride);
-
-    // The vertices are passed in reverse order (p3,p2,p1).
-    REAL faceNormal[3];
-    fm_computePlane(p3,p2,p1,faceNormal);
-
-    computeNormal(i1,normals,nstride,faceNormal);
-    computeNormal(i2,normals,nstride,faceNormal);
-    computeNormal(i3,normals,nstride,faceNormal);
-  }
-
-  // Pass 3: normalize the accumulated sums.
-  cursor = (char *)normals;
-  for (NxU32 v=0; v<vcount; v++,cursor+=nstride)
-  {
-    fm_normalize( (REAL *)cursor );
-  }
-}
-
-#endif
-
-
-#define BIGNUMBER 100000000.0  		/* hundred million */
-
-// Stores the three components (x,y,z) into n.
-static inline void Set(REAL *n,REAL x,REAL y,REAL z)
-{
-	const REAL components[3] = { x, y, z };
-	for (NxU32 k=0; k<3; k++)
-	{
-		n[k] = components[k];
-	}
-}
-
-// Copies three components from 'source' to 'dest'.
-static inline void Copy(REAL *dest,const REAL *source)
-{
-	for (NxU32 k=0; k<3; k++)
-	{
-		dest[k] = source[k];
-	}
-}
-
-
-// Computes an approximate bounding sphere for a strided point set in two passes:
-// pass one seeds the sphere from the most widely separated pair of axis-extreme
-// points; pass two grows it so that every point ends up inside.
-//
-//   vcount  : number of points
-//   points  : address of the first point (three consecutive REALs per point)
-//   pstride : distance in bytes between consecutive points
-//   center  : receives the sphere center (three REALs)
-//
-// Returns the sphere radius.
-REAL  fm_computeBestFitSphere(NxU32 vcount,const REAL *points,NxU32 pstride,REAL *center)
-{
-  REAL radius;
-  REAL radius2;
-
-  REAL xmin[3];
-  REAL xmax[3];
-  REAL ymin[3];
-  REAL ymax[3];
-  REAL zmin[3];
-  REAL zmax[3];
-  REAL dia1[3];
-  REAL dia2[3];
-
-  /* FIRST PASS: find 6 minima/maxima points */
-  Set(xmin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
-  Set(xmax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
-  Set(ymin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
-  Set(ymax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
-  Set(zmin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
-  Set(zmax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
-
-  const char *scan = (const char *)points;
-
-  for (NxU32 i=0; i<vcount; i++)
-  {
-    const REAL *caller_p = (const REAL *)scan;
-
-    if (caller_p[0]<xmin[0])
-      Copy(xmin,caller_p); /* New xminimum point */
-    if (caller_p[0]>xmax[0])
-      Copy(xmax,caller_p);
-    if (caller_p[1]<ymin[1])
-      Copy(ymin,caller_p);
-    if (caller_p[1]>ymax[1])
-      Copy(ymax,caller_p);
-    if (caller_p[2]<zmin[2])
-      Copy(zmin,caller_p);
-    if (caller_p[2]>zmax[2])
-      Copy(zmax,caller_p);
-
-    scan+=pstride;
-  }
-
-  /* Set xspan = distance between the 2 points xmin & xmax (squared) */
-  REAL dx = xmax[0] - xmin[0];
-  REAL dy = xmax[1] - xmin[1];
-  REAL dz = xmax[2] - xmin[2];
-  REAL xspan = dx*dx + dy*dy + dz*dz;
-
-  /* Same for y & z spans */
-  dx = ymax[0] - ymin[0];
-  dy = ymax[1] - ymin[1];
-  dz = ymax[2] - ymin[2];
-  REAL yspan = dx*dx + dy*dy + dz*dz;
-
-  dx = zmax[0] - zmin[0];
-  dy = zmax[1] - zmin[1];
-  dz = zmax[2] - zmin[2];
-  REAL zspan = dx*dx + dy*dy + dz*dz;
-
-  /* Set points dia1 & dia2 to the maximally separated pair */
-  Copy(dia1,xmin);
-  Copy(dia2,xmax); /* assume xspan biggest */
-  REAL maxspan = xspan;
-
-  if (yspan>maxspan)
-  {
-    maxspan = yspan;
-    Copy(dia1,ymin);
-    Copy(dia2,ymax);
-  }
-
-  if (zspan>maxspan)
-  {
-    Copy(dia1,zmin);
-    Copy(dia2,zmax);
-  }
-
-  /* dia1,dia2 is a diameter of initial sphere */
-  /* calc initial center */
-  center[0] = (dia1[0]+dia2[0])*0.5f;
-  center[1] = (dia1[1]+dia2[1])*0.5f;
-  center[2] = (dia1[2]+dia2[2])*0.5f;
-
-  /* calculate initial radius**2 and radius */
-  dx = dia2[0]-center[0]; /* x component of radius vector */
-  dy = dia2[1]-center[1]; /* y component of radius vector */
-  dz = dia2[2]-center[2]; /* z component of radius vector */
-
-  radius2 = dx*dx + dy*dy + dz*dz;
-  radius = REAL(sqrt(radius2));
-
-  /* SECOND PASS: increment current sphere */
-  scan = (const char *)points;
-  for (NxU32 i=0; i<vcount; i++)
-  {
-    const REAL *caller_p = (const REAL *)scan;
-    /* Advance unconditionally. Stepping only when the point was outside the
-       sphere re-tested the same point and never reached most of the input. */
-    scan+=pstride;
-
-    dx = caller_p[0]-center[0];
-    dy = caller_p[1]-center[1];
-    dz = caller_p[2]-center[2];
-
-    REAL old_to_p_sq = dx*dx + dy*dy + dz*dz;
-
-    if (old_to_p_sq > radius2) 	/* do r**2 test first */
-    { 	/* this point is outside of current sphere */
-      REAL old_to_p = REAL(sqrt(old_to_p_sq));
-      /* calc radius of new sphere */
-      radius = (radius + old_to_p) * 0.5f;
-      radius2 = radius*radius; 	/* for next r**2 compare */
-      REAL old_to_new = old_to_p - radius;
-
-      /* calc center of new sphere; old_to_p > 0 here because old_to_p_sq > radius2 >= 0 */
-      REAL recip = 1.0f /old_to_p;
-
-      REAL cx = (radius*center[0] + old_to_new*caller_p[0]) * recip;
-      REAL cy = (radius*center[1] + old_to_new*caller_p[1]) * recip;
-      REAL cz = (radius*center[2] + old_to_new*caller_p[2]) * recip;
-
-      Set(center,cx,cy,cz);
-    }
-  }
-
-  return radius;
-}
-
-
-// Fits a capsule to a strided point set.  An oriented bounding box is computed
-// first; the capsule axis is the box's strictly longest side (ties select axis 2).
-//
-//   radius : receives the largest distance of any point from the chosen axis
-//   height : receives (2 * largest |coordinate| along the axis) - (2 * radius)
-//   matrix : receives an axis-dependent 90 degree euler rotation multiplied with the OBB matrix
-void fm_computeBestFitCapsule(NxU32 vcount,const REAL *points,NxU32 pstride,REAL &radius,REAL &height,REAL matrix[16],bool bruteForce)
-{
-  REAL sides[3];
-  REAL obbMatrix[16];
-  fm_computeBestFitOBB(vcount,points,pstride,sides,obbMatrix,bruteForce);
-
-  // Select the dominant box axis.
-  NxI32 axis = 2;
-  if ( sides[0] > sides[1] && sides[0] > sides[2] )
-  {
-    axis = 0;
-  }
-  else if ( sides[1] > sides[0] && sides[1] > sides[2] )
-  {
-    axis = 1;
-  }
-
-  // Build the output transform for the selected axis.
-  REAL localTransform[16];
-  if ( axis == 0 )
-  {
-    fm_eulerMatrix(0,0,FM_PI/2,localTransform);
-  }
-  else if ( axis == 1 )
-  {
-    fm_eulerMatrix(0,FM_PI/2,0,localTransform);
-  }
-  else
-  {
-    fm_eulerMatrix(FM_PI/2,0,0,localTransform);
-  }
-  fm_matrixMultiply(localTransform,obbMatrix,matrix);
-
-  // The two coordinates perpendicular to the capsule axis.
-  const NxI32 perpA = (axis+1)%3;
-  const NxI32 perpB = (axis+2)%3;
-
-  REAL maxDistSq = 0;
-  REAL maxLen = 0;
-
-  const NxU8 *cursor = (const NxU8 *)points;
-  for (NxU32 i=0; i<vcount; i++, cursor+=pstride)
-  {
-    REAL local[3];
-    fm_inverseRT(obbMatrix,(const REAL *)cursor,local);
-
-    const REAL distSq = local[perpA]*local[perpA]+local[perpB]*local[perpB];
-    if ( distSq > maxDistSq )
-    {
-      maxDistSq = distSq;
-    }
-
-    const REAL along = (REAL) fabs(local[axis]);
-    if ( along > maxLen )
-    {
-      maxLen = along;
-    }
-  }
-
-  radius = (REAL)sqrt(maxDistSq);
-  height = (maxLen*2)-(radius*2);
-}
-
-
-//************* Triangulation
-
-#ifndef TRIANGULATE_H
-
-#define TRIANGULATE_H
-
-typedef NxU32 TU32;
-
-// Three-component double precision vector used by the triangulator.
-// The default constructor intentionally leaves the components unset.
-class TVec
-{
-public:
-	TVec(void) { }
-	TVec(NxF64 _x,NxF64 _y,NxF64 _z) : x(_x), y(_y), z(_z) { }
-
-	NxF64 x;
-	NxF64 y;
-	NxF64 z;
-};
-
-typedef CONVEX_DECOMPOSITION::Array< TVec >  TVecVector;
-typedef CONVEX_DECOMPOSITION::Array< TU32 >  TU32Vector;
-
-// Triangulates a single polygon contour.  Points are collected with addPoint(),
-// projected to 2d by reordering their components so that the two longest
-// bounding-box axes come first, and then triangulated by _process().
-class CTriangulator
-{
-public:
-    ///     Default constructor
-    CTriangulator();
-
-    ///     Default destructor
-    virtual ~CTriangulator();
-
-    ///     Triangulates the contour
-    void triangulate(TU32Vector &indices);
-
-    ///     Returns the given point in the triangulator array
-    inline TVec get(const TU32 id) { return mPoints[id]; }
-
-    ///     Discards all input points, remapped points and generated indices
-    virtual void reset(void)
-    {
-        mInputPoints.clear();
-        mPoints.clear();
-        mIndices.clear();
-    }
-
-    ///     Appends a contour point and grows the bounding box to include it
-    virtual void addPoint(NxF64 x,NxF64 y,NxF64 z)
-    {
-        TVec v(x,y,z);
-        // update bounding box...
-        if ( mInputPoints.empty() )
-        {
-            mMin = v;
-            mMax = v;
-        }
-        else
-        {
-            if ( x < mMin.x ) mMin.x = x;
-            if ( y < mMin.y ) mMin.y = y;
-            if ( z < mMin.z ) mMin.z = z;
-
-            if ( x > mMax.x ) mMax.x = x;
-            if ( y > mMax.y ) mMax.y = y;
-            if ( z > mMax.z ) mMax.z = z;
-        }
-        mInputPoints.pushBack(v);
-    }
-
-    // Triangulation happens in 2d.  We could inverse transform the polygon around the normal direction, or we just use the two most significant axes.
-    // Here we find the two longest axes and use them to triangulate.  Inverse transforming them would introduce more floating point error and isn't worth it.
-    //
-    // tcount receives the number of triangles produced.  The return value points at
-    // tcount*3 indices into the input points (owned by this object), or is null when
-    // nothing was produced.
-    virtual NxU32 * triangulate(NxU32 &tcount,NxF64 epsilon)
-    {
-        NxU32 *ret = 0;
-        tcount = 0;
-        mEpsilon = epsilon;
-
-        if ( !mInputPoints.empty() )
-        {
-            mPoints.clear();
-
-            // locate the first, second and third longest edges and store them in i1, i2, i3
-            NxF64 dx = mMax.x - mMin.x;
-            NxF64 dy = mMax.y - mMin.y;
-            NxF64 dz = mMax.z - mMin.z;
-
-            NxU32 i1,i2,i3;
-
-            if ( dx > dy && dx > dz )
-            {
-                i1 = 0;
-                if ( dy > dz )
-                {
-                    i2 = 1;
-                    i3 = 2;
-                }
-                else
-                {
-                    i2 = 2;
-                    i3 = 1;
-                }
-            }
-            else if ( dy > dx && dy > dz )
-            {
-                i1 = 1;
-                if ( dx > dz )
-                {
-                    i2 = 0;
-                    i3 = 2;
-                }
-                else
-                {
-                    i2 = 2;
-                    i3 = 0;
-                }
-            }
-            else
-            {
-                i1 = 2;
-                if ( dx > dy )
-                {
-                    i2 = 0;
-                    i3 = 1;
-                }
-                else
-                {
-                    i2 = 1;
-                    i3 = 0;
-                }
-            }
-
-            // Reorder each point's components.  Members are read by name instead of
-            // treating the TVec array as a flat array of doubles, so the remap does
-            // not depend on the struct's memory layout.
-            NxU32 pcount = (NxU32)mInputPoints.size();
-            for (NxU32 i=0; i<pcount; i++)
-            {
-                const TVec &src = mInputPoints[i];
-                const NxF64 comp[3] = { src.x, src.y, src.z };
-                TVec v( comp[i1], comp[i2], comp[i3] );
-                mPoints.pushBack(v);
-            }
-
-            mIndices.clear();
-            triangulate(mIndices);
-            tcount = (NxU32)mIndices.size()/3;
-            if ( tcount )
-            {
-                ret = &mIndices[0];
-            }
-        }
-        return ret;
-    }
-
-    ///     Returns a pointer to the x component of the given (unremapped) input point
-    virtual const NxF64 * getPoint(NxU32 index)
-    {
-        return &mInputPoints[index].x;
-    }
-
-
-private:
-    NxF64                  mEpsilon;      // minimum signed area term accepted by _snip
-    TVec                   mMin;          // bounding box of the input points
-    TVec                   mMax;
-    TVecVector             mInputPoints;  // points as supplied to addPoint
-    TVecVector             mPoints;       // points with components reordered for 2d work
-    TU32Vector             mIndices;      // triangle indices from the last triangulate call
-
-    ///     Tests if a point is inside the given triangle
-    bool _insideTriangle(const TVec& A, const TVec& B, const TVec& C,const TVec& P);
-
-    ///     Returns the area of the contour
-    NxF64 _area();
-
-    bool _snip(NxI32 u, NxI32 v, NxI32 w, NxI32 n, NxI32 *V);
-
-    ///     Processes the triangulation
-    void _process(TU32Vector &indices);
-
-};
-
-///     Default constructor
-///     Performs no explicit initialization; mEpsilon is assigned later by
-///     triangulate(NxU32&, NxF64) before it is read.
-CTriangulator::CTriangulator(void)
-{
-}
-
-///     Default destructor
-///     Nothing to release explicitly.
-CTriangulator::~CTriangulator()
-{
-}
-
-///     Triangulates the contour
-///     Thin forwarder to _process(); triangle indices are appended to 'indices'.
-void CTriangulator::triangulate(TU32Vector &indices)
-{
-    _process(indices);
-}
-
-///     Processes the triangulation
-void CTriangulator::_process(TU32Vector &indices)
-{
-    const NxI32 n = (const NxI32)mPoints.size();
-    if (n < 3)
-        return;
-    NxI32 *V = (NxI32 *)MEMALLOC_MALLOC(sizeof(NxI32)*n);
-
-	bool flipped = false;
-
-    if (0.0f < _area())
-    {
-        for (NxI32 v = 0; v < n; v++)
-            V[v] = v;
-    }
-    else
-    {
-		flipped = true;
-        for (NxI32 v = 0; v < n; v++)
-            V[v] = (n - 1) - v;
-    }
-
-    NxI32 nv = n;
-    NxI32 count = 2 * nv;
-    for (NxI32 m = 0, v = nv - 1; nv > 2;)
-    {
-        if (0 >= (count--))
-            return;
-
-        NxI32 u = v;
-        if (nv <= u)
-            u = 0;
-        v = u + 1;
-        if (nv <= v)
-            v = 0;
-        NxI32 w = v + 1;
-        if (nv <= w)
-            w = 0;
-
-        if (_snip(u, v, w, nv, V))
-        {
-            NxI32 a, b, c, s, t;
-            a = V[u];
-            b = V[v];
-            c = V[w];
-			if ( flipped )
-			{
-				indices.pushBack(a);
-				indices.pushBack(b);
-				indices.pushBack(c);
-			}
-			else
-			{
-				indices.pushBack(c);
-				indices.pushBack(b);
-				indices.pushBack(a);
-			}
-            m++;
-            for (s = v, t = v + 1; t < nv; s++, t++)
-                V[s] = V[t];
-            nv--;
-            count = 2 * nv;
-        }
-    }
-
-    MEMALLOC_FREE(V);
-}
-
-///     Returns the area of the contour
-///     Returns the signed area of the contour stored in mPoints (shoelace sum over
-///     the x/y components of each consecutive point pair, closing last -> first).
-NxF64 CTriangulator::_area()
-{
-    const NxI32 total = (NxI32)mPoints.size();
-    NxF64 sum = 0.0f;
-    NxI32 prev = total - 1;
-    for (NxI32 cur = 0; cur < total; cur++)
-    {
-        const TVec &a = mPoints[prev];
-        const TVec &b = mPoints[cur];
-        sum += a.x * b.y - b.x * a.y;
-        prev = cur;
-    }
-    sum *= 0.5f;
-    return sum;
-}
-
-///     Returns true when the triangle formed by polygon slots u, v, w can be clipped:
-///     its cross-product term must be at least mEpsilon and no other remaining
-///     polygon point (slots 0..n-1 of V) may lie inside it.
-bool CTriangulator::_snip(NxI32 u, NxI32 v, NxI32 w, NxI32 n, NxI32 *V)
-{
-    const TVec &A = mPoints[ V[u] ];
-    const TVec &B = mPoints[ V[v] ];
-    const TVec &C = mPoints[ V[w] ];
-
-    const NxF64 cross = (((B.x - A.x) * (C.y - A.y)) - ((B.y - A.y) * (C.x - A.x)));
-    if (mEpsilon > cross)
-    {
-        return false;
-    }
-
-    for (NxI32 slot = 0; slot < n; slot++)
-    {
-        const bool isCorner = (slot == u) || (slot == v) || (slot == w);
-        if (!isCorner && _insideTriangle(A, B, C, mPoints[ V[slot] ]))
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-///     Tests if a point is inside the given triangle
-///     Tests if a point is inside the given triangle
-///     Uses only the x/y components: P is inside (or on an edge) when the 2d cross
-///     product of every edge with the vector from that edge's start to P is >= 0.
-bool CTriangulator::_insideTriangle(const TVec& A, const TVec& B, const TVec& C,const TVec& P)
-{
-    // edge B->C against B->P
-    const NxF64 crossBC = (C.x - B.x) * (P.y - B.y) - (C.y - B.y) * (P.x - B.x);
-    if (!(crossBC >= 0.0f))
-    {
-        return false;
-    }
-
-    // edge C->A against C->P
-    const NxF64 crossCA = (A.x - C.x) * (P.y - C.y) - (A.y - C.y) * (P.x - C.x);
-    if (!(crossCA >= 0.0f))
-    {
-        return false;
-    }
-
-    // edge A->B against A->P
-    const NxF64 crossAB = (B.x - A.x) * (P.y - A.y) - (B.y - A.y) * (P.x - A.x);
-    return (crossAB >= 0.0f);
-}
-
-// fm_Triangulate implementation.  Each triangulate3d() call returns a buffer of
-// tcount*9 values (three xyz corners per triangle) that is owned by this object
-// and stays valid until the next triangulate3d() call, reset(), or destruction.
-class Triangulate : public fm_Triangulate, public Memalloc
-{
-public:
-  Triangulate(void)
-  {
-    mPointsFloat = 0;
-    mPointsDouble = 0;
-  }
-
-  virtual ~Triangulate(void)
-  {
-    reset();
-  }
-
-  // Releases the result buffers of the previous call.
-  void reset(void)
-  {
-    MEMALLOC_FREE(mPointsFloat);
-    MEMALLOC_FREE(mPointsDouble);
-    mPointsFloat = 0;
-    mPointsDouble = 0;
-  }
-
-  // Double precision triangulation of a strided polygon.
-  //   pcount      : number of input points
-  //   _points     : first input point
-  //   vstride     : byte stride between input points
-  //   tcount      : receives the triangle count (0 when nothing is produced)
-  //   consolidate : when true the polygon is first passed through fm_consolidatePolygon
-  //   epsilon     : tolerance forwarded to consolidation and the triangulator
-  // Returns the triangle corner buffer, or null when no triangles were produced.
-  virtual const NxF64 *       triangulate3d(NxU32 pcount,
-                                             const NxF64 *_points,
-                                             NxU32 vstride,
-                                             NxU32 &tcount,
-                                             bool consolidate,
-                                             NxF64 epsilon)
-  {
-    reset();
-    tcount = 0; // defined result even when triangulation produces nothing
-
-    NxF64 *points = (NxF64 *)MEMALLOC_MALLOC(sizeof(NxF64)*pcount*3);
-    if ( consolidate )
-    {
-      pcount = fm_consolidatePolygon(pcount,_points,vstride,points,1-epsilon);
-    }
-    else
-    {
-      NxF64 *dest = points;
-      for (NxU32 i=0; i<pcount; i++)
-      {
-        const NxF64 *src = fm_getPoint(_points,vstride,i);
-        dest[0] = src[0];
-        dest[1] = src[1];
-        dest[2] = src[2];
-        dest+=3;
-      }
-    }
-    // 'points' is a tightly packed scratch buffer on both paths (the consolidation
-    // call takes no destination stride), so it must be read with the packed stride
-    // rather than the caller's stride.
-    vstride = sizeof(NxF64)*3;
-
-    if ( pcount >= 3 )
-    {
-      CTriangulator ct;
-      for (NxU32 i=0; i<pcount; i++)
-      {
-        const NxF64 *src = fm_getPoint(points,vstride,i);
-        ct.addPoint( src[0], src[1], src[2] );
-      }
-      NxU32 _tcount;
-      NxU32 *indices = ct.triangulate(_tcount,epsilon);
-      if ( indices )
-      {
-        tcount = _tcount;
-        mPointsDouble = (NxF64 *)MEMALLOC_MALLOC(sizeof(NxF64)*tcount*3*3);
-        NxF64 *dest = mPointsDouble;
-        for (NxU32 i=0; i<tcount; i++)
-        {
-          NxU32 i1 = indices[i*3+0];
-          NxU32 i2 = indices[i*3+1];
-          NxU32 i3 = indices[i*3+2];
-          const NxF64 *p1 = ct.getPoint(i1);
-          const NxF64 *p2 = ct.getPoint(i2);
-          const NxF64 *p3 = ct.getPoint(i3);
-
-          dest[0] = p1[0];
-          dest[1] = p1[1];
-          dest[2] = p1[2];
-
-          dest[3] = p2[0];
-          dest[4] = p2[1];
-          dest[5] = p2[2];
-
-          dest[6] = p3[0];
-          dest[7] = p3[1];
-          dest[8] = p3[2];
-          dest+=9;
-        }
-      }
-    }
-    MEMALLOC_FREE(points);
-
-    return mPointsDouble;
-  }
-
-  // Single precision variant: widens the input to double, runs the double
-  // overload, then narrows the resulting triangle corners back to float.
-  virtual const NxF32  *       triangulate3d(NxU32 pcount,
-                                             const NxF32  *points,
-                                             NxU32 vstride,
-                                             NxU32 &tcount,
-                                             bool consolidate,
-                                             NxF32 epsilon)
-  {
-    reset();
-
-    NxF64 *temp = (NxF64 *)MEMALLOC_MALLOC(sizeof(NxF64)*pcount*3);
-    NxF64 *dest = temp;
-    for (NxU32 i=0; i<pcount; i++)
-    {
-      const NxF32 *p = fm_getPoint(points,vstride,i);
-      dest[0] = p[0];
-      dest[1] = p[1];
-      dest[2] = p[2];
-      dest+=3;
-    }
-    const NxF64 *results = triangulate3d(pcount,temp,sizeof(NxF64)*3,tcount,consolidate,epsilon);
-    if ( results )
-    {
-      NxU32 fcount = tcount*3*3;
-      mPointsFloat = (NxF32 *)MEMALLOC_MALLOC(sizeof(NxF32)*tcount*3*3);
-      NxF32 *fdest = mPointsFloat;
-      for (NxU32 i=0; i<fcount; i++)
-      {
-        fdest[i] = (NxF32) results[i];
-      }
-      // the double buffer was only an intermediate result
-      MEMALLOC_FREE(mPointsDouble);
-      mPointsDouble = 0;
-    }
-    MEMALLOC_FREE(temp);
-
-    return mPointsFloat;
-  }
-
-private:
-  NxF32 *mPointsFloat;   // result buffer of the last float call, or null
-  NxF64 *mPointsDouble;  // result buffer of the last double call, or null
-};
-
-// Creates a Triangulate instance and returns it through its fm_Triangulate interface.
-fm_Triangulate * fm_createTriangulate(void)
-{
-  Triangulate *instance = MEMALLOC_NEW(Triangulate);
-  fm_Triangulate *iface = static_cast< fm_Triangulate *>(instance);
-  return iface;
-}
-
-// Destroys an object previously returned by fm_createTriangulate().
-void             fm_releaseTriangulate(fm_Triangulate *t)
-{
-  delete static_cast< Triangulate *>(t);
-}
-
-#endif
-
-// Returns false when p1 and p2 are closer together than epsilon (compared on
-// squared distances), true otherwise.
-bool validDistance(const REAL *p1,const REAL *p2,REAL epsilon)
-{
-	const REAL dx = p1[0] - p2[0];
-	const REAL dy = p1[1] - p2[1];
-	const REAL dz = p1[2] - p2[2];
-	const REAL distSq = dx*dx+dy*dy+dz*dz;
-	return !( distSq < (epsilon*epsilon) );
-}
-
-// A triangle is considered valid when no two corners are closer than epsilon,
-// its area exceeds epsilon, and consolidating it as a polygon still leaves
-// three points.
-bool fm_isValidTriangle(const REAL *p1,const REAL *p2,const REAL *p3,REAL epsilon)
-{
-  if ( !validDistance(p1,p2,epsilon) )
-    return false;
-  if ( !validDistance(p1,p3,epsilon) )
-    return false;
-  if ( !validDistance(p2,p3,epsilon) )
-    return false;
-
-  const REAL area = fm_computeArea(p1,p2,p3);
-  if ( !(area > epsilon) )
-    return false;
-
-  // pack the three corners into one contiguous polygon
-  REAL corners[3*3];
-  const REAL *inputs[3] = { p1, p2, p3 };
-  for (int c = 0; c < 3; c++)
-  {
-    corners[c*3+0] = inputs[c][0];
-    corners[c*3+1] = inputs[c][1];
-    corners[c*3+2] = inputs[c][2];
-  }
-
-  REAL consolidated[64*3];
-  const NxU32 remaining = fm_consolidatePolygon(3,corners,sizeof(REAL)*3,consolidated,1-epsilon);
-  return remaining == 3;
-}
-
-
-// Multiplies two quaternions stored as (x, y, z, w) and writes the product to quat.
-// All four components are computed before any is stored, so quat may alias an input.
-void  fm_multiplyQuat(const REAL *left,const REAL *right,REAL *quat)
-{
-	const REAL w = left[3]*right[3] - left[0]*right[0] - left[1]*right[1] - left[2]*right[2];
-	const REAL x = left[3]*right[0] + right[3]*left[0] + left[1]*right[2] - right[1]*left[2];
-	const REAL y = left[3]*right[1] + right[3]*left[1] + left[2]*right[0] - right[2]*left[0];
-	const REAL z = left[3]*right[2] + right[3]*left[2] + left[0]*right[1] - right[0]*left[1];
-
-	quat[3] = w;
-	quat[0] = x;
-	quat[1] = y;
-	quat[2] = z;
-}
-
-}; // end of namespace

+ 0 - 1905
Engine/lib/convexDecomp/NvHashMap.h

@@ -1,1905 +0,0 @@
-/*
-
-NvHashMap.h : A simple hash map and array template class to avoid introducing dependencies on the STL for containers.
-
-*/
-
-
-// This code contains NVIDIA Confidential Information and is disclosed
-// under the Mutual Non-Disclosure Agreement.
-//
-// Notice
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright © 2009 NVIDIA Corporation. All rights reserved.
-// Copyright © 2002-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright © 2001-2006 NovodeX. All rights reserved.
-
-#ifndef NV_HASH_MAP_H
-#define NV_HASH_MAP_H
-
-#include "NvUserMemAlloc.h"
-
-#if (defined(NX_WINDOWS) | defined(NX_X360))
-#include <typeinfo.h>
-#endif
-
-#include <new>
-#include <typeinfo>
-#include <stdlib.h>
-#include <string.h>
-//******************************************************
-//******************************************************
-//******************************************************
-
-
-#ifndef NV_FOUNDATION_BASIC_TEMPLATES_H
-#define NV_FOUNDATION_BASIC_TEMPLATES_H
-
-#pragma warning(push)
-#pragma warning(disable:4512) // suppress the 'assignment operator could not be generated' warning message.
-
-// Small comparison functors and compile-time helpers shared by the containers.
-namespace CONVEX_DECOMPOSITION
-{
-	// Functor: a == b
-	template<typename A>
-	struct Equal
-	{
-		bool operator()(const A& a, const A& b)	const { return a==b; }
-	};
-
-	// Functor: a < b
-	template<typename A>
-	struct Less
-	{
-		bool operator()(const A& a, const A& b)	const { return a<b; }
-	};
-
-	// Functor: a > b
-	template<typename A>
-	struct Greater
-	{
-		bool operator()(const A& a, const A& b)	const { return a>b; }
-	};
-
-
-	// Two-member aggregate; the default constructor value-initializes both members.
-	template <class F, class S> 
-	class Pair
-	{
-	public:
-		F	first;
-		S	second;
-		Pair(): first(F()), second(S()) {}
-		Pair(const F &f, const S &s): first(f), second(s) {}
-		Pair(const Pair &p): first(p.first), second(p.second) {}
-	};
-
-	// Compile-time floor(log2(A)): halves A recursively until the LogTwo<1> base case.
-	// There is no LogTwo<0> specialization, so A must be at least 1.
-	template<unsigned int A>	struct LogTwo	{	static const unsigned int value  = LogTwo<(A>>1)>::value + 1; };
-	template<>					struct LogTwo<1>{	static const unsigned int value  = 0;	};
-
-	// UnConst<T>::Type is T with a top-level const removed.
-	template<typename T> struct UnConst	{ typedef T Type; };
-	template<typename T> struct UnConst<const T> { typedef T Type; };
-}
-
-#pragma warning(pop)
-
-#endif
-
-#ifndef NV_FOUNDATION_ALLOCATOR
-#define NV_FOUNDATION_ALLOCATOR
-
-#pragma warning(push)
-#pragma warning(disable:4100)
-
-// Min/max helpers and the allocator policy classes used by the containers.
-// Every allocator here forwards to MEMALLOC_MALLOC / MEMALLOC_FREE; the name,
-// file and line arguments are accepted but not used.
-namespace CONVEX_DECOMPOSITION
-{
-
-
-/**
-\brief The return value is the greater of the two specified values.
-*/
-template<class N>
-NX_INLINE N NxMax(N a, N b)							{	return a<b ? b : a;						}
-
-
-/**
-\brief The return value is the greater of the two specified values.
-*/
-template <>
-NX_INLINE NxF32 NxMax(NxF32 a, NxF32 b)				{	return  a > b ? a : b;	}
-
-/**
-\brief The return value is the lesser of the two specified values.
-*/
-template<class N>
-NX_INLINE N NxMin(N a, N b)							{	return a<b ? a : b;						}
-
-/**
-\brief The return value is the lesser of the two specified values.
-*/
-template <>
-NX_INLINE NxF32 NxMin(NxF32 a, NxF32 b)				{	return a < b ? a : b;	}
-
-
-
-	/**
-	Allocator used to access the global NxUserAllocator instance without providing additional information.
-	*/
-	class Allocator
-	{
-	public:
-		// 'dummy' is ignored.
-		Allocator(const char* dummy = 0) 
-		{
-		}
-		// 'file' and 'line' are ignored.
-		void* allocate(size_t size, const char* file, int line)
-		{
-      return MEMALLOC_MALLOC(size);
-		}
-		void deallocate(void* ptr)
-		{
-      MEMALLOC_FREE(ptr);
-		}
-	};
-
-	/**
-	Allocator used to access the global NxUserAllocator instance using a dynamic name.
-	*/
-	class NamedAllocator
-	{
-	public:
-		// 'name' is accepted but not stored.
-		NamedAllocator(const char* name = 0) 
-			
-		{
-
-    }
-		// 'filename' and 'line' are ignored.
-		void* allocate(size_t size, const char* filename, int line)
-		{
-      return MEMALLOC_MALLOC(size);
-		}
-		void deallocate(void* ptr)
-		{
-      MEMALLOC_FREE(ptr);
-		}
-	private:
-	};
-
-	/**
-	Allocator used to access the global NxUserAllocator instance using a static name derived from T.
-	*/
-	template <typename T>
-	class ReflectionAllocator
-	{
-		// Returns a name for T; not called by allocate() or deallocate() below.
-		static const char* getName()
-		{
-#if defined NX_GNUC
-			return __PRETTY_FUNCTION__;
-#else
-			return typeid(T).name();
-#endif
-		}
-	public:
-		// 'dummy' is ignored.
-		ReflectionAllocator(const char* dummy=0) 
-		{
-		}
-		// 'filename' and 'line' are ignored.
-		void* allocate(size_t size, const char* filename, int line)
-		{
-      return MEMALLOC_MALLOC(size);
-		}
-		void deallocate(void* ptr)
-		{
-      MEMALLOC_FREE(ptr);
-		}
-	};
-
-	// if you get a build error here, you are trying to NX_NEW a class 
-	// that is neither plain-old-type nor derived from CONVEX_DECOMPOSITION::UserAllocated
-	// (T is placed in a union, which only compiles for types a union may hold;
-	// Type simply re-exports X.)
-	template <typename T, typename X>
-	union EnableIfPod
-	{
-		int i; T t;
-		typedef X Type;
-	};
-
-}
-
-// Global placement new for ReflectionAllocator templated by plain-old-type. Allows using NX_NEW for pointers and built-in-types.
-// ATTENTION: You need to use NX_DELETE_POD or NX_FREE to deallocate memory, not NX_DELETE. NX_DELETE_POD redirects to NX_FREE.
-// Rationale: NX_DELETE uses global operator delete(void*), which we don't want to overload. 
-// Any other definition of NX_DELETE couldn't support array syntax 'NX_DELETE([]a);'. 
-// NX_DELETE_POD was preferred over NX_DELETE_ARRAY because it is used less often and applies to both single instances and arrays.
-// Placement new taking a ReflectionAllocator<T>; the allocation is forwarded to
-// alloc.allocate().  The 'line' parameter type goes through EnableIfPod<T, int>,
-// so this overload only compiles for T that may be a union member.
-template <typename T>
-NX_INLINE void* operator new(size_t size, CONVEX_DECOMPOSITION::ReflectionAllocator<T> alloc, const char* fileName, typename CONVEX_DECOMPOSITION::EnableIfPod<T, int>::Type line)
-{
-	return alloc.allocate(size, fileName, line);
-}
-
-// Array form of the placement new above.
-template <typename T>
-NX_INLINE void* operator new[](size_t size, CONVEX_DECOMPOSITION::ReflectionAllocator<T> alloc, const char* fileName, typename CONVEX_DECOMPOSITION::EnableIfPod<T, int>::Type line)
-{
-	return alloc.allocate(size, fileName, line);
-}
-
-// If construction after placement new throws, this placement delete is being called.
-template <typename T>
-NX_INLINE void  operator delete(void* ptr, CONVEX_DECOMPOSITION::ReflectionAllocator<T> alloc, const char* fileName, typename CONVEX_DECOMPOSITION::EnableIfPod<T, int>::Type line)
-{
-	alloc.deallocate(ptr);
-}
-
-// If construction after placement new throws, this placement delete is being called.
-template <typename T>
-NX_INLINE void  operator delete[](void* ptr, CONVEX_DECOMPOSITION::ReflectionAllocator<T> alloc, const char* fileName, typename CONVEX_DECOMPOSITION::EnableIfPod<T, int>::Type line)
-{
-	alloc.deallocate(ptr);
-}
-
-#pragma warning(pop)
-
-#endif
-
-
-#ifndef NV_FOUNDATION_USERALLOCATED
-#define NV_FOUNDATION_USERALLOCATED
-
-// An expression that expands to its argument in _DEBUG builds and to nothing in all
-// other builds.  We currently use this only for tagging the purpose of containers
-// for memory use tracking.
-#if defined(_DEBUG)
-#define NV_DEBUG_EXP(x) (x)
-#define NV_DEBUG_EXP_C(x) x,
-#else
-#define NV_DEBUG_EXP(x)
-#define NV_DEBUG_EXP_C(x)
-#endif
-
-// NX_ALLOCA(var, type, number): points the already-declared pointer 'var' at storage
-// for 'number' objects of 'type'.  It expands to several statements and declares a
-// flag 'alloced_<var>' in the enclosing scope, so it must be used as a statement in
-// the same scope as the matching NX_FREEA(var).
-// NOTE(review): the platform list tests NXLINUX while the other names use an NX_
-// prefix -- confirm this is the macro actually defined by the build.
-#if defined (NX_X360) || defined (NX_WINDOWS) || defined (NX_CELL) || defined (NXLINUX) || defined(NX_WII)
-// Stack allocation with alloc fallback for large allocations (>50% of default stack size for platform)
-// The threshold comes from gSystemServices when it is set, otherwise 8192 is used.
-#	define NX_ALLOCA(var, type, number)											\
-		bool alloced_##var = false;												\
-		if (sizeof(type)*(number)*2 > (CONVEX_DECOMPOSITION::gSystemServices ? CONVEX_DECOMPOSITION::gSystemServices->getAllocaThreshold() : 8192)  )	\
-		{																		\
-			var = (type *)MEMALLOC_MALLOC(sizeof(type)*(number));				\
-			alloced_##var = true;												\
-		} else {																\
-			var = (type *)MEMALLOC_ALLOCA(sizeof(type)*(number));				\
-		}
-// Releases 'var' only when NX_ALLOCA fell back to the heap.
-#	define NX_FREEA(var) if (alloced_##var) MEMALLOC_FREE(var);
-#else
-#	define NX_ALLOCA(var, type, number)		var = (type *)NxAlloca(sizeof(type)*(number));
-#	define NX_FREEA(var)					0;
-#endif
-
-namespace CONVEX_DECOMPOSITION
-{
-	/**
-	Provides new and delete using a UserAllocator.
-	Guarantees that 'delete x;' uses the UserAllocator too.
-	*/
-	class UserAllocated
-	{
-	public:
-
-		// Placement-style new: the alloc, fileName and line arguments are accepted
-		// but unused; the memory always comes from MEMALLOC_MALLOC.
-		template <typename Alloc>
-		NX_INLINE void* operator new(size_t size, Alloc alloc, const char* fileName, int line)
-		{
-      return MEMALLOC_MALLOC(size);
-		}
-		// Array form; same behavior as above.
-		template <typename Alloc>
-		NX_INLINE void* operator new[](size_t size, Alloc alloc, const char* fileName, int line)
-		{
-      return MEMALLOC_MALLOC(size);
-		}
-
-		// Class-level delete: memory is returned through MEMALLOC_FREE.
-		NX_INLINE void  operator delete(void* ptr)
-		{
-      MEMALLOC_FREE(ptr);
-		}
-		NX_INLINE void  operator delete[](void* ptr)
-		{
-      MEMALLOC_FREE(ptr);
-		}
-	};
-};
-
-#endif
-
-
-#ifndef NV_FOUNDATION_ALIGNEDMALLOC_H
-#define NV_FOUNDATION_ALIGNEDMALLOC_H
-
-/*!
-Allocate aligned memory.
-Alignment must be a power of 2!
--- should be templated by a base allocator
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-	/**
-	Allocator, which is used to access the global NxUserAllocator instance
-	(used for dynamic data types template instantiation), which can align memory
-	*/
-
-	// SCS: AlignedMalloc with 3 params not found, seems not used on PC either
-	// disabled for now to avoid GCC error
-
-	// Wraps BaseAllocator and returns pointers aligned to N bytes (N must be a power
-	// of two).  Each block is over-allocated by N - 1 + sizeof(size_t) bytes; the
-	// distance back to the base pointer is stored in the size_t slot directly in
-	// front of the returned pointer so that deallocate() can recover the base.
-	template<NxU32 N, typename BaseAllocator = Allocator >
-	class AlignedAllocator : public BaseAllocator
-	{
-	public:
-		AlignedAllocator(const BaseAllocator& base = BaseAllocator()) 
-		: BaseAllocator(base) {}
-
-		// Returns an N-aligned block of at least 'size' bytes, or NULL when the
-		// base allocator fails.
-		void* allocate(size_t size, const char* file, int line)
-		{
-			// Build the mask at pointer width.  Inverting the 32-bit value (N - 1)
-			// and widening it afterwards yields a mask whose upper 32 bits are
-			// zero, which would clear the high half of a 64-bit address.
-			const size_t alignMask = size_t(N) - 1;
-			const size_t pad = alignMask + sizeof(size_t); // store offset for delete.
-
-			NxU8* base = (NxU8*)BaseAllocator::allocate(size+pad, file, line);
-			if(base == NULL)
-				return NULL;
-
-			NxU8* ptr = (NxU8*)(size_t(base + pad) & ~alignMask); // aligned pointer
-			((size_t*)ptr)[-1] = ptr - base; // store offset
-
-			return ptr;
-		}
-
-		// Releases a block obtained from allocate(); NULL is ignored.
-		void deallocate(void* ptr)
-		{
-			if(ptr == NULL)
-				return;
-
-			NxU8* base = ((NxU8*)ptr) - ((size_t*)ptr)[-1];
-			BaseAllocator::deallocate(base);
-		}
-	};
-}
-
-#endif
-
-
-#ifndef NV_FOUNDATION_INLINE_ALLOCATOR_H
-#define NV_FOUNDATION_INLINE_ALLOCATOR_H
-
-namespace CONVEX_DECOMPOSITION
-{
-	// Allocator with an embedded N-byte buffer.  Requests of at most N bytes are
-	// answered with that buffer; larger requests go to BaseAllocator.  The array
-	// class uses this to keep a small number of objects alongside its metadata.
-	template<NxU32 N, typename BaseAllocator>
-	class InlineAllocator : private BaseAllocator
-	{
-	public:
-
-		InlineAllocator(const BaseAllocator& alloc = BaseAllocator())
-			: BaseAllocator(alloc)
-		{}
-
-		// Returns the embedded buffer when the request fits, otherwise defers to the base.
-		void* allocate(size_t size, const char* filename, int line)
-		{
-			if(size <= N)
-			{
-				return mBuffer;
-			}
-			return BaseAllocator::allocate(size, filename, line);
-		}
-
-		// The embedded buffer is never handed to the base allocator.
-		void deallocate(void* ptr)
-		{
-			if(ptr == mBuffer)
-			{
-				return;
-			}
-			BaseAllocator::deallocate(ptr);
-		}
-
-	private:
-		NxU8 mBuffer[N];
-	};
-}
-
-#endif
-
-
-#ifndef NV_FOUNDATION_NXSTRIDEDDATA
-#define NV_FOUNDATION_NXSTRIDEDDATA
-/** \addtogroup foundation
-  @{
-*/
-
-// Iterator over a sequence of T objects whose consecutive elements start
-// `stride` bytes apart. The stride may exceed sizeof(T), or be 0, in which
-// case every position refers to the same element.
-template<typename T>
-class NvStrideIterator
-{
-	// StripConst<X>::Type is X without a top-level const. It lets an
-	// NvStrideIterator<const U> be constructed from an NvStrideIterator<U>.
-	template <typename X>
-	struct StripConst
-	{
-		typedef X Type;
-	};
-
-	template <typename X>
-	struct StripConst<const X>
-	{
-		typedef X Type;
-	};
-
-public:
-	// ptr: first element. stride: byte distance between consecutive elements.
-	explicit NX_INLINE NvStrideIterator(T* ptr = NULL, NxU32 stride = sizeof(T)) :
-		mPtr(ptr), mStride(stride)
-	{
-		NX_ASSERT(mStride == 0 || sizeof(T) <= mStride);
-	}
-
-	// Construction from the non-const flavour of the same iterator.
-	NX_INLINE NvStrideIterator(const NvStrideIterator<typename StripConst<T>::Type>& strideIterator) :
-		mPtr(strideIterator.ptr()), mStride(strideIterator.stride())
-	{
-		NX_ASSERT(mStride == 0 || sizeof(T) <= mStride);
-	}
-
-	NX_INLINE T* ptr() const
-	{
-		return mPtr;
-	}
-
-	NX_INLINE NxU32 stride() const
-	{
-		return mStride;
-	}
-
-	NX_INLINE T& operator*() const
-	{
-		return *mPtr;
-	}
-
-	NX_INLINE T* operator->() const
-	{
-		return mPtr;
-	}
-
-	// Element i positions away; i may be negative.
-	NX_INLINE T& operator[](int i) const
-	{
-		return *byteAdd(mPtr, byteOffset(i));
-	}
-
-	// preincrement
-	NX_INLINE NvStrideIterator& operator++()
-	{
-		mPtr = byteAdd(mPtr, byteOffset(1));
-		return *this;
-	}
-
-	// postincrement
-	NX_INLINE NvStrideIterator operator++(int)
-	{
-		NvStrideIterator tmp = *this;
-		mPtr = byteAdd(mPtr, byteOffset(1));
-		return tmp;
-	}
-
-	// predecrement
-	NX_INLINE NvStrideIterator& operator--()
-	{
-		mPtr = byteAdd(mPtr, -byteOffset(1));
-		return *this;
-	}
-
-	// postdecrement
-	NX_INLINE NvStrideIterator operator--(int)
-	{
-		NvStrideIterator tmp = *this;
-		mPtr = byteAdd(mPtr, -byteOffset(1));
-		return tmp;
-	}
-
-	NX_INLINE NvStrideIterator& operator+=(int i)
-	{
-		mPtr = byteAdd(mPtr, byteOffset(i));
-		return *this;
-	}
-
-	NX_INLINE NvStrideIterator operator+(int i) const
-	{
-		return NvStrideIterator(byteAdd(mPtr, byteOffset(i)), stride());
-	}
-
-	NX_INLINE NvStrideIterator& operator-=(int i)
-	{
-		mPtr = byteAdd(mPtr, -byteOffset(i));
-		return *this;
-	}
-
-	NX_INLINE NvStrideIterator operator-(int i) const
-	{
-		return NvStrideIterator(byteAdd(mPtr, -byteOffset(i)), stride());
-	}
-
-	// iterator difference, in elements
-	NX_INLINE int operator-(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		if(mStride == 0)
-			return 0;	// zero stride: all positions are the same element (avoids dividing by 0)
-		return static_cast<int>(byteDistance(other) / static_cast<ptrdiff_t>(mStride));
-	}
-
-	NX_INLINE bool operator==(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr == other.mPtr;
-	}
-
-	NX_INLINE bool operator!=(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr != other.mPtr;
-	}
-
-	NX_INLINE bool operator<(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr < other.mPtr;
-	}
-
-	NX_INLINE bool operator>(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr > other.mPtr;
-	}
-
-	NX_INLINE bool operator<=(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr <= other.mPtr;
-	}
-
-	NX_INLINE bool operator>=(const NvStrideIterator& other) const
-	{
-		NX_ASSERT(isCompatible(other));
-		return mPtr >= other.mPtr;
-	}
-
-private:
-	// Signed byte offset of i elements. Computing it in ptrdiff_t (rather than
-	// int * NxU32, which is unsigned 32-bit arithmetic) keeps negative steps
-	// negative on platforms with 64-bit pointers.
-	NX_INLINE ptrdiff_t byteOffset(int i) const
-	{
-		return static_cast<ptrdiff_t>(i) * static_cast<ptrdiff_t>(mStride);
-	}
-
-	// Signed distance in bytes from other.mPtr to mPtr.
-	NX_INLINE ptrdiff_t byteDistance(const NvStrideIterator& other) const
-	{
-		return reinterpret_cast<const NxU8*>(mPtr) - reinterpret_cast<const NxU8*>(other.mPtr);
-	}
-
-	// Moves ptr by a signed number of bytes.
-	NX_INLINE static T* byteAdd(T* ptr, ptrdiff_t bytes)
-	{
-		return const_cast<T*>(reinterpret_cast<const T*>(reinterpret_cast<const NxU8*>(ptr) + bytes));
-	}
-
-	// Two iterators are compatible when they share a stride and lie a whole
-	// number of strides apart.
-	NX_INLINE bool isCompatible(const NvStrideIterator& other) const
-	{
-		if(mStride != other.mStride)
-			return false;
-		const ptrdiff_t diff = byteDistance(other);
-		if(mStride == 0)
-			return diff == 0;
-		return diff % static_cast<ptrdiff_t>(mStride) == 0;
-	}
-
-	T* mPtr;
-	NxU32 mStride;
-};
-
-
-// Commuted form of iterator + offset: advances a copy of `it` by i elements.
-template<typename T>
-NX_INLINE NvStrideIterator<T> operator+(int i, NvStrideIterator<T> it)
-{
-	return it += i;
-}
-
- /** @} */
-#endif
-
-#ifndef NV_FOUNDATION_ARRAY
-#define NV_FOUNDATION_ARRAY
-
-namespace CONVEX_DECOMPOSITION
-{
-	namespace Internal
-	{
-		// Bookkeeping shared by every Array<T>: storage pointer, number of
-		// allocated slots and number of live elements. Starts out empty.
-		template <typename T>
-		struct ArrayMetaData
-		{
-			T*					mData;
-			NxU32				mCapacity;
-			NxU32				mSize;
-
-			// Initialisers listed in declaration order, which is the order
-			// in which they run.
-			ArrayMetaData()
-				: mData(0)
-				, mCapacity(0)
-				, mSize(0)
-			{}
-		};
-
-		// Selects the default allocator for Array<T>: NamedAllocator when
-		// _DEBUG is defined, ReflectionAllocator<T> otherwise.
-		template <typename T>
-		struct AllocatorTraits
-		{
-#ifdef _DEBUG
-			typedef NamedAllocator Type;
-#else
-			typedef ReflectionAllocator<T> Type;
-#endif
-		};
-	}
-
-	/*!
-	An array is a sequential container.
-
-	Implementation note
-	* entries between 0 and size are valid objects
-	* we use inheritance to build this because the array is included inline in a lot
-	  of objects and we want the allocator to take no space if it's not stateful, which
-	  aggregation doesn't allow. Also, we want the metadata at the front for the inline
-	  case where the allocator contains some inline storage space
-	*/
-	template<class T, class Alloc = typename Internal::AllocatorTraits<T>::Type >
-	class Array : private Internal::ArrayMetaData<T>, private Alloc
-	{
-		typedef Internal::ArrayMetaData<T> MetaData;
-
-		using MetaData::mCapacity;
-		using MetaData::mData;
-		using MetaData::mSize;
-
-	public:
-
-		typedef T*			Iterator;
-		typedef const T*	ConstIterator;
-
-		/*!
-		Default array constructor. Initialize an empty array
-		*/
-		NX_INLINE Array(const Alloc& alloc = Alloc()) : Alloc(alloc) {}
-
-		/*!
-		Initialize an empty array with storage reserved for `capacity` elements.
-		The size stays 0; no element is constructed.
-		\param capacity
-		Number of elements to reserve room for. 0 allocates nothing.
-		\param alloc
-		Allocator to copy into this array.
-		*/
-		NX_INLINE explicit Array(NxU32 capacity, const Alloc& alloc = Alloc())
-		: Alloc(alloc)
-		{
-			// Test the argument, not the member: mCapacity is still 0 at this
-			// point, so checking it can never trigger the allocation. The
-			// returned block must also be stored, otherwise it is lost.
-			if(capacity>0)
-			{
-				mData = allocate(capacity);
-				mCapacity = capacity;
-			}
-		}
-
-		/*!
-		Converting copy: deep-copies all entries of `other`, which may use a
-		different allocator type.
-		\param other
-		Array to copy from.
-		\param alloc
-		Allocator to copy into this array.
-
-		NOTE(review): as a constructor template this is never the class's copy
-		constructor; copying an Array of the exact same type goes through the
-		implicitly generated one - confirm that is intended.
-		*/
-		template <class A> 
-		NX_INLINE Array(const Array<T,A>& other, const Alloc& alloc = Alloc()) 
-		: Alloc(alloc)	// forward the allocator; it was previously ignored
-		{
-			if(other.mSize > 0)
-			{
-				mData = allocate(mSize = mCapacity = other.mSize);
-				copy(mData, other.mData, mSize);
-			}
-		}
-
-		/*!
-		Destructor: runs the destructor of every live element, then releases
-		the storage if any was allocated.
-		*/
-		NX_INLINE ~Array()
-		{
-			for(size_t i = 0; i < mSize; i++)
-				mData[i].~T();
-
-			if(mCapacity != 0)
-				deallocate(mData);
-		}
-
-		/*!
-		Assignment operator. Copy content (deep-copy).
-		Existing elements are destroyed and replaced by copies of the elements
-		of `t`. Storage is reallocated only when the current capacity cannot
-		hold t's elements.
-		\param t
-		Array to copy from; may use a different allocator type.
-		\return
-		*this
-		*/
-		template <class A> 
-		NX_INLINE const Array& operator= (const Array<T,A>& t)
-		{
-			// Compare addresses as void* so the check also compiles when A
-			// differs from Alloc.
-			if(static_cast<const void*>(&t) == static_cast<const void*>(this))
-				return *this;
-
-			// End the lifetime of every current element before its slot is
-			// reused. Copy-constructing over live objects would skip their
-			// destructors.
-			destroy(0,mSize);
-			mSize = 0;
-
-			if(mCapacity < t.mSize)
-			{
-				// Same guard as the destructor: nothing to free without storage.
-				if(mCapacity)
-					deallocate(mData);
-
-				mData = allocate(t.mCapacity);
-				mCapacity = t.mCapacity;
-			}
-
-			copy(mData,t.mData,t.mSize);
-			mSize = t.mSize;
-			return *this;
-		}
-
-		/*!
-		Array indexing operator (read-only access). The index is not checked.
-		\param i
-		The index of the element that will be returned.
-		\return
-		The element i in the array.
-		*/
-		NX_INLINE const T& operator[] (NxU32 i) const
-		{
-			return *(mData + i);
-		}
-
-		/*!
-		Array indexing operator (mutable access). The index is not checked.
-		\param i
-		The index of the element that will be returned.
-		\return
-		The element i in the array.
-		*/
-		NX_INLINE T& operator[] (NxU32 i)
-		{
-			return *(mData + i);
-		}
-
-		/*!
-		Returns a pointer to the initial element of the array.
-		\return
-		a pointer to the initial element of the array.
-		*/
-		NX_INLINE ConstIterator begin() const 
-		{
-			return mData;
-		}
-
-		NX_INLINE Iterator begin()
-		{
-			return mData;
-		}
-
-		/*!
-		Returns an iterator beyond the last element of the array. Do not dereference.
-		\return
-		a pointer to the element beyond the last element of the array.
-		*/
-
-		NX_INLINE ConstIterator end() const
-		{
-			return begin() + size();
-		}
-
-		NX_INLINE Iterator end()
-		{
-			return begin() + size();
-		}
-
-		/*!
-		Returns a reference to the first element of the array. Undefined if the array is empty.
-		\return a reference to the first element of the array
-		*/
-
-		NX_INLINE const T& front() const
-		{
-			NX_ASSERT(mSize);
-			return *mData;
-		}
-
-		NX_INLINE T& front()
-		{
-			NX_ASSERT(mSize);
-			return *mData;
-		}
-
-		/*!
-		Returns a reference to the last element of the array. Undefined if the array is empty
-		\return a reference to the last element of the array
-		*/
-
-		NX_INLINE const T& back() const
-		{
-			NX_ASSERT(mSize);
-			const NxU32 last = mSize - 1;
-			return mData[last];
-		}
-
-		NX_INLINE T& back()
-		{
-			NX_ASSERT(mSize);
-			const NxU32 last = mSize - 1;
-			return mData[last];
-		}
-
-
-		/*!
-		Returns the number of entries in the array. This can, and probably will,
-		differ from the array capacity.
-		\return
-		The number of entries in the array.
-		*/
-		NX_INLINE NxU32 size() const 
-		{
-			return mSize;
-		}
-
-		/*!
-		Clears the array: destroys every element and sets the size to 0.
-		The allocated storage is kept.
-		*/
-		NX_INLINE void clear()
-		{
-			for(size_t i = 0; i < mSize; i++)
-				mData[i].~T();
-			mSize = 0;
-		}
-
-		/*!
-		Returns whether the array is empty (i.e. whether its size is 0).
-		\return
-		true if the array is empty
-		*/
-		NX_INLINE bool empty() const
-		{
-			return mSize==0;
-		}
-
-		/*!
-		Finds the first occurrence of an element in the array.
-		\param a
-		The element to search for (compared with operator!=).
-		\return
-		Iterator to the first match, or end() if there is none.
-		*/
-
-
-		NX_INLINE Iterator find(const T&a)
-		{
-			NxU32 index = 0;
-			while(index<mSize && mData[index]!=a)
-				index++;
-			return mData+index;
-		}
-
-		NX_INLINE ConstIterator find(const T&a) const
-		{
-			NxU32 index = 0;
-			while(index<mSize && mData[index]!=a)
-				index++;
-			return mData+index;
-		}
-
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Adds one element to the end of the array. Operation is amortized O(1);
-		the storage is enlarged when it is full.
-		\param a
-		The element that will be added to this array. It may refer to an
-		element already stored in this array.
-		\return
-		Reference to the newly added element.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-
-		NX_INLINE T& pushBack(const T& a)
-		{
-			if(mCapacity<=mSize)
-			{
-				// grow() destroys the old elements and frees their storage, so
-				// take a copy first in case `a` lives inside this array.
-				const T saved(a);
-				grow(capacityIncrement());
-				new((void*)(mData + mSize)) T(saved);
-			}
-			else
-			{
-				new((void*)(mData + mSize)) T(a);
-			}
-
-			return mData[mSize++];
-		}
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Removes the element at the end of the array and returns a copy of it.
-		Only legal if the array is non-empty.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-		NX_INLINE T popBack()
-		{
-			NX_ASSERT(mSize);
-			T* last = mData + (mSize - 1);
-			T result = *last;
-			--mSize;
-			last->~T();
-			return result;
-		}
-
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Default-constructs one element at the end of the array and returns a
-		reference to it. The storage is enlarged first when it is full.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-		NX_INLINE T& insert()
-		{
-			if(mSize >= mCapacity)
-				grow(capacityIncrement());
-
-			T* slot = mData + mSize;
-			++mSize;
-			return *(new (slot)T);
-		}
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Removes the element at position i by overwriting it with the last
-		element and shrinking the array by one. Element order is not kept.
-		Operation is O(1)
-		\param i
-		The position of the element that will be removed from this array.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-		NX_INLINE void replaceWithLast(NxU32 i)
-		{
-			NX_ASSERT(i<mSize);
-			const NxU32 last = --mSize;
-			mData[i] = mData[last];
-			mData[last].~T();
-		}
-
-		// Iterator flavour: converts the iterator to an index and forwards.
-		NX_INLINE void replaceWithLast(Iterator i)
-		{
-			const NxU32 index = static_cast<NxU32>(i-mData);
-			replaceWithLast(index);
-		}
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Replaces the first occurrence of the element a with the last element
-		Operation is O(n)
-		\param a
-		The element to look for and remove.
-		\return Returns true if the element has been removed.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-
-		NX_INLINE bool findAndReplaceWithLast(const T& a)
-		{
-			NxU32 index = 0;
-			while(index<mSize && mData[index]!=a)
-				++index;
-
-			const bool found = index < mSize;
-			if(found)
-				replaceWithLast(index);
-			return found;
-		}
-
-		/////////////////////////////////////////////////////////////////////////
-		/*!
-		Removes the element at position i, shifting every later element one
-		step towards the front. Element order is kept.
-		Operation is O(n)
-		\param i
-		The position of the element that will be removed from this array.
-		*/
-		/////////////////////////////////////////////////////////////////////////
-		NX_INLINE void remove(NxU32 i)
-		{
-			NX_ASSERT(i<mSize);
-			for(NxU32 next = i+1; next < mSize; ++next)
-				mData[next-1] = mData[next];
-
-			--mSize;
-			mData[mSize].~T();
-		}
-
-
-		//////////////////////////////////////////////////////////////////////////
-		/*!
-		Resize array
-		\param size
-		New number of elements. New slots are copy-constructed from a.
-		\param compaction
-		If set to true and the specified size is smaller than the capacity, a new
-		memory block which fits the size is allocated and the old one gets freed.
-		\param a
-		Value used to fill newly created elements.
-		*/
-		//////////////////////////////////////////////////////////////////////////
-		NX_INLINE void resize(const NxU32 size, const bool compaction = false, const T& a = T())
-		{
-			if(mCapacity < size)
-				grow(size);
-			else if(compaction && mCapacity != size)
-				recreate(size, NxMin(mSize, size));
-
-			NxU32 fill = mSize;
-			while(fill < size)
-			{
-				::new(mData+fill)T(a);
-				++fill;
-			}
-
-			// With compaction, these elements have been deleted already
-			if(!compaction)
-			{
-				for(NxU32 dead = size; dead < mSize; dead++)
-					mData[dead].~T();
-			}
-
-			mSize = size;
-		}
-
-
-		//////////////////////////////////////////////////////////////////////////
-		/*!
-		Resize array such that only as much memory is allocated to hold the 
-		existing elements
-		*/
-		//////////////////////////////////////////////////////////////////////////
-		NX_INLINE void shrink()
-		{
-			resize(mSize, true);
-		}
-
-
-		//////////////////////////////////////////////////////////////////////////
-		/*!
-		Deletes all array elements and frees memory.
-		*/
-		//////////////////////////////////////////////////////////////////////////
-		NX_INLINE void reset()
-		{
-			resize(0, true);
-		}
-
-
-		//////////////////////////////////////////////////////////////////////////
-		/*!
-		Ensure that the array has capacity for at least `size` elements.
-		Does nothing when the capacity is already sufficient.
-		*/
-		//////////////////////////////////////////////////////////////////////////
-		NX_INLINE void reserve(const NxU32 size)
-		{
-			if(mCapacity < size)
-				grow(size);
-		}
-
-		//////////////////////////////////////////////////////////////////////////
-		/*!
-		Query the capacity(allocated mem) for the array.
-		*/
-		//////////////////////////////////////////////////////////////////////////
-		NX_INLINE NxU32 capacity()	const
-		{
-			return mCapacity;
-		}
-
-
-	private:
-
-		NX_INLINE T* allocate(size_t capacity)
-		{
-			return (T*)Alloc::allocate(sizeof(T) * capacity, __FILE__, __LINE__);
-		}
-
-		NX_INLINE void deallocate(void *mem)
-		{
-			Alloc::deallocate(mem);
-		}
-
-		// Copy-constructs count elements from src into the raw storage at dst.
-		NX_INLINE void copy(T* dst, const T* src, size_t count)
-		{
-			size_t i = 0;
-			while(i < count)
-			{
-				::new (dst+i)T(src[i]);
-				++i;
-			}
-		}
-
-		// Runs the destructor of the elements in [start, end).
-		NX_INLINE void destroy(size_t start, size_t end)
-		{
-			while(start < end)
-			{
-				mData[start].~T();
-				++start;
-			}
-		}
-
-		// Capacity to grow to when pushBack/insert find the array full: 1 for an
-		// array without storage, otherwise twice the current capacity.
-		// The idea here is to prevent accidental brain-damage with pushBack or insert. Unfortunately
-		// it interacts badly with InlineArrays with smaller inline allocations.
-		// TODO(dsequeira): policy template arg, this is exactly what they're for.
-		NX_INLINE NxU32 capacityIncrement()	const
-		{
-			if(mCapacity == 0)
-				return 1;
-			return mCapacity * 2;
-		}
-
-		/*!
-		Creates a new memory block, copies the first copyCount entries to the
-		new block, then destroys all old entries and frees the old block.
-		Does not change mSize.
-
-		\param capacity
-		The number of entries that the set should be able to hold.
-		\param copyCount
-		The number of entries to copy.
-		*/
-		NX_INLINE void recreate(NxU32 capacity, NxU32 copyCount)
-		{
-			NX_ASSERT(capacity >= copyCount);
-			NX_ASSERT(mSize >= copyCount);
-
-			T* fresh = allocate(capacity);
-			NX_ASSERT(	((fresh != NULL) && (capacity > 0)) ||
-						((fresh == NULL) && (capacity == 0)) );
-
-			// Without old storage there is nothing to migrate or release.
-			if(mCapacity != 0)
-			{
-				copy(fresh,mData,copyCount);
-				destroy(0,mSize);
-				deallocate(mData);
-			}
-
-			mCapacity = capacity;
-			mData = fresh;
-		}
-
-		/*!
-		Resizes the available memory for the array.
-
-		\param capacity
-		The number of entries that the set should be able to hold.
-		*/	
-		NX_INLINE void grow(NxU32 capacity) 
-		{
-			NX_ASSERT(mCapacity < capacity);
-			recreate(capacity, mSize);
-		}
-	};
-
-	// Array whose allocator is an InlineAllocator sized for N elements of T.
-	template <typename T, NxU32 N, typename Alloc = typename Internal::AllocatorTraits<T>::Type>
-	class InlineArray : public Array<T, InlineAllocator<N * sizeof(T), Alloc> >
-	{
-		typedef InlineAllocator<N * sizeof(T), Alloc> Allocator;
-
-	public:
-		// Builds an empty array; alloc is handed to the Array base.
-		NX_INLINE InlineArray(const Alloc& alloc = Alloc())
-			: Array<T, Allocator>(alloc)
-		{
-		}
-	};
-}
-
-// Stride iterator over the elements of array, stepping sizeof(T) bytes.
-template <typename T>
-NX_INLINE NvStrideIterator<T> getStrideIterator(CONVEX_DECOMPOSITION::Array<T>& array)
-{
-	T* first = array.begin();
-	return NvStrideIterator<T>(first, sizeof(T));
-}
-
-// Read-only stride iterator over the elements of array, stepping sizeof(T) bytes.
-template <typename T>
-NX_INLINE NvStrideIterator<const T> getConstStrideIterator(CONVEX_DECOMPOSITION::Array<T>& array)
-{
-	const T* first = array.begin();
-	return NvStrideIterator<const T>(first, sizeof(T));
-}
-
-
-#endif
-
-#ifndef NV_FOUNDATION_BITUTILS_H
-#define NV_FOUNDATION_BITUTILS_H
-
-namespace CONVEX_DECOMPOSITION
-{
-	// Number of set bits in v, computed with parallel partial sums.
-	NX_INLINE NxU32 bitCount32(NxU32 v)
-	{
-		// from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-		const NxU32 pairs   = v - ((v >> 1) & 0x55555555);								// counts per 2-bit field
-		const NxU32 nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);		// counts per 4-bit field
-		const NxU32 bytes   = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;					// counts per byte
-		return (bytes * 0x1010101) >> 24;												// sum of the four bytes
-	}
-
-	/*!
-	Return the index of the highest set bit, or 0 if no bits are set.
-	Note that 0 is also returned when bit 0 is the only bit set.
-	*/
-	NX_INLINE NxU32 highestSetBit32(NxU32 v)
-	{
-		for(NxU32 j = 32; j-- > 0;)
-		{
-			// Unsigned literal: shifting a signed 1 into bit 31 overflows int.
-			if(v&(1u<<j))
-				return j;
-		}
-		return 0;
-	}
-
-	// True when exactly one bit of x is set.
-	NX_INLINE bool isPowerOfTwo(NxU32 x)
-	{
-		if(x == 0)
-			return false;
-		return (x & (x - 1)) == 0;
-	}
-
-	// "Next Largest Power of 2
-	// Given a binary integer value x, the next largest power of 2 can be computed by a SWAR algorithm
-	// that recursively "folds" the upper bits into the lower bits. This process yields a bit vector with
-	// the same most significant 1 as x, but all 1's below it. Adding 1 to that value yields the next
-	// largest power of 2. For a 32-bit value:"
-	//
-	// The result is strictly greater than x, so an exact power of two is
-	// doubled and 0 maps to 1.
-	NX_INLINE NxU32 nextPowerOfTwo(NxU32 x)
-	{
-		// Fold with shifts of 1, 2, 4, 8 and 16.
-		for(NxU32 shift = 1; shift < 32; shift <<= 1)
-			x |= (x >> shift);
-		return x+1;
-	}
-
-	// Helper function to approximate log2 of an integer value (assumes that the input is actually power of two)
-	// Returns the number of right shifts, minus one, needed to clear num; 0 and 1 both give 0.
-	NX_INLINE NxU32 ilog2(NxU32 num)
-	{
-		NxU32 result = 0;
-		while((num >>= 1) != 0)
-			++result;
-		return result;
-	}
-
-	// Converts f to an int by operating on its bit pattern instead of using a
-	// float-to-int conversion. Per the step comments below, the result carries
-	// the sign of f and is forced to 0 when the exponent is negative (|f| < 1).
-	// NOTE(review): assumes NxF32 is an IEEE-754 single and NxI32 is 32 bits wide,
-	// and reads the float through an NxI32 pointer - confirm for the target compilers.
-	NX_INLINE int intChop(const NxF32& f)
-	{
-		NxI32 a			= *reinterpret_cast<const NxI32*>(&f);			// take bit pattern of float into a register
-		NxI32 sign		= (a>>31);										// sign = 0xFFFFFFFF if original value is negative, 0 if positive
-		NxI32 mantissa	= (a&((1<<23)-1))|(1<<23);						// extract mantissa and add the hidden bit
-		NxI32 exponent	= ((a&0x7fffffff)>>23)-127;						// extract the exponent
-		NxI32 r			= ((NxU32)(mantissa)<<8)>>(31-exponent);		// ((1<<exponent)*mantissa)>>24 -- (we know that mantissa > (1<<24))
-		return ((r ^ (sign)) - sign ) &~ (exponent>>31);				// add original sign. If exponent was negative, make return value 0.
-	}
-
-	// Integer floor of f, computed from its bit pattern; see the step comments
-	// for how the sign, exponent and mantissa are combined.
-	// NOTE(review): assumes NxF32 is an IEEE-754 single and NxI32 is 32 bits wide,
-	// and reads the float through an NxI32 pointer - confirm for the target compilers.
-	NX_INLINE int intFloor(const NxF32& f)
-	{
-		NxI32 a			= *reinterpret_cast<const NxI32*>(&f);									// take bit pattern of float into a register
-		NxI32 sign		= (a>>31);																// sign = 0xFFFFFFFF if original value is negative, 0 if positive
-		a&=0x7fffffff;																			// we don't need the sign any more
-		NxI32 exponent	= (a>>23)-127;															// extract the exponent
-		NxI32 expsign   = ~(exponent>>31);														// 0xFFFFFFFF if exponent is positive, 0 otherwise
-		NxI32 imask		= ( (1<<(31-(exponent))))-1;											// mask for true integer values
-		NxI32 mantissa	= (a&((1<<23)-1));														// extract mantissa (without the hidden bit)
-		NxI32 r			= ((NxU32)(mantissa|(1<<23))<<8)>>(31-exponent);						// ((1<<exponent)*(mantissa|hidden bit))>>24 -- (we know that mantissa > (1<<24))
-		r = ((r & expsign) ^ (sign)) + ((!((mantissa<<8)&imask)&(expsign^((a-1)>>31)))&sign);	// if (fabs(value)<1.0) value = 0; copy sign; if (value < 0 && value==(int)(value)) value++;
-		return r;
-	}
-
-	// Integer ceiling of f. Flips the sign bit of the input, applies the same
-	// steps as intFloor, and negates the result.
-	// NOTE(review): assumes NxF32 is an IEEE-754 single and NxI32 is 32 bits wide,
-	// and reads the float through an NxI32 pointer - confirm for the target compilers.
-	NX_INLINE int intCeil(const NxF32& f)
-	{
-		NxI32 a			= *reinterpret_cast<const NxI32*>(&f) ^ 0x80000000;						// take bit pattern of float into a register
-		NxI32 sign		= (a>>31);																// sign = 0xFFFFFFFF if original value is negative, 0 if positive
-		a&=0x7fffffff;																			// we don't need the sign any more
-		NxI32 exponent	= (a>>23)-127;															// extract the exponent
-		NxI32 expsign   = ~(exponent>>31);														// 0xFFFFFFFF if exponent is positive, 0 otherwise
-		NxI32 imask		= ( (1<<(31-(exponent))))-1;											// mask for true integer values
-		NxI32 mantissa	= (a&((1<<23)-1));														// extract mantissa (without the hidden bit)
-		NxI32 r			= ((NxU32)(mantissa|(1<<23))<<8)>>(31-exponent);						// ((1<<exponent)*(mantissa|hidden bit))>>24 -- (we know that mantissa > (1<<24))
-		r = ((r & expsign) ^ (sign)) + ((!((mantissa<<8)&imask)&(expsign^((a-1)>>31)))&sign);	// if (fabs(value)<1.0) value = 0; copy sign; if (value < 0 && value==(int)(value)) value++;
-		return -r;
-	}
-
-}
-
-#endif
-
-#ifndef NV_FOUNDATION_HASHFUNCTION_H
-#define NV_FOUNDATION_HASHFUNCTION_H
-
-/*!
-Central definition of hash functions
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-	// Hash functions
-	// Generic fallback: the hash is the key itself, converted to NxU32 with a
-	// C-style cast. Specializations below provide real mixing for integer keys.
-	template<class T>
-	NxU32 hash(const T& key)
-	{
-		return (NxU32)key;
-	}
-
-	// Thomas Wang's 32 bit mix
-	// http://www.cris.com/~Ttwang/tech/inthash.htm
-	// Scrambles the key with six add/xor-shift rounds.
-	template<>
-	NX_INLINE NxU32 hash<NxU32>(const NxU32& key)
-	{
-		NxU32 mixed = key;
-		mixed = mixed + ~(mixed << 15);
-		mixed = mixed ^ (mixed >> 10);
-		mixed = mixed + (mixed << 3);
-		mixed = mixed ^ (mixed >> 6);
-		mixed = mixed + ~(mixed << 11);
-		mixed = mixed ^ (mixed >> 16);
-		return mixed;
-	}
-
-	// Signed keys are reinterpreted as unsigned and hashed with the NxU32 mix.
-	template<>
-	NX_INLINE NxU32 hash<NxI32>(const NxI32& key)
-	{
-		const NxU32 asUnsigned = (NxU32)key;
-		return hash<NxU32>(asUnsigned);
-	}
-
-	// Thomas Wang's 64 bit mix
-	// http://www.cris.com/~Ttwang/tech/inthash.htm
-	// Scrambles the key with eight add/xor-shift rounds and returns the low 32 bits.
-	template<>
-	NX_INLINE NxU32 hash<NxU64>(const NxU64& key)
-	{
-		NxU64 mixed = key;
-		mixed = mixed + ~(mixed << 32);
-		mixed = mixed ^ (mixed >> 22);
-		mixed = mixed + ~(mixed << 13);
-		mixed = mixed ^ (mixed >> 8);
-		mixed = mixed + (mixed << 3);
-		mixed = mixed ^ (mixed >> 15);
-		mixed = mixed + ~(mixed << 27);
-		mixed = mixed ^ (mixed >> 31);
-		return (NxU32)mixed;
-	}
-
-	// Helper for pointer hashing
-	template<int size>
-	NxU32 PointerHash(const void* ptr);
-
-	// 4-byte pointers: hash the address with the 32 bit mix.
-	template<>
-	NX_INLINE NxU32 PointerHash<4>(const void* ptr)
-	{
-		const NxU32 address = static_cast<NxU32>(reinterpret_cast<size_t>(ptr));
-		return hash<NxU32>(address);
-	}
-
-
-	// 8-byte pointers: hash the address with the 64 bit mix.
-	template<>
-	NX_INLINE NxU32 PointerHash<8>(const void* ptr)
-	{
-		const NxU64 address = reinterpret_cast<size_t>(ptr);
-		return hash<NxU64>(address);
-	}
-
-	// Hash function for pointers: dispatches on the size of a pointer.
-	template<class T>
-	NX_INLINE NxU32 hash(T* key)
-	{
-		const void* address = key;
-		return PointerHash<sizeof(const void*)>(address);
-	}
-
-	// Hash function object for pointers. The one-argument call hashes the
-	// pointer value; the two-argument call tests two pointers for equality.
-	template <class T>
-	struct PointerHashFunctor
-	{
-		NxU32 operator()(const T* t) const
-		{
-			const NxU32 hashed = PointerHash<sizeof(T*)>(t);
-			return hashed;
-		}
-
-		bool operator()(const T* t0, const T* t1) const
-		{
-			return !(t0 != t1);
-		}
-	};
-
-	/*
-	--------------------------------------------------------------------
-	lookup2.c, by Bob Jenkins, December 1996, Public Domain.
-	--------------------------------------------------------------------
-	--------------------------------------------------------------------
-	mix -- mix 3 32-bit values reversibly.
-	For every delta with one or two bit set, and the deltas of all three
-	high bits or all three low bits, whether the original value of a,b,c
-	is almost all zero or is uniformly distributed,
-	* If mix() is run forward or backward, at least 32 bits in a,b,c
-	have at least 1/4 probability of changing.
-	* If mix() is run forward, every bit of c will change between 1/3 and
-	2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
-	mix() was built out of 36 single-cycle latency instructions in a 
-	structure that could supported 2x parallelism, like so:
-	a -= b; 
-	a -= c; x = (c>>13);
-	b -= c; a ^= x;
-	b -= a; x = (a<<8);
-	c -= a; b ^= x;
-	c -= b; x = (b>>13);
-	...
-	Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
-	of that parallelism.  They've also turned some of those single-cycle
-	latency instructions into multi-cycle latency instructions.  Still,
-	this is the fastest good hash I could find.  There were about 2^^68
-	to choose from.  I only looked at a billion or so.
-	--------------------------------------------------------------------
-	*/
-	// Mixes a, b and c in place (lookup2 mix step, see the comment above).
-	// Returns the mixed value of c. The function is declared to return NxU32,
-	// so it must not run off its end without a value.
-	NX_INLINE NxU32 hashMix(NxU32 &a, NxU32 &b, NxU32 &c)
-	{
-		a -= b; a -= c; a ^= (c>>13);
-		b -= c; b -= a; b ^= (a<<8);
-		c -= a; c -= b; c ^= (b>>13);
-		a -= b; a -= c; a ^= (c>>12);
-		b -= c; b -= a; b ^= (a<<16);
-		c -= a; c -= b; c ^= (b>>5);
-		a -= b; a -= c; a ^= (c>>3);
-		b -= c; b -= a; b ^= (a<<10);
-		c -= a; c -= b; c ^= (b>>15);
-		return c;
-	}
-
-	// Hashes an array of `length` 32-bit words: consumes three words per
-	// hashMix round, then folds in the remaining 0-2 words and the length.
-	NX_INLINE NxU32 hash(const NxU32 *k, NxU32 length)
-	{
-		/* Set up the internal state */
-		NxU32 a = 0x9e3779b9;	/* the golden ratio; an arbitrary value */
-		NxU32 b = 0x9e3779b9;
-		NxU32 c = 0;			/* the previous hash value */
-
-		/*---------------------------------------- handle most of the key */
-		NxU32 remaining = length;
-		for(; remaining >= 3; remaining -= 3, k += 3)
-		{
-			a += k[0];
-			b += k[1];
-			c += k[2];
-			hashMix(a,b,c);
-		}
-
-		/*-------------------------------------- handle the last 2 ub4's */
-		c += length;			/* c is reserved for the length */
-		if(remaining == 2)
-			b += k[1];
-		if(remaining == 1 || remaining == 2)
-			a += k[0];
-		/* remaining == 0: nothing left to add */
-
-		hashMix(a,b,c);
-		/*-------------------------------------------- report the result */
-		return c;
-	}
-
-	// Default hash/equality functor: the one-argument call hashes a key with
-	// hash<Key>, the two-argument call compares two keys with operator==.
-	template <class Key>
-	class Hash
-	{
-	public:
-		NxU32 operator()(const Key &k) const
-		{
-			return hash<Key>(k);
-		}
-
-		bool operator()(const Key &k0, const Key &k1) const
-		{
-			return k0 == k1;
-		}
-	};
-
-	// Hash/equality functor for NUL-terminated C strings.
-	class NvStringHash
-	{
-	public:
-		NxU32 operator()(const char *string) const
-		{
-			// "DJB" string hash 
-			NxU32 h = 5381;
-			const char *ptr = string;
-			while(*ptr)
-			{
-				h = ((h<<5)+h)^*ptr;	// h*33 xor next character
-				++ptr;
-			}
-			return h;
-		}
-
-		bool operator()(const char* string0, const char* string1) const
-		{
-			return strcmp(string0, string1) == 0;
-		}
-	};
-}
-
-#endif
-
-
-#ifndef NV_FOUNDATION_HASHINTERNALS
-#define NV_FOUNDATION_HASHINTERNALS
-
-
-#pragma warning(push)
-#pragma warning(disable:4127 4512) // disable 'conditional expression is constant' (4127) and 'assignment operator could not be generated' (4512)
-
-namespace CONVEX_DECOMPOSITION
-{
-	namespace Internal
-	{
-		template <class Entry,
-				  class Key,
-				  class HashFn,
-				  class GetKey,
-				  class Allocator,
-				  bool compacting>
-		class HashBase
-		{
-		public:
-			typedef Entry EntryType;
-
-			// Builds an empty table. A non-zero initialTableSize reserves the
-			// bucket and entry storage right away.
-			// Initialisers are listed in member declaration order, which is
-			// the order in which they run.
-			HashBase(NxU32 initialTableSize = 64, float loadFactor = 0.75f)
-				: mEntries(Allocator(NV_DEBUG_EXP("hashBaseEntries")))
-				, mNext(Allocator(NV_DEBUG_EXP("hashBaseNext")))
-				, mHash(Allocator(NV_DEBUG_EXP("hashBaseHash")))
-				, mLoadFactor(loadFactor)
-				, mFreeList((NxU32)EOL)
-				, mTimestamp(0)
-				, mSize(0)
-			{
-				if(initialTableSize != 0)
-					reserveInternal(initialTableSize);
-			}
-
-			// Walks every bucket chain and runs the destructor of each entry
-			// that is linked into the table.
-			~HashBase()
-			{
-				for(NxU32 bucket = 0; bucket < mHash.size(); bucket++)
-				{
-					NxU32 entry = mHash[bucket];
-					while(entry != EOL)
-					{
-						mEntries[entry].~Entry();
-						entry = mNext[entry];
-					}
-				}
-			}
-
-			static const int EOL = 0xffffffff;
-
-			// Looks up k and, if absent, reserves a slot for it.
-			// \param k       key to find or add
-			// \param exists  set to true when an entry with key k was already
-			//                present, false when a new slot was reserved
-			// \return the existing entry, or the reserved slot; in the latter
-			//         case the caller constructs the entry in place
-			NX_INLINE Entry *create(const Key &k, bool &exists)
-			{
-				// Must be defined on every path: with an empty table the lookup
-				// below is skipped and callers read `exists` right after.
-				exists = false;
-
-				NxU32 h=0;
-				if(mHash.size())
-				{
-					h = hash(k);
-					NxU32 index = mHash[h];
-					while(index!=EOL && !HashFn()(GetKey()(mEntries[index]), k))
-						index = mNext[index];
-					exists = index!=EOL;
-					if(exists)
-						return &mEntries[index];
-				}
-
-				if(freeListEmpty())
-				{
-					grow();
-					h = hash(k);	// the bucket count changed, so the bucket index must be recomputed
-				}
-
-				NxU32 entryIndex = freeListGetNext();
-
-				// link the new slot at the head of its bucket chain
-				mNext[entryIndex] = mHash[h];
-				mHash[h] = entryIndex;
-
-				mSize++;
-				mTimestamp++;
-
-				return &mEntries[entryIndex];
-			}
-
-			// Returns the entry whose key equals k, or NULL/0 when there is none.
-			NX_INLINE const Entry *find(const Key &k) const
-			{
-				if(mHash.size() == 0)
-					return NULL;
-
-				// walk the chain of the bucket k hashes to
-				for(NxU32 index = mHash[hash(k)]; index != EOL; index = mNext[index])
-				{
-					if(HashFn()(GetKey()(mEntries[index]), k))
-						return &mEntries[index];
-				}
-				return 0;
-			}
-
-			// Removes the entry with key k. Returns false when no such entry
-			// exists, true after it has been unlinked and destroyed.
-			NX_INLINE bool erase(const Key &k)
-			{
-				if(mHash.size() == 0)
-					return false;
-
-				// `link` points at the index cell that refers to the current
-				// chain element (bucket head or a mNext slot).
-				NxU32 *link = &mHash[hash(k)];
-				for(;;)
-				{
-					if(*link == EOL)
-						return false;
-					if(HashFn()(GetKey()(mEntries[*link]), k))
-						break;
-					link = &mNext[*link];
-				}
-
-				const NxU32 index = *link;
-				*link = mNext[index];		// unlink from the chain
-
-				mEntries[index].~Entry();
-
-				--mSize;
-				++mTimestamp;
-
-				// compacting tables keep live entries contiguous
-				if(compacting && index!=mSize)
-					replaceWithLast(index);
-
-				freeListAdd(index);
-
-				return true;
-			}
-
-			NX_INLINE NxU32 size() const
-			{ 
-				return mSize; 
-			}
-
-			// Empties the table: every bucket is reset to EOL, all entry slots
-			// are chained into the free list and the size becomes 0.
-			// NOTE(review): entries are not destroyed here and mTimestamp is
-			// not advanced - confirm whether callers rely on that.
-			void clear()
-			{
-				if(!mHash.size())
-					return;
-
-				for(NxU32 i = 0;i<mHash.size();i++)
-					mHash[i] = (NxU32)EOL;
-
-				const NxU32 entryCount = mEntries.size();
-				if(entryCount == 0)
-				{
-					// No slots at all: `entryCount-1` would wrap around and the
-					// loop below would write far outside mNext.
-					mFreeList = compacting ? 0 : (NxU32)EOL;
-					mSize = 0;
-					return;
-				}
-
-				for(NxU32 i = 0;i<entryCount-1;i++)
-					mNext[i] = i+1;
-				mNext[entryCount-1] = (NxU32)EOL;
-				mFreeList = 0;
-				mSize = 0;
-			}
-
-			// Enlarges the table when `size` exceeds the current bucket count.
-			void reserve(NxU32 size)
-			{
-				if(mHash.size() < size)
-					reserveInternal(size);
-			}
-
-			NX_INLINE const Entry *getEntries() const
-			{
-				return &mEntries[0];
-			}
-
-		private:
-
-			// free list management - if we're coalescing, then we use mFreeList to hold
-			// the top of the free list and it should always be equal to size(). Otherwise,
-			// we build a free list in the next() pointers.
-
-			// Returns slot `index` to the free list. Compacting tables only
-			// step the free marker back; otherwise the slot is pushed onto the
-			// list threaded through mNext.
-			NX_INLINE void freeListAdd(NxU32 index)
-			{
-				if(!compacting)
-				{
-					mNext[index] = mFreeList;
-					mFreeList = index;
-					return;
-				}
-
-				--mFreeList;
-				NX_ASSERT(mFreeList == mSize);
-			}
-
-			// Makes the slots [start, end) the free list. Non-compacting tables
-			// chain them through mNext and terminate the chain with EOL.
-			NX_INLINE void freeListAdd(NxU32 start, NxU32 end)
-			{
-				if(!compacting)
-				{
-					// add the new entries to the free list
-					NxU32 i = start;
-					while(i < end-1)
-					{
-						mNext[i] = i+1;
-						++i;
-					}
-					mNext[end-1] = (NxU32)EOL;
-				}
-				mFreeList = start;
-			}
-
-			// Takes one slot off the free list and returns its index.
-			// The free list must not be empty.
-			NX_INLINE NxU32 freeListGetNext()
-			{
-				NX_ASSERT(!freeListEmpty());
-				if(!compacting)
-				{
-					const NxU32 taken = mFreeList;
-					mFreeList = mNext[taken];
-					return taken;
-				}
-
-				NX_ASSERT(mFreeList == mSize);
-				const NxU32 taken = mFreeList;
-				++mFreeList;
-				return taken;
-			}
-
-			// True when no free slot is left: all entry slots are in use
-			// (compacting) or the free list head is EOL (non-compacting).
-			NX_INLINE bool freeListEmpty()
-			{
-				return compacting ? (mSize == mEntries.size())
-								  : (mFreeList == EOL);
-			}
-
-			// Moves the entry stored at slot mSize into slot `index`, then
-			// redirects the chain link that referred to slot mSize.
-			NX_INLINE void replaceWithLast(NxU32 index)
-			{
-				new(&mEntries[index])Entry(mEntries[mSize]);
-				mEntries[mSize].~Entry();
-				mNext[index] = mNext[mSize];
-
-				// find the link that still points at the old slot
-				NxU32 *link = &mHash[hash(GetKey()(mEntries[index]))];
-				while(*link != mSize)
-				{
-					NX_ASSERT(*link!=EOL);
-					link = &mNext[*link];
-				}
-				*link = index;
-			}
-
-
-			// Bucket index of k: the functor's hash masked with bucket count - 1.
-			NX_INLINE NxU32 hash(const Key &k) const
-			{
-				const NxU32 mask = mHash.size()-1;
-				return HashFn()(k) & mask;
-			}
-
-			// Resizes the bucket array to nextPowerOfTwo(size), sizes the entry
-			// storage from the load factor, puts the added slots on the free
-			// list and re-links slots [0, old entry count) into the new buckets.
-			// NOTE(review): the re-link loop covers every old slot, not only the
-			// slots in use - confirm callers invoke this only when the table is
-			// full or empty.
-			void reserveInternal(NxU32 size)
-			{
-				size = nextPowerOfTwo(size);
-
-				// resize the hash and reset
-				mHash.resize(size);
-				const NxU32 bucketCount = mHash.size();
-				for(NxU32 bucket = 0; bucket < bucketCount; bucket++)
-					mHash[bucket] = (NxU32)EOL;
-
-				NX_ASSERT(!(mHash.size()&(mHash.size()-1)));
-
-				const NxU32 oldSize = mEntries.size();
-				const NxU32 newSize = NxU32(float(mHash.size())*mLoadFactor);
-
-				mEntries.resize(newSize);
-				mNext.resize(newSize);
-
-				freeListAdd(oldSize,newSize);
-
-				// rehash all the existing entries
-				for(NxU32 slot = 0; slot < oldSize; slot++)
-				{
-					const NxU32 bucket = hash(GetKey()(mEntries[slot]));
-					mNext[slot] = mHash[bucket];
-					mHash[bucket] = slot;
-				}
-			}
-
-			// Called when no free slot is left: reserves 16 buckets for a table
-			// without buckets, otherwise twice the current bucket count.
-			void grow()
-			{
-				NX_ASSERT(mFreeList == EOL || (compacting && mSize == mEntries.size()));
-
-				const NxU32 buckets = mHash.size();
-				reserve(buckets == 0 ? 16 : buckets*2);
-			}
-
-
-			Array<Entry, Allocator>	mEntries;
-			Array<NxU32, Allocator>	mNext;
-			Array<NxU32, Allocator>	mHash;
-			float					mLoadFactor;
-			NxU32					mFreeList;
-			NxU32					mTimestamp;
-			NxU32					mSize;
-
-			friend class Iter;
-
-		public:
-			// Forward iterator over the entries linked into the table. It
-			// visits the bucket chains in bucket order and asserts that the
-			// table's timestamp has not changed since the iterator was created.
-			class Iter
-			{
-			public:
-				NX_INLINE Iter(HashBase &b)
-					: mBucket(0)
-					, mEntry((NxU32)b.EOL)
-					, mTimestamp(b.mTimestamp)
-					, mBase(b)
-				{
-					if(mBase.mEntries.size()>0)
-					{
-						mEntry = mBase.mHash[0];
-						skip();
-					}
-				}
-
-				NX_INLINE void check()
-				{
-					NX_ASSERT(mTimestamp == mBase.mTimestamp);
-				}
-
-				// Copy of the current entry.
-				NX_INLINE Entry operator*()
-				{
-					check();
-					return mBase.mEntries[mEntry];
-				}
-
-				NX_INLINE Entry *operator->()
-				{
-					check();
-					return &mBase.mEntries[mEntry];
-				}
-
-				NX_INLINE Iter operator++()
-				{
-					check();
-					advance();
-					return *this;
-				}
-
-				NX_INLINE Iter operator++(int)
-				{
-					check();
-					Iter previous = *this;
-					advance();
-					return previous;
-				}
-
-				// True once every chain has been walked.
-				NX_INLINE bool done()
-				{
-					check();
-					return mEntry == mBase.EOL;
-				}
-
-			private:
-				// Steps to the next entry of the current chain, then past any
-				// empty buckets.
-				NX_INLINE void advance()
-				{
-					mEntry = mBase.mNext[mEntry];
-					skip();
-				}
-
-				// While the current chain is exhausted, moves on to the next
-				// bucket; stops after the last bucket.
-				NX_INLINE void skip()
-				{
-					while(mEntry==mBase.EOL)
-					{
-						++mBucket;
-						if(mBucket == mBase.mHash.size())
-							break;
-						mEntry = mBase.mHash[mBucket];
-					}
-				}
-
-				NxU32 mBucket;
-				NxU32 mEntry;
-				NxU32 mTimestamp;
-				HashBase &mBase;
-			};
-		};
-
-		// Set of keys built on HashBase; each stored entry is the key itself.
-		template <class Key, 
-				  class HashFn, 
-				  class Allocator = Allocator,
-				  bool Coalesced = false>
-		class HashSetBase
-		{
-		public:
-			// The entry is the key.
-			struct GetKey
-			{
-				NX_INLINE const Key &operator()(const Key &e)
-				{
-					return e;
-				}
-			};
-
-			typedef HashBase<Key, Key, HashFn, GetKey, Allocator, Coalesced> BaseMap;
-			typedef typename BaseMap::Iter Iterator;
-
-			HashSetBase(NxU32 initialTableSize = 64, float loadFactor = 0.75f)
-				: mBase(initialTableSize,loadFactor)
-			{
-			}
-
-			// Adds k. Returns false when k was already present.
-			bool insert(const Key &k)
-			{
-				bool exists;
-				Key *slot = mBase.create(k,exists);
-				if(exists)
-					return false;
-
-				new(slot)Key(k);
-				return true;
-			}
-
-			NX_INLINE bool contains(const Key &k) const
-			{
-				return mBase.find(k)!=0;
-			}
-
-			NX_INLINE bool erase(const Key &k)
-			{
-				return mBase.erase(k);
-			}
-
-			NX_INLINE NxU32 size() const
-			{
-				return mBase.size();
-			}
-
-			NX_INLINE void reserve(NxU32 size)
-			{
-				mBase.reserve(size);
-			}
-
-			NX_INLINE void clear()
-			{
-				mBase.clear();
-			}
-
-		protected:
-			BaseMap mBase;
-
-		};
-
-		// Key -> Value map implemented on top of HashBase. Entries are
-		// Pair<const Key,Value>, keyed by their first member, and the underlying
-		// HashBase is always instantiated with its last template argument set to true.
-		template <class Key,
-			  class Value,
-			  class HashFn,
-			  class Allocator = Allocator >
-
-		class HashMapBase
-		{
-		public:
-			typedef Pair<const Key,Value> Entry;
-
-			struct GetKey
-			{
-				NX_INLINE const Key &operator()(const Entry &e)
-				{
-					return e.first;
-				}
-			};
-
-			typedef HashBase<Pair<const Key,Value>, Key, HashFn, GetKey, Allocator, true> BaseMap;
-			typedef typename BaseMap::Iter Iterator;
-
-			HashMapBase(NxU32 initialTableSize = 64, float loadFactor = 0.75f)
-				: mBase(initialTableSize,loadFactor)
-			{
-			}
-
-			// Inserts (k,v) only when k is absent. Returns true when a new entry was
-			// constructed; an existing entry is left untouched and false is returned.
-			bool insert(const Key &k, const Value &v)
-			{
-				bool alreadyPresent;
-				Entry *slot = mBase.create(k,alreadyPresent);
-				if(alreadyPresent)
-					return false;
-				new(slot)Entry(k,v);
-				return true;
-			}
-
-			// Returns the value stored under k, first constructing an entry holding a
-			// default-constructed Value when k is absent.
-			Value &operator [](const Key &k)
-			{
-				bool alreadyPresent;
-				Entry *slot = mBase.create(k, alreadyPresent);
-				if(!alreadyPresent)
-				{
-					new(slot)Entry(k,Value());
-				}
-
-				return slot->second;
-			}
-
-			NX_INLINE const Entry *find(const Key &k) const
-			{
-				return mBase.find(k);
-			}
-
-			NX_INLINE bool erase(const Key &k)
-			{
-				return mBase.erase(k);
-			}
-
-			NX_INLINE NxU32 size() const
-			{
-				return mBase.size();
-			}
-
-			NX_INLINE Iterator getIterator()
-			{
-				return Iterator(mBase);
-			}
-
-			NX_INLINE void reserve(NxU32 size)
-			{
-				mBase.reserve(size);
-			}
-
-			NX_INLINE void clear()
-			{
-				mBase.clear();
-			}
-
-		protected:
-			BaseMap mBase;
-		};
-
-	}
-}
-
-#pragma warning(pop)
-
-#endif
-
-#ifndef NV_FOUNDATION_HASHMAP
-#define NV_FOUNDATION_HASHMAP
-
-
-// TODO: make this doxy-format
-//
-// This header defines two hash maps. Hash maps
-// * support custom initial table sizes (rounded up internally to power-of-2)
-// * support custom static allocator objects
-// * auto-resize, based on a load factor (i.e. a 64-entry .75 load factor hash will resize 
-//                                        when the 49th element is inserted)
-// * are based on open hashing
-// * have O(1) contains, erase
-//
-// Maps have STL-like copying semantics, and properly initialize and destruct copies of objects
-// 
-// There are two forms of map: coalesced and uncoalesced. Coalesced maps keep the entries in the
-// initial segment of an array, so are fast to iterate over; however deletion is approximately
-// twice as expensive.
-//
-// HashMap<T>:
-//		bool			insert(const Key &k, const Value &v)	O(1) amortized (exponential resize policy)
-//		Value &			operator[](const Key &k)				O(1) for existing objects, else O(1) amortized
-//		const Entry *	find(const Key &k);						O(1)
-//		bool			erase(const T &k);						O(1)
-//		NxU32			size();									constant
-//		void			reserve(NxU32 size);					O(MAX(currentOccupancy,size))
-//		void			clear();								O(currentOccupancy) (with zero constant for objects without destructors) 
-//      Iterator		getIterator();
-//
-// operator[] creates an entry if one does not exist, initializing with the default constructor.
-// CoalescedHashMap<T> does not support getIterator, but instead supports
-// 		const Key *getEntries();
-//
-// Use of iterators:
-// 
-// for(HashMap::Iterator iter = test.getIterator(); !iter.done(); ++iter)
-//			myFunction(iter->first, iter->second);
-
-namespace CONVEX_DECOMPOSITION
-{
-	// Public hash map type: Internal::HashMapBase with Hash<Key> as the default hash
-	// functor, plus an iterator accessor.
-	template <class Key,
-			  class Value,
-			  class HashFn = Hash<Key>,
-			  class Allocator = Allocator >
-	class HashMap: public Internal::HashMapBase<Key, Value, HashFn, Allocator>
-	{
-	public:
-
-		typedef Internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase;
-		typedef typename HashMapBase::Iterator Iterator;
-
-		HashMap(NxU32 initialTableSize = 64, float loadFactor = 0.75f)
-			: HashMapBase(initialTableSize,loadFactor)
-		{
-		}
-
-		// Returns an iterator positioned on the first entry of the underlying table.
-		Iterator getIterator()
-		{
-			return Iterator(HashMapBase::mBase);
-		}
-	};
-
-	// Hash map variant that exposes its entry storage through getEntries() instead
-	// of an iterator.
-	template <class Key, 
-			  class Value,
-			  class HashFn = Hash<Key>, 
-			  class Allocator = Allocator >
-	class CoalescedHashMap: public Internal::HashMapBase<Key, Value, HashFn, Allocator>
-	{
-	public:
-		// The access specifier above was missing: members of a `class` default to
-		// private, so the constructor and getEntries() could not be used by any
-		// caller and the type could not be instantiated.
-		typedef Internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase;
-
-		CoalescedHashMap(NxU32 initialTableSize = 64, float loadFactor = 0.75f): HashMapBase(initialTableSize,loadFactor){}
-
-		// Forwards to the underlying table's getEntries().
-		// NOTE(review): the declared return type is `const Key *` although the table
-		// stores Pair<const Key,Value> entries -- confirm against HashBase::getEntries().
-		const Key *getEntries() const { return HashMapBase::mBase.getEntries(); }
-	};
-
-}
-#endif
-
-#endif

+ 0 - 783
Engine/lib/convexDecomp/NvMeshIslandGeneration.cpp

@@ -1,783 +0,0 @@
-/*
-
-NvMeshIslandGeneration.cpp : This code snippet walks the topology of a triangle mesh and detects the set of unique connected 'mesh islands'
-
-*/
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#pragma warning(disable:4100 4288)
-#include "NvMeshIslandGeneration.h"
-#include "NvFloatMath.h"
-#include "NvHashMap.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-typedef CONVEX_DECOMPOSITION::Array< NxU32 > NxU32Vector;
-
-class Edge;
-class Island;
-
-// Axis-aligned bounding box: per-axis minimum and maximum, three floats each.
-// There is no constructor, so both arrays start out uninitialized.
-class AABB
-{
-public:
-  NxF32 mMin[3];
-  NxF32 mMax[3];
-};
-
-// One input triangle: its three directed edges, the island it belongs to, and a
-// cached bounding box.
-class Triangle
-{
-public:
-  Triangle(void)
-  {
-    mConsumed = false;
-    mIsland   = 0;
-    mHandle   = 0;
-    mId       = 0;
-    // The edge pointers and the box were previously left uninitialized, so using a
-    // Triangle before its edges were assigned / buildBox() ran read garbage.
-    for (NxU32 k=0; k<3; k++)
-    {
-      mEdges[k]    = 0;
-      mBox.mMin[k] = 0;
-      mBox.mMax[k] = 0;
-    }
-  }
-
-  // Grows 'box' so that it contains the point p (three floats).
-  void minmax(const NxF32 *p,AABB &box)
-  {
-    if ( p[0] < box.mMin[0] ) box.mMin[0] = p[0];
-    if ( p[1] < box.mMin[1] ) box.mMin[1] = p[1];
-    if ( p[2] < box.mMin[2] ) box.mMin[2] = p[2];
-
-    if ( p[0] > box.mMax[0] ) box.mMax[0] = p[0];
-    if ( p[1] > box.mMax[1] ) box.mMax[1] = p[1];
-    if ( p[2] > box.mMax[2] ) box.mMax[2] = p[2];
-  }
-
-  // Same as above for a double-precision point; each component is narrowed to
-  // NxF32 before it is compared and stored.
-  void minmax(const NxF64 *p,AABB &box)
-  {
-    if ( (NxF32)p[0] < box.mMin[0] ) box.mMin[0] = (NxF32)p[0];
-    if ( (NxF32)p[1] < box.mMin[1] ) box.mMin[1] = (NxF32)p[1];
-    if ( (NxF32)p[2] < box.mMin[2] ) box.mMin[2] = (NxF32)p[2];
-    if ( (NxF32)p[0] > box.mMax[0] ) box.mMax[0] = (NxF32)p[0];
-    if ( (NxF32)p[1] > box.mMax[1] ) box.mMax[1] = (NxF32)p[1];
-    if ( (NxF32)p[2] > box.mMax[2] ) box.mMax[2] = (NxF32)p[2];
-  }
-
-  // Computes mBox from the triangle's vertices and grows the owning island's bounds.
-  void buildBox(const NxF32 *vertices_f,const NxF64 *vertices_d,NxU32 id);
-
-  // Debug hook; currently a no-op (the render call is commented out).
-  void render(NxU32 color)
-  {
-//    gRenderDebug->DebugBound(&mBox.mMin[0],&mBox.mMax[0],color,60.0f);
-  }
-
-  // Writes the three corner positions (9 floats) into 'tri'.
-  void getTriangle(NxF32 *tri,const NxF32 *vertices_f,const NxF64 *vertices_d);
-
-  NxU32    mHandle;
-  bool      mConsumed;  // set once the triangle has been assigned to an island
-  Edge     *mEdges[3];
-  Island   *mIsland;   // identifies which island it is a member of
-  unsigned short  mId; // NOTE: 16 bits only; buildBox() truncates larger ids
-  AABB      mBox;
-};
-
-
-// Directed triangle edge (mI1 -> mI2). mHash identifies the edge, mReverseHash
-// identifies the same edge walked in the opposite direction, which is how a
-// neighbouring triangle sharing the edge is looked up.
-class Edge
-{
-public:
-  Edge(void)
-  {
-    mI1 = 0;
-    mI2 = 0;
-    mHash = 0;
-    mReverseHash = 0; // was left uninitialized by the original constructor
-    mNext = 0;
-    mPrevious = 0;
-    mParent = 0;
-    mNextTriangleEdge = 0;
-  }
-
-  // Binds the edge to its end points and owning triangle.
-  // The hashes pack each index into 16 bits, hence the asserts: in builds where
-  // assert() is compiled out, indices >= 65536 silently produce colliding hashes.
-  void init(NxU32 i1,NxU32 i2,Triangle *parent)
-  {
-    assert( i1 < 65536 );
-    assert( i2 < 65536 );
-
-    mI1 = i1;
-    mI2 = i2;
-    mHash        = (i2<<16)|i1;
-    mReverseHash = (i1<<16)|i2;
-    mNext = 0;
-    mPrevious = 0;
-    // Reset the chain link as well so a re-initialized edge never carries a stale
-    // link into the edge hash map.
-    mNextTriangleEdge = 0;
-    mParent = parent;
-  }
-
-  NxU32  mI1;
-  NxU32  mI2;
-  NxU32  mHash;
-  NxU32  mReverseHash;
-
-  Edge     *mNext;
-  Edge     *mPrevious;
-  Edge     *mNextTriangleEdge; // next edge sharing the same mHash
-  Triangle *mParent;
-};
-
-typedef CONVEX_DECOMPOSITION::HashMap< NxU32, Edge * > EdgeHashMap;
-typedef CONVEX_DECOMPOSITION::Array< Triangle * > TriangleVector;
-
-// Work item for the island flood fill: an edge still to be examined, together
-// with the triangle it was queued for.
-class EdgeCheck
-{
-public:
-  EdgeCheck(Triangle *t,Edge *e)
-    : mTriangle(t)
-    , mEdge(e)
-  {
-  }
-
-  Triangle  *mTriangle;
-  Edge      *mEdge;
-};
-
-typedef CONVEX_DECOMPOSITION::Array< EdgeCheck > EdgeCheckQueue;
-
-// A set of triangles connected through shared edges, with an overall bounding box.
-class Island 
-{
-public:
-  // Creates an island seeded with triangle t. 'root' is accepted but unused.
-  Island(Triangle *t,Triangle *root)
-  {
-    mVerticesFloat = 0;
-    mVerticesDouble = 0;
-    t->mIsland = this;
-    mTriangles.pushBack(t);
-    mCoplanar = false;
-    fm_initMinMax(mMin,mMax);
-  }
-
-  // Adds triangle t to this island. 'root' is accepted but unused.
-  void add(Triangle *t,Triangle *root)
-  {
-    t->mIsland = this;
-    mTriangles.pushBack(t);
-  }
-
-  // Moves every triangle of 'isl' into this island and leaves 'isl' empty.
-  void merge(Island &isl)
-  {
-    // Merging an island into itself would append to the array being iterated.
-    if ( &isl == this )
-    {
-      return;
-    }
-
-    TriangleVector::Iterator i;
-    for (i=isl.mTriangles.begin(); i!=isl.mTriangles.end(); ++i)
-    {
-      Triangle *t = (*i);
-      // Re-point the triangle at its new owner. Previously mIsland kept referring
-      // to 'isl', which callers delete right after merging.
-      t->mIsland = this;
-      mTriangles.pushBack(t);
-    }
-
-    // Take the union of both bounding boxes so the bounds keep covering every
-    // triangle now owned by this island.
-    for (NxU32 k=0; k<3; k++)
-    {
-      if ( isl.mMin[k] < mMin[k] ) mMin[k] = isl.mMin[k];
-      if ( isl.mMax[k] > mMax[k] ) mMax[k] = isl.mMax[k];
-    }
-
-    isl.mTriangles.clear();
-  }
-
-  // Records the vertex pointers and tests the two islands' bounding boxes.
-  // NOTE: unfinished -- the result of the box test is ignored and the function
-  // always returns false.
-  bool isTouching(Island *isl,const NxF32 *vertices_f,const NxF64 *vertices_d)
-  {
-    bool ret = false;
-
-    mVerticesFloat = vertices_f;
-    mVerticesDouble = vertices_d;
-
-    if ( fm_intersectAABB(mMin,mMax,isl->mMin,isl->mMax) ) // if the two islands has an intersecting AABB
-    {
-      // todo..
-    }
-
-
-    return ret;
-  }
-
-
-  // Empty callback; nothing in this file calls it.
-  void SAP_DeletePair(const void* object0, const void* object1, void* user_data, void* pair_user_data)
-  {
-  }
-
-  // Debug hook: forwards to Triangle::render for every triangle of the island.
-  void render(NxU32 color)
-  {
-//    gRenderDebug->DebugBound(mMin,mMax,color,60.0f);
-    TriangleVector::Iterator i;
-    for (i=mTriangles.begin(); i!=mTriangles.end(); ++i)
-    {
-      Triangle *t = (*i);
-      t->render(color);
-    }
-  }
-
-
-  const NxF64   *mVerticesDouble;
-  const NxF32    *mVerticesFloat;
-
-  NxF32           mMin[3];
-  NxF32           mMax[3];
-  bool            mCoplanar; // marked as co-planar..
-  TriangleVector  mTriangles;
-};
-
-
-// Copies the triangle's three corner positions into tri[0..8]. The corner index is
-// the start vertex (mI1) of each of the three edges. Single-precision vertices are
-// used when vertices_f is non-null; otherwise vertices_d is converted to float.
-void Triangle::getTriangle(NxF32 *tri,const NxF32 *vertices_f,const NxF64 *vertices_d)
-{
-  const NxU32 corner[3] = { mEdges[0]->mI1, mEdges[1]->mI1, mEdges[2]->mI1 };
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    NxF32 *dest = tri + k*3;
-    if ( vertices_f )
-    {
-      const NxF32 *p = &vertices_f[corner[k]*3];
-      fm_copy3(p,dest);
-    }
-    else
-    {
-      const NxF64 *p = &vertices_d[corner[k]*3];
-      fm_doubleToFloat3(p,dest);
-    }
-  }
-}
-
-// Stores the (16-bit truncated) id, rebuilds mBox from the three corner vertices
-// and then enlarges the owning island's bounds so they contain mBox.
-// Single-precision vertices are used when vertices_f is non-null, else vertices_d.
-void Triangle::buildBox(const NxF32 *vertices_f,const NxF64 *vertices_d,NxU32 id)
-{
-  mId = (unsigned short)id;
-
-  const NxU32 a = mEdges[0]->mI1;
-  const NxU32 b = mEdges[1]->mI1;
-  const NxU32 c = mEdges[2]->mI1;
-
-  if ( vertices_f )
-  {
-    // Seed the box with the first corner, then grow it with the other two.
-    const NxF32 *first = &vertices_f[a*3];
-    for (NxU32 k=0; k<3; k++)
-    {
-      mBox.mMin[k] = first[k];
-      mBox.mMax[k] = first[k];
-    }
-    minmax(&vertices_f[b*3],mBox);
-    minmax(&vertices_f[c*3],mBox);
-  }
-  else
-  {
-    const NxF64 *first = &vertices_d[a*3];
-    for (NxU32 k=0; k<3; k++)
-    {
-      mBox.mMin[k] = (NxF32)first[k];
-      mBox.mMax[k] = (NxF32)first[k];
-    }
-    minmax(&vertices_d[b*3],mBox);
-    minmax(&vertices_d[c*3],mBox);
-  }
-
-  assert(mIsland);
-  if ( !mIsland )
-  {
-    return;
-  }
-
-  for (NxU32 k=0; k<3; k++)
-  {
-    if ( mBox.mMin[k] < mIsland->mMin[k] ) mIsland->mMin[k] = mBox.mMin[k];
-    if ( mBox.mMax[k] > mIsland->mMax[k] ) mIsland->mMax[k] = mBox.mMax[k];
-  }
-
-}
-
-
-typedef CONVEX_DECOMPOSITION::Array< Island * > IslandVector;
-
-class MyMeshIslandGeneration : public MeshIslandGeneration
-{
-public:
-  // Starts with no mesh loaded.
-  MyMeshIslandGeneration(void)
-  {
-    mTcount    = 0; // was left uninitialized until the first islandGenerate() call
-    mTriangles = 0;
-    mEdges     = 0;
-    mVerticesDouble = 0;
-    mVerticesFloat  = 0;
-  }
-
-  // Releases the triangle/edge arrays and every island through reset().
-  ~MyMeshIslandGeneration(void)
-  {
-    reset();
-  }
-
-  // Frees the triangle and edge arrays, empties the edge hash map and deletes every
-  // island, leaving the object ready for another islandGenerate() call.
-  void reset(void)
-  {
-    delete []mTriangles;
-    mTriangles = 0;
-
-    delete []mEdges;
-    mEdges = 0;
-
-    mTriangleEdges.clear();
-
-    for (IslandVector::Iterator it=mIslands.begin(); it!=mIslands.end(); ++it)
-    {
-      delete (*it);
-    }
-    mIslands.clear();
-  }
-
-  // Double-precision entry point: records 'vertices' as the active vertex source
-  // (clearing the float pointer) and forwards to the index-only overload.
-  // Returns the number of islands found.
-  NxU32 islandGenerate(NxU32 tcount,const NxU32 *indices,const NxF64 *vertices)
-  {
-    mVerticesDouble = vertices;
-    mVerticesFloat  = 0;
-    return islandGenerate(tcount,indices);
-  }
-
-  // Single-precision entry point: records 'vertices' as the active vertex source
-  // (clearing the double pointer) and forwards to the index-only overload.
-  // Returns the number of islands found.
-  NxU32 islandGenerate(NxU32 tcount,const NxU32 *indices,const NxF32 *vertices)
-  {
-    mVerticesDouble = 0;
-    mVerticesFloat  = vertices;
-    return islandGenerate(tcount,indices);
-  }
-
-  // Core island detection. Builds one Triangle and three directed Edges per input
-  // triangle, registers every edge in mTriangleEdges, then repeatedly seeds a new
-  // island from any remaining edge and flood-fills across edges whose reverse
-  // direction is still registered. Only the index buffer is read here; vertex
-  // positions are not used. Returns the number of islands created.
-  //
-  // 'indices' must hold tcount*3 entries. Edge::init() asserts that every index is
-  // below 65536 because the edge hash packs each index into 16 bits.
-  NxU32 islandGenerate(NxU32 tcount,const NxU32 *indices)
-  {
-    NxU32 ret = 0;
-
-    // Drop any state left over from a previous run.
-    reset();
-
-    mTcount = tcount;
-    mTriangles = new Triangle[tcount];
-    mEdges     = new Edge[tcount*3];
-    Edge *e = mEdges;
-
-    for (NxU32 i=0; i<tcount; i++)
-    {
-      Triangle &t = mTriangles[i];
-
-      NxU32 i1 = *indices++;
-      NxU32 i2 = *indices++;
-      NxU32 i3 = *indices++;
-
-      // The triangle's edges occupy three consecutive slots of mEdges.
-      t.mEdges[0] = e;
-      t.mEdges[1] = e+1;
-      t.mEdges[2] = e+2;
-
-      // addEdge() initializes the slot, registers it and returns the next slot.
-      e = addEdge(e,&t,i1,i2);
-      e = addEdge(e,&t,i2,i3);
-      e = addEdge(e,&t,i3,i1);
-
-    }
-
-    // while there are still edges to process...
-    while ( mTriangleEdges.size() != 0 )
-    {
-
-      // Any edge still in the map belongs to a triangle without an island; use the
-      // first one the iterator yields as the seed.
-      EdgeHashMap::Iterator iter = mTriangleEdges.getIterator();
-
-      Triangle *t = iter->second->mParent;
-
-      Island *i = new Island(t,mTriangles);  // the initial triangle...
-      removeTriangle(t); // remove this triangle from the triangle-edges hashmap
-
-      mIslands.pushBack(i);
-
-      // now keep adding to this island until we can no longer walk any shared edges..
-      addEdgeCheck(t,t->mEdges[0]);
-      addEdgeCheck(t,t->mEdges[1]);
-      addEdgeCheck(t,t->mEdges[2]);
-
-      while ( !mEdgeCheckQueue.empty() )
-      {
-
-        // NOTE: this local 'e' shadows the Edge pointer 'e' declared above.
-        EdgeCheck e = mEdgeCheckQueue.popBack();
-
-        // Process all triangles which share this edge
-        Edge *edge = locateSharedEdge(e.mEdge);
-
-        while ( edge )
-        {
-          // NOTE: this local 't' shadows the seed triangle 't' of the outer loop.
-          Triangle *t = edge->mParent;
-          assert(!t->mConsumed);
-          i->add(t,mTriangles);
-          removeTriangle(t); // remove this triangle from the triangle-edges hashmap
-
-          // now keep adding to this island until we can no longer walk any shared edges..
-
-          // Queue the neighbour's other two edges; the edge we arrived through is skipped.
-          if ( edge != t->mEdges[0] )
-          {
-            addEdgeCheck(t,t->mEdges[0]);
-          }
-
-          if ( edge != t->mEdges[1] )
-          {
-            addEdgeCheck(t,t->mEdges[1]);
-          }
-
-          if ( edge != t->mEdges[2] )
-          {
-            addEdgeCheck(t,t->mEdges[2]);
-          }
-
-          edge = locateSharedEdge(e.mEdge); // keep going until all shared edges have been processed!
-        }
-
-      }
-    }
-
-    ret = (NxU32)mIslands.size();
-
-    return ret;
-  }
-
-  // Returns the index buffer (three indices per triangle) of island 'index' and
-  // stores its triangle count in 'otcount'. The buffer is owned by this object and
-  // is overwritten by the next getIsland() call.
-  // For an out-of-range index, or an island without triangles, the result is a
-  // null pointer and otcount is 0.
-  NxU32 *   getIsland(NxU32 index,NxU32 &otcount)
-  {
-    NxU32 *ret  = 0;
-
-    // Always give the caller a defined count; it used to be left untouched when
-    // 'index' was out of range.
-    otcount = 0;
-
-    mIndices.clear();
-    if ( index < mIslands.size() )
-    {
-      Island *i = mIslands[index];
-      otcount = (NxU32)i->mTriangles.size();
-      TriangleVector::Iterator j;
-      for (j=i->mTriangles.begin(); j!=i->mTriangles.end(); ++j)
-      {
-        Triangle *t = (*j);
-        mIndices.pushBack(t->mEdges[0]->mI1);
-        mIndices.pushBack(t->mEdges[1]->mI1);
-        mIndices.pushBack(t->mEdges[2]->mI1);
-      }
-      // Do not take the address of element 0 of an empty array.
-      if ( otcount )
-      {
-        ret = &mIndices[0];
-      }
-    }
-
-    return ret;
-  }
-
-private:
-
-  // Marks the triangle as consumed and unregisters its three edges from the edge
-  // hash map.
-  void removeTriangle(Triangle *t)
-  {
-    t->mConsumed = true;
-
-    for (NxU32 k=0; k<3; k++)
-    {
-      removeEdge(t->mEdges[k]);
-    }
-
-  }
-
-
-  // Looks up an edge still registered in the map that runs in the opposite direction
-  // of 'e' (its hash equals e's reverse hash). Returns null when there is none.
-  Edge * locateSharedEdge(Edge *e)
-  {
-    const EdgeHashMap::Entry *found = mTriangleEdges.find( e->mReverseHash );
-    if ( found == NULL )
-    {
-      return 0;
-    }
-
-    Edge *shared = found->second;
-    assert( shared->mHash == e->mReverseHash );
-    return shared;
-  }
-
-  // Unregisters edge 'e'. Each map slot holds the head of a singly linked chain
-  // (through mNextTriangleEdge) of edges sharing one hash: removing the head either
-  // promotes its successor or erases the slot, removing an inner edge just unlinks it.
-  // Asserts when 'e' is not registered.
-  void removeEdge(Edge *e)
-  {
-    const EdgeHashMap::Entry *found = mTriangleEdges.find( e->mHash );
-    if ( found == NULL )
-    {
-      assert(0); // impossible!
-      return;
-    }
-
-    // Walk the chain until 'e' is reached, remembering its predecessor.
-    Edge *before  = 0;
-    Edge *current = found->second;
-    while ( current && current != e )
-    {
-      before  = current;
-      current = current->mNextTriangleEdge;
-    }
-
-    if ( !current )
-    {
-      assert(0);
-      return;
-    }
-
-    if ( before )
-    {
-      before->mNextTriangleEdge = current->mNextTriangleEdge;
-      return;
-    }
-
-    // 'e' is the head of the chain: drop the slot and, when another edge follows,
-    // register that one as the new head.
-    mTriangleEdges.erase(e->mHash);
-    if ( current->mNextTriangleEdge )
-    {
-      mTriangleEdges[e->mHash] = current->mNextTriangleEdge;
-    }
-  }
-
-
-  // Initializes the edge slot 'e' as the directed edge i1 -> i2 of triangle 't' and
-  // registers it in the edge hash map. When an edge with the same hash is already
-  // registered, the new edge becomes the head of the chain and links to the old
-  // head. Returns the slot following 'e'.
-  Edge * addEdge(Edge *e,Triangle *t,NxU32 i1,NxU32 i2)
-  {
-
-    e->init(i1,i2,t);
-
-    const EdgeHashMap::Entry *existing = mTriangleEdges.find(e->mHash);
-    if ( existing != NULL )
-    {
-      e->mNextTriangleEdge = existing->second;
-      mTriangleEdges.erase(e->mHash);
-    }
-    mTriangleEdges[ e->mHash ] = e;
-
-    return e+1;
-  }
-
-  // Appends an (edge, triangle) work item to mEdgeCheckQueue; islandGenerate() pops
-  // items from the back of that queue.
-  void addEdgeCheck(Triangle *t,Edge *e)
-  {
-    EdgeCheck ec(t,e);
-    mEdgeCheckQueue.pushBack(ec);
-  }
-
-  // Single-precision entry point: selects 'vertices' as the active vertex source
-  // and forwards to mergeCoplanarIslands(void).
-  NxU32 mergeCoplanarIslands(const NxF32 *vertices)
-  {
-    mVerticesFloat = vertices;
-    mVerticesDouble = 0;
-    return mergeCoplanarIslands();
-  }
-
-  // Double-precision entry point: selects 'vertices' as the active vertex source
-  // and forwards to mergeCoplanarIslands(void).
-  NxU32 mergeCoplanarIslands(const NxF64 *vertices)
-  {
-    mVerticesDouble = vertices;
-    mVerticesFloat = 0;
-    return mergeCoplanarIslands();
-  }
-
-  // this island needs to be merged
-  void mergeTouching(Island *isl)
-  {
-    Island *touching = 0;
-
-    IslandVector::Iterator i;
-    for (i=mIslands.begin(); i!=mIslands.end(); ++i)
-    {
-      Island *_i = (*i);
-      if ( !_i->mCoplanar ) // can't merge with coplanar islands!
-      {
-        if ( _i->isTouching(isl,mVerticesFloat,mVerticesDouble) )
-        {
-          touching = _i;
-        }
-      }
-    }
-  }
-
-  // Flags every island whose mesh is coplanar (per fm_isMeshCoplanar) and then:
-  //   * when no island is coplanar: changes nothing;
-  //   * when every island is coplanar: merges them all into the first island;
-  //   * otherwise: rebuilds the triangle/island bounds, runs mergeTouching() for
-  //     each island and finally deletes the coplanar islands.
-  // Returns the number of islands that remain.
-  NxU32 mergeCoplanarIslands(void)
-  {
-    NxU32  ret = 0;
-
-    if ( !mIslands.empty() )
-    {
-
-
-      NxU32  cp_count  = 0;
-      NxU32  npc_count = 0;
-
-      NxU32  count = (NxU32)mIslands.size();
-
-      for (NxU32 i=0; i<count; i++)
-      {
-
-        // Initialized so the test below never reads an indeterminate value.
-        NxU32 otcount = 0;
-        const NxU32 *oindices = getIsland(i,otcount);
-
-        if ( otcount )
-        {
-
-          bool isCoplanar;
-
-          if ( mVerticesFloat )
-            isCoplanar = fm_isMeshCoplanar(otcount, oindices, mVerticesFloat, true);
-          else
-            isCoplanar = fm_isMeshCoplanar(otcount, oindices, mVerticesDouble, true);
-
-          if ( isCoplanar )
-          {
-            Island *isl = mIslands[i];
-            isl->mCoplanar = true;
-            cp_count++;
-          }
-          else
-          {
-            npc_count++;
-          }
-        }
-        else
-        {
-          assert(0);
-        }
-      }
-
-      if ( cp_count )
-      {
-        if ( npc_count == 0 ) // all islands are co-planar!
-        {
-          // Work from the saved copy: the original code indexed mIslands right
-          // after clearing it, reading elements of an empty array.
-          IslandVector temp = mIslands;
-          mIslands.clear();
-          Island *isl = temp[0];
-          mIslands.pushBack(isl);
-          for (NxU32 i=1; i<(NxU32)temp.size(); i++)
-          {
-            Island *_i = temp[i];
-            isl->merge(*_i);
-            delete _i;
-          }
-          // This branch used to fall through with ret still 0.
-          ret = (NxU32)mIslands.size();
-        }
-        else
-        {
-
-
-          Triangle *t = mTriangles;
-          for (NxU32 i=0; i<mTcount; i++)
-          {
-            t->buildBox(mVerticesFloat,mVerticesDouble,i);
-            t++;
-          }
-
-          IslandVector::Iterator i;
-          for (i=mIslands.begin(); i!=mIslands.end(); ++i)
-          {
-            Island *isl = (*i);
-            mergeTouching(isl);
-          }
-
-          // Keep only the non-coplanar islands.
-          // NOTE(review): the triangles of a deleted island are not handed to
-          // another island here (mergeTouching() is still a stub), so they drop
-          // out of the result.
-          IslandVector temp = mIslands;
-          mIslands.clear();
-          for (i=temp.begin(); i!=temp.end(); i++)
-          {
-            Island *isl = (*i);
-            if ( isl->mCoplanar )
-            {
-              delete isl; // kill it
-            }
-            else
-            {
-              mIslands.pushBack(isl);
-            }
-          }
-          ret = (NxU32)mIslands.size();
-        }
-      }
-      else
-      {
-        ret = npc_count;
-      }
-    }
-
-
-    return ret;
-  }
-
-  // Not implemented: ignores 'vertices' and always returns 0.
-  NxU32 mergeTouchingIslands(const NxF32 *vertices)
-  {
-    NxU32 ret = 0;
-
-    return ret;
-  }
-
-  // Not implemented: ignores 'vertices' and always returns 0.
-  NxU32 mergeTouchingIslands(const NxF64 *vertices)
-  {
-    NxU32 ret = 0;
-
-    return ret;
-  }
-
-  NxU32           mTcount;
-  Triangle        *mTriangles;
-  Edge            *mEdges;
-  EdgeHashMap      mTriangleEdges;
-  IslandVector     mIslands;
-  EdgeCheckQueue   mEdgeCheckQueue;
-  const NxF64    *mVerticesDouble;
-  const NxF32     *mVerticesFloat;
-  NxU32Vector     mIndices;
-};
-
-
-// Factory: allocates the concrete implementation and returns it through the
-// interface type. Pair with releaseMeshIslandGeneration().
-MeshIslandGeneration * createMeshIslandGeneration(void)
-{
-  return static_cast< MeshIslandGeneration *>(new MyMeshIslandGeneration);
-}
-
-// Destroys an object obtained from createMeshIslandGeneration(). The pointer is
-// cast back to the concrete type before it is deleted.
-void                   releaseMeshIslandGeneration(MeshIslandGeneration *cm)
-{
-  delete static_cast< MyMeshIslandGeneration *>(cm);
-}
-
-}; // end of namespace
-

+ 0 - 91
Engine/lib/convexDecomp/NvMeshIslandGeneration.h

@@ -1,91 +0,0 @@
-#ifndef MESH_ISLAND_GENERATION_H
-
-#define MESH_ISLAND_GENERATION_H
-
-/*
-
-NvMeshIslandGeneration.h : This code snippet walks the topology of a triangle mesh and detects the set of unique connected 'mesh islands'
-
-*/
-
-
-#include "NvUserMemAlloc.h"
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Interface for splitting an indexed triangle mesh into connected 'islands'.
-// Instances are obtained from createMeshIslandGeneration() and destroyed with
-// releaseMeshIslandGeneration().
-class MeshIslandGeneration
-{
-public:
-
-  // Polymorphic base: a virtual destructor keeps deletion through an interface
-  // pointer well defined.
-  virtual ~MeshIslandGeneration(void) { }
-
-  // Detects the islands of a mesh with 'tcount' triangles (three indices each).
-  // Returns the number of islands found.
-  virtual NxU32 islandGenerate(NxU32 tcount,const NxU32 *indices,const NxF32 *vertices) = 0;
-  virtual NxU32 islandGenerate(NxU32 tcount,const NxU32 *indices,const NxF64 *vertices) = 0;
-
-  // sometimes island generation can produce co-planar islands.  Slivers if you will.  If you are passing these islands into a geometric system
-  // that wants to turn them into physical objects, they may not be acceptable.  In this case it may be preferable to merge the co-planar islands with
-  // other islands that it 'touches'.
-  virtual NxU32 mergeCoplanarIslands(const NxF32 *vertices) = 0;
-  virtual NxU32 mergeCoplanarIslands(const NxF64 *vertices) = 0;
-
-  virtual NxU32 mergeTouchingIslands(const NxF32 *vertices) = 0;
-  virtual NxU32 mergeTouchingIslands(const NxF64 *vertices) = 0;
-
-  // Returns the index buffer of island 'index' and stores its triangle count in 'tcount'.
-  virtual NxU32 *   getIsland(NxU32 index,NxU32 &tcount) = 0;
-
-
-};
-
-MeshIslandGeneration * createMeshIslandGeneration(void);
-void                   releaseMeshIslandGeneration(MeshIslandGeneration *cm);
-
-}; // end of namespace
-
-#endif

+ 0 - 153
Engine/lib/convexDecomp/NvRayCast.cpp

@@ -1,153 +0,0 @@
-/*
-
-NvRayCast.cpp : A code snippet to cast a ray against a triangle mesh. This implementation does not use any acceleration data structures.  That is a 'to do' item.
-
-*/
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include "NvRayCast.h"
-#include "NvUserMemAlloc.h"
-#include "NvFloatMath.h"
-
-#pragma warning(disable:4100)
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Brute-force ray caster: every castRay() call tests the ray against each triangle
-// of the mesh. The vertex and index arrays are referenced, not copied.
-class RayCast : public iRayCast, public Memalloc
-{
-public:
-	RayCast(const NxF32 *vertices,NxU32 tcount,const NxU32 *indices)
-		: mVertices(vertices)
-		, mTcount(tcount)
-		, mIndices(indices)
-	{
-	}
-
-	~RayCast(void)
-	{
-	}
-
-	// Casts a ray from 'orig' along 'dir'. Returns true when a triangle is hit; 'dest'
-	// then holds orig + dir*t for the smallest t reported by
-	// fm_rayIntersectsTriangle, and fm_computePlane() writes its result for that
-	// triangle (vertices passed in reverse order) into 'hitNormal'. Without a hit,
-	// 'dest' is the point RAY_DIST along the ray and 'hitNormal' is left untouched.
-	virtual bool castRay(const NxF32 *orig,const NxF32 *dir,NxF32 *dest,NxF32 *hitNormal)
-	{
-		const NxF32 RAY_DIST=50;
-
-		// Default end point, used when nothing is hit.
-		for (NxU32 axis=0; axis<3; axis++)
-		{
-			dest[axis] = orig[axis]+ dir[axis]*RAY_DIST;
-		}
-
-		bool  hit       = false;
-		NxF32 nearest   = 1e9;
-		NxU32 near_face = 0;
-
-		for (NxU32 face=0; face<mTcount; face++)
-		{
-			const NxF32 *t1 = &mVertices[mIndices[face*3+0]*3];
-			const NxF32 *t2 = &mVertices[mIndices[face*3+1]*3];
-			const NxF32 *t3 = &mVertices[mIndices[face*3+2]*3];
-
-			NxF32 t;
-			if ( !fm_rayIntersectsTriangle(orig,dir,t1,t2,t3,t) )
-			{
-				continue;
-			}
-			if ( !(t < nearest) )
-			{
-				continue;
-			}
-
-			// Closest hit so far: remember it and move the end point onto it.
-			dest[0] = orig[0]+dir[0]*t;
-			dest[1] = orig[1]+dir[1]*t;
-			dest[2] = orig[2]+dir[2]*t;
-			hit       = true;
-			near_face = face;
-			nearest   = t;
-		}
-
-		if ( hit )
-		{
-			// NOTE(review): the original comment here spoke of rejecting the cast when
-			// the nearest face is back-facing, but no rejection is performed; the code
-			// only computes the plane of the nearest face.
-			const NxF32 *t1 = &mVertices[mIndices[near_face*3+0]*3];
-			const NxF32 *t2 = &mVertices[mIndices[near_face*3+1]*3];
-			const NxF32 *t3 = &mVertices[mIndices[near_face*3+2]*3];
-
-			fm_computePlane(t3,t2,t1,hitNormal);
-		}
-
-		return hit;
-	}
-private:
-	const	NxF32	*mVertices;
-	NxU32			 mTcount;
-	const   NxU32	*mIndices;
-
-};
-
-
-// Factory: creates a RayCast over the given mesh via MEMALLOC_NEW and returns it
-// through the iRayCast interface. The RayCast constructor stores the vertex and
-// index pointers without copying the arrays.
-iRayCast *createRayCast(const NxF32 *vertices,NxU32 tcount,const NxU32 *indices)
-{
-	RayCast *rc = MEMALLOC_NEW(RayCast)(vertices,tcount,indices);
-	return static_cast< iRayCast *>(rc);
-}
-
-// Destroys an object obtained from createRayCast().
-// NOTE(review): the object is created with MEMALLOC_NEW but released with a plain
-// 'delete' -- confirm in NvUserMemAlloc.h that the two are a matching pair.
-void	  releaseRayCast(iRayCast *rc)
-{
-	RayCast *r = static_cast< RayCast *>(rc);
-	delete r;
-}
-
-};
-

+ 0 - 79
Engine/lib/convexDecomp/NvRayCast.h

@@ -1,79 +0,0 @@
-#ifndef NV_RAYCAST_H
-
-#define NV_RAYCAST_H
-
-/*
-
-NvRayCast.h : A code snippet to cast a ray against a triangle mesh. This implementation does not use any acceleration data structures.  That is a 'to do' item.
-
-*/
-
-
-#include "NvSimpleTypes.h"
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Abstract ray-casting interface; instances come from createRayCast() and are
-// destroyed with releaseRayCast().
-class iRayCast
-{
-public:
-	// Casts a ray from 'orig' along 'dir' (both arrays of three floats).
-	// Returns true when a triangle was hit; 'hitPoint' and 'hitNormal' are
-	// three-float output arrays.
-	virtual bool castRay(const NxF32 *orig,const NxF32 *dir,NxF32 *hitPoint,NxF32 *hitNormal) = 0;
-
-protected:
-	// Protected so the object cannot be deleted through the interface pointer.
-	virtual ~iRayCast(void) { }
-};
-
-
-iRayCast *createRayCast(const NxF32 *vertices,NxU32 tcount,const NxU32 *indices);
-void	  releaseRayCast(iRayCast *rc);
-
-};
-
-#endif

+ 0 - 713
Engine/lib/convexDecomp/NvRemoveTjunctions.cpp

@@ -1,713 +0,0 @@
-/*
-
-NvRemoveTjunctions.cpp : A code snippet to remove tjunctions from a triangle mesh.  This version is currently disabled as it appears to have a bug.
-
-*/
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#pragma warning(disable:4702)
-#pragma warning(disable:4127) //conditional expression is constant (because _HAS_EXCEPTIONS=0)
-#include <vector>
-#if defined( __APPLE__ ) || defined( __FreeBSD__)
-   #include <ext/hash_map>
-#elif LINUX
-   #include <hash_map>
-#elif _MSC_VER < 1500
-   #include <hash_map>
-#elif _MSC_VER > 1800
-   #include <unordered_map>
-#endif
-#include "NvUserMemAlloc.h"
-#include "NvHashMap.h"
-#include "NvRemoveTjunctions.h"
-#include "NvFloatMath.h"
-#ifdef LINUX
-   #include <climits>
-#endif
-
-#pragma warning(disable:4189)
-
-using namespace CONVEX_DECOMPOSITION;
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Axis-aligned bounding box stored as a minimum and a maximum corner,
-// three floats each.
-class AABB
-{
-public:
-  NxF32 mMin[3]; // minimum corner
-  NxF32 mMax[3]; // maximum corner
-};
-
-bool gDebug=false;
-NxU32 gCount=0;
-
-typedef CONVEX_DECOMPOSITION::Array< NxU32 > NxU32Vector;
-
-class Triangle
-{
-public:
-  // Creates an empty triangle: no flags set, all three vertex indices set to
-  // the 0xFFFFFFFF sentinel, id 0.  The bounding box is zeroed as well so that
-  // intersects() never reads indeterminate floats from a triangle that has
-  // not been through init() yet.
-  Triangle(void)
-  {
-    mPending = false;
-    mSplit = false;
-    mI1 = mI2 = mI3 = 0xFFFFFFFF;
-    mId = 0;
-    mBmin[0] = mBmin[1] = mBmin[2] = 0;
-    mBmax[0] = mBmax[1] = mBmax[2] = 0;
-  }
-
-  // Creates a triangle from three vertex indices and computes its bounding
-  // box from 'vertices' (three floats per vertex).  Both flags start cleared.
-  Triangle(NxU32 i1,NxU32 i2,NxU32 i3,const float *vertices,NxU32 id)
-  {
-    init(i1,i2,i3,vertices,id); // stores indices/id, clears mSplit, builds the box
-    mPending = false;
-    mSplit = false;
-  }
-
-  // (Re)initialises the triangle with three vertex indices and an id, clears
-  // the split flag and rebuilds the bounding box.  mPending is left untouched.
-  void init(NxU32 i1,NxU32 i2,NxU32 i3,const float *vertices,NxU32 id)
-  {
-    mId = id;
-    mI1 = i1;
-    mI2 = i2;
-    mI3 = i3;
-    mSplit = false;
-
-    // Each vertex occupies three consecutive floats.
-    initMinMax(vertices + mI1*3, vertices + mI2*3, vertices + mI3*3);
-  }
-
-  // Builds mBmin/mBmax from the three corner positions: both bounds start as
-  // a copy of p1 and are then updated with p2 and p3 through fm_minmax
-  // (presumably growing the box to contain each point -- helper is defined
-  // in NvFloatMath, not visible here).
-  void initMinMax(const float *p1,const float *p2,const float *p3)
-  {
-    fm_copy3(p1,mBmin);
-    fm_copy3(p1,mBmax);
-    fm_minmax(p2,mBmin,mBmax);
-    fm_minmax(p3,mBmin,mBmax);
-  }
-
-  // Same as the three-index init(), taking the indices from idx[0..2].
-  void init(const NxU32 *idx,const float *vertices,NxU32 id)
-  {
-    init(idx[0],idx[1],idx[2],vertices,id);
-  }
-
-  // True when fm_distancePointLineSegment classifies 'pos' as LS_MIDDLE with
-  // respect to the segment p1-p2 and reports a distance below 'epsilon'.
-  bool intersects(const float *pos,const float *p1,const float *p2,float epsilon) const
-  {
-    float nearest[3];        // filled in by the helper; not used here
-    LineSegmentType where;
-
-    const float dist = fm_distancePointLineSegment(pos,p1,p2,nearest,where,epsilon);
-
-    return where == LS_MIDDLE && dist < epsilon;
-  }
-
-  // Tests vertex 'i' against the three edges of this triangle, in the order
-  // (mI1,mI2), (mI2,mI3), (mI3,mI1).  On the first match 'edge' receives
-  // 0, 1 or 2 and true is returned; otherwise 'edge' is left unchanged and
-  // the result is false.
-  bool intersects(NxU32 i,const float *vertices,NxU32 &edge,float epsilon) const
-  {
-    const float *pos = vertices + i*3;
-    const float *a   = vertices + mI1*3;
-    const float *b   = vertices + mI2*3;
-    const float *c   = vertices + mI3*3;
-
-    if ( intersects(pos,a,b,epsilon) )
-    {
-      edge = 0;
-      return true;
-    }
-    if ( intersects(pos,b,c,epsilon) )
-    {
-      edge = 1;
-      return true;
-    }
-    if ( intersects(pos,c,a,epsilon) )
-    {
-      edge = 2;
-      return true;
-    }
-    return false;
-  }
-
-  // Checks whether any corner of this triangle lies on an edge of 't'.
-  // The bounding boxes are compared first; when they do not overlap the
-  // corners are not tested at all.  All three corners are always tested, so
-  // if several match, 'intersection_index' and 'edge' describe the last one.
-  bool intersects(const Triangle *t,const float *vertices,NxU32 &intersection_index,NxU32 &edge,float epsilon)
-  {
-    if ( !fm_intersectAABB(mBmin,mBmax,t->mBmin,t->mBmax) )
-    {
-      return false;
-    }
-
-    const NxU32 corners[3] = { mI1, mI2, mI3 };
-    bool found = false;
-
-    for (NxU32 c=0; c<3; c++)
-    {
-      if ( t->intersects(corners[c],vertices,edge,epsilon) )
-      {
-        intersection_index = corners[c];
-        found = true;
-      }
-    }
-
-    return found;
-  }
-
-  bool    mSplit:1;
-  bool    mPending:1;
-  NxU32   mI1;
-  NxU32   mI2;
-  NxU32   mI3;
-  NxU32   mId;
-  float   mBmin[3];
-  float   mBmax[3];
-};
-
-// One directed use of a triangle edge.  Edges that join the same pair of
-// vertices share a hash and are chained through mNextEdge.
-class RtEdge
-{
-public:
-  RtEdge(void) : mNextEdge(0), mTriangle(0), mHash(0)
-  {
-  }
-
-  // Binds the edge to triangle 't' and returns the order-independent key
-  // (smaller index << 16) | larger index.
-  // The key is only unique while both indices fit in 16 bits; this is checked
-  // with NX_ASSERT only.
-  NxU32 init(Triangle *t,NxU32 i1,NxU32 i2)
-  {
-    mTriangle = t;
-    mNextEdge = 0;
-    NX_ASSERT( i1 < 65536 );
-    NX_ASSERT( i2 < 65536 );
-
-    const NxU32 lo = ( i1 < i2 ) ? i1 : i2;
-    const NxU32 hi = ( i1 < i2 ) ? i2 : i1;
-    mHash = (lo<<16)|hi;
-
-    return mHash;
-  }
-
-  RtEdge     *mNextEdge;  // next edge with the same hash, or 0
-  Triangle *mTriangle;    // triangle this edge belongs to
-  NxU32    mHash;         // key produced by init()
-};
-
-
-typedef CONVEX_DECOMPOSITION::Array< Triangle * > TriangleVector;
-typedef CONVEX_DECOMPOSITION::HashMap< NxU32, RtEdge * > EdgeMap;
-
-class MyRemoveTjunctions : public RemoveTjunctions
-{
-public:
-  // Starts with no mesh loaded.  Every scalar member is given a defined
-  // value: the low-level removeTjunctions() overload is public and reads
-  // mEpsilon, so it must not be left indeterminate.  The epsilon default
-  // matches the one RemoveTjunctionsDesc uses.
-  MyRemoveTjunctions(void)
-  {
-    mInputTriangles = 0;
-    mEdges = 0;
-    mVcount = 0;
-    mVertices = 0;
-    mEdgeCount = 0;
-    mTcount = 0;
-    mMaxTcount = 0;
-    mEpsilon = 0.00000001f;
-  }
-  // Frees the triangle and edge arrays and clears all containers.
-  ~MyRemoveTjunctions(void)
-  {
-    release();
-  }
-
-  // Descriptor-based entry point.  Reads the input mesh and epsilon from
-  // 'desc', runs the low-level overload and writes the results back:
-  //   desc.mIndicesOut - output triangle indices (owned by this object)
-  //   desc.mTcountOut  - number of output triangles
-  //   desc.mIdsOut     - per-triangle ids, or 0 when there are none
-  // Returns the number of output triangles.  The output pointers stay valid
-  // until the next call to removeTjunctions() or release().
-  virtual NxU32 removeTjunctions(RemoveTjunctionsDesc &desc)
-  {
-    mEpsilon = desc.mEpsilon;
-
-    size_t tcountOut = 0;
-    desc.mIndicesOut = removeTjunctions(desc.mVcount, desc.mVertices, desc.mTcount, desc.mIndices, tcountOut, desc.mIds);
-
-    // The descriptor stores a 32-bit count; narrow explicitly.
-    NX_ASSERT( tcountOut < UINT_MAX );
-    desc.mTcountOut = (NxU32)tcountOut;
-
-    // Always overwrite mIdsOut: a reused descriptor would otherwise keep a
-    // pointer into storage that the call above has already cleared.
-    desc.mIdsOut = mIds.empty() ? 0 : &mIds[0];
-
-    return desc.mTcountOut;
-  }
-
-  // Initialises edge slot 'e' for triangle 't' and registers it in the edge
-  // map.  If the map already holds an edge with the same key, the new edge
-  // becomes the head of the chain and links to the previous head.
-  // Returns the next free edge slot.
-  RtEdge * addEdge(Triangle *t,RtEdge *e,NxU32 i1,NxU32 i2)
-  {
-    const NxU32 key = e->init(t,i1,i2);
-
-    const EdgeMap::Entry *existing = mEdgeMap.find(key);
-    if ( existing != NULL )
-    {
-      e->mNextEdge = (*existing).second;
-      mEdgeMap.erase(key);
-    }
-    mEdgeMap[key] = e;
-
-    mEdgeCount++;
-    return e+1;
-  }
-
-  // Initialises triangle 't' from three consecutive entries of 'indices' and
-  // registers its three edges, consuming three slots starting at 'e'.
-  // Returns the next free edge slot.
-  RtEdge * init(Triangle *t,const NxU32 *indices,const float *vertices,RtEdge *e,NxU32 id)
-  {
-    t->init(indices,vertices,id);
-    e = addEdge(t,e,t->mI1,t->mI2);
-    e = addEdge(t,e,t->mI2,t->mI3);
-    e = addEdge(t,e,t->mI3,t->mI1);
-    return e;
-  }
-
-  // Returns the object to its empty state: frees the triangle and edge
-  // arrays, clears every container and forgets the input mesh.  Pointers
-  // previously handed out by removeTjunctions() become invalid.
-  void release(void)
-  {
-    delete []mInputTriangles;
-    mInputTriangles = 0;
-    delete []mEdges;
-    mEdges = 0;
-
-    mEdgeMap.clear();
-    mSplit.clear();
-    mIndices.clear();
-    mIds.clear();
-
-    mVertices = 0;
-    mVcount = 0;
-    mEdgeCount = 0;
-  }
-
-  // Low-level entry point.
-  //   vcount/vertices - input vertices, three floats each (not copied)
-  //   tcount/indices  - input triangles, three indices each
-  //   tcount_out      - receives the number of output triangles
-  //   ids             - optional per-triangle ids (may be 0; id 0 is used then)
-  // Returns a pointer to the output indices held by this object, or 0 when
-  // there are no output triangles.  Room is reserved for at most twice the
-  // input triangle count; splits beyond that are silently skipped.
-  virtual NxU32 * removeTjunctions(NxU32 vcount,
-                                    const float *vertices,
-                                    size_t tcount,
-                                    const NxU32 *indices,
-                                    size_t &tcount_out,
-                                    const NxU32 * ids)
-  {
-    release();
-
-    mVcount    = vcount;
-    mVertices  = vertices;
-    mTcount    = (NxU32)tcount;
-    tcount_out = 0;
-
-    mMaxTcount      = (NxU32)tcount*2;
-    mInputTriangles = new Triangle[mMaxTcount];
-    mEdges          = new RtEdge[mMaxTcount*3];
-    mEdgeCount      = 0;
-
-    // Load the input triangles and register their edges.
-    {
-      Triangle *tri  = mInputTriangles;
-      RtEdge   *slot = mEdges;
-      NxU32     id   = 0;
-      for (NxU32 i=0; i<tcount; i++)
-      {
-        if ( ids ) id = *ids++;
-        slot = init(tri,indices,vertices,slot,id);
-        indices+=3;
-        tri++;
-      }
-    }
-
-    // Collect every triangle that owns an unshared (open) edge, once each,
-    // and look for a vertex lying on one of its edges.
-    {
-      TriangleVector open;
-      for (EdgeMap::Iterator it = mEdgeMap.getIterator(); !it.done(); ++it)
-      {
-        RtEdge *edge = (*it).second;
-        if ( edge->mNextEdge != 0 ) continue; // edge is shared
-
-        Triangle *owner = edge->mTriangle;
-        if ( owner->mPending ) continue;      // already queued
-
-        open.pushBack(owner);
-        owner->mPending = true;
-      }
-
-      if ( !open.empty() )
-      {
-        for (TriangleVector::Iterator it=open.begin(); it!=open.end(); ++it)
-        {
-          locateIntersection(*it);
-        }
-      }
-    }
-
-    // Triangles produced by a split may themselves need splitting; keep
-    // going until a pass produces no new ones.
-    while ( !mSplit.empty() )
-    {
-      TriangleVector pass = mSplit;
-      mSplit.clear();
-      for (TriangleVector::Iterator it=pass.begin(); it!=pass.end(); ++it)
-      {
-        locateIntersection(*it);
-      }
-    }
-
-    // Flatten the (possibly grown) triangle list into the output arrays.
-    mIndices.clear();
-    mIds.clear();
-    for (NxU32 i=0; i<mTcount; i++)
-    {
-      const Triangle &tri = mInputTriangles[i];
-      mIndices.pushBack(tri.mI1);
-      mIndices.pushBack(tri.mI2);
-      mIndices.pushBack(tri.mI3);
-      mIds.pushBack(tri.mId);
-    }
-
-    // The working set is no longer needed.
-    mEdgeMap.clear();
-    delete []mEdges;
-    mEdges = 0;
-    delete []mInputTriangles;
-    mInputTriangles = 0;
-
-    tcount_out = mIndices.size()/3;
-    NxU32 *result = tcount_out ? &mIndices[0] : 0;
-
-#ifdef _DEBUG
-    // No output triangle may reference the same vertex twice.
-    if ( result )
-    {
-      const NxU32 *tri = result;
-      for (NxU32 i=0; i<tcount_out; i++)
-      {
-        NxU32 i1 = tri[0];
-        NxU32 i2 = tri[1];
-        NxU32 i3 = tri[2];
-        assert( i1 != i2 && i1 != i3 && i2 != i3 );
-        tri+=3;
-      }
-    }
-#endif
-    return result;
-  }
-
-  // If a corner of 'scan' lies on an edge of 't' (and is not already one of
-  // t's own corners), splits 't' at that corner: 't' is re-initialised as one
-  // half and a new triangle appended to mInputTriangles becomes the other.
-  // Both halves are queued in mSplit.  Returns 'scan' when a split happened,
-  // 0 otherwise (including when the triangle array is full).
-  Triangle * locateIntersection(Triangle *scan,Triangle *t)
-  {
-    NxU32 t1 = (NxU32)(scan-mInputTriangles);
-    NxU32 t2 = (NxU32)(t-mInputTriangles);
-
-    NX_ASSERT( t1 < mTcount );
-    NX_ASSERT( t2 < mTcount );
-
-    NX_ASSERT( scan->mI1 < mVcount );
-    NX_ASSERT( scan->mI2 < mVcount );
-    NX_ASSERT( scan->mI3 < mVcount );
-
-    NX_ASSERT( t->mI1 < mVcount );
-    NX_ASSERT( t->mI2 < mVcount );
-    NX_ASSERT( t->mI3 < mVcount );
-
-    NxU32 intersection_index;
-    NxU32 edge;
-
-    if ( scan == t )
-    {
-      return 0;
-    }
-    if ( !scan->intersects(t,mVertices,intersection_index,edge,mEpsilon) )
-    {
-      return 0;
-    }
-    // A corner that 't' already uses is not a t-junction.
-    if ( t->mI1 == intersection_index || t->mI2 == intersection_index || t->mI3 == intersection_index )
-    {
-      return 0;
-    }
-
-    // Edge e runs from corner[e] to corner[(e+1)%3]; corner[(e+2)%3] is the
-    // corner opposite to it.
-    const NxU32 corner[3] = { t->mI1, t->mI2, t->mI3 };
-    const NxU32 id = t->mId;
-    NxU32 i1,i2,i3;
-    NxU32 j1,j2,j3;
-
-    if ( edge < 3 )
-    {
-      const NxU32 opposite = corner[(edge+2)%3];
-      i1 = corner[edge];
-      i2 = intersection_index;
-      i3 = opposite;
-      j1 = intersection_index;
-      j2 = corner[(edge+1)%3];
-      j3 = opposite;
-    }
-    else
-    {
-      NX_ASSERT(0);
-      i1 = i2 = i3 = 0;
-      j1 = j2 = j3 = 0;
-    }
-
-    if ( mTcount >= mMaxTcount )
-    {
-      return 0; // no room left for the second half
-    }
-
-    t->init(i1,i2,i3,mVertices,id);
-    Triangle *newt = &mInputTriangles[mTcount];
-    newt->init(j1,j2,j3,mVertices,id);
-    mTcount++;
-    t->mSplit = true;
-    newt->mSplit = true;
-
-    mSplit.pushBack(t);
-    mSplit.pushBack(newt);
-    return scan;
-  }
-
-  // Variant of locateIntersection(scan,t) that requires scan != t (asserted)
-  // and does not skip corners that 't' already uses.  Splits 't' at the
-  // corner of 'scan' found on one of its edges and queues both halves in
-  // mSplit.  Returns 'scan' when a split happened, 0 otherwise.
-  Triangle * testIntersection(Triangle *scan,Triangle *t)
-  {
-    NxU32 t1 = (NxU32)(scan-mInputTriangles);
-    NxU32 t2 = (NxU32)(t-mInputTriangles);
-
-    NX_ASSERT( t1 < mTcount );
-    NX_ASSERT( t2 < mTcount );
-
-    NX_ASSERT( scan->mI1 < mVcount );
-    NX_ASSERT( scan->mI2 < mVcount );
-    NX_ASSERT( scan->mI3 < mVcount );
-
-    NX_ASSERT( t->mI1 < mVcount );
-    NX_ASSERT( t->mI2 < mVcount );
-    NX_ASSERT( t->mI3 < mVcount );
-
-    NxU32 intersection_index;
-    NxU32 edge;
-
-    assert( scan != t );
-
-    if ( !scan->intersects(t,mVertices,intersection_index,edge,mEpsilon) )
-    {
-      return 0;
-    }
-
-    // Edge e runs from corner[e] to corner[(e+1)%3]; corner[(e+2)%3] is the
-    // corner opposite to it.
-    const NxU32 corner[3] = { t->mI1, t->mI2, t->mI3 };
-    const NxU32 id = t->mId;
-    NxU32 i1,i2,i3;
-    NxU32 j1,j2,j3;
-
-    if ( edge < 3 )
-    {
-      const NxU32 opposite = corner[(edge+2)%3];
-      i1 = corner[edge];
-      i2 = intersection_index;
-      i3 = opposite;
-      j1 = intersection_index;
-      j2 = corner[(edge+1)%3];
-      j3 = opposite;
-    }
-    else
-    {
-      NX_ASSERT(0);
-      i1 = i2 = i3 = 0;
-      j1 = j2 = j3 = 0;
-    }
-
-    if ( mTcount >= mMaxTcount )
-    {
-      return 0; // no room left for the second half
-    }
-
-    t->init(i1,i2,i3,mVertices,id);
-    Triangle *newt = &mInputTriangles[mTcount];
-    newt->init(j1,j2,j3,mVertices,id);
-    mTcount++;
-    t->mSplit = true;
-    newt->mSplit = true;
-
-    mSplit.pushBack(t);
-    mSplit.pushBack(newt);
-    return scan;
-  }
-
-  // Tries every current triangle as the 'scan' triangle against 't' and
-  // stops at the first one that causes a split.  Returns that triangle, or 0
-  // when 't' was not split.
-  Triangle * locateIntersection(Triangle *t)
-  {
-    for (NxU32 i=0; i<mTcount; i++)
-    {
-      Triangle *hit = locateIntersection(&mInputTriangles[i],t);
-      if ( hit )
-      {
-        return hit;
-      }
-    }
-    return 0;
-  }
-
-
-  Triangle             *mInputTriangles;
-  NxU32                mVcount;
-  NxU32                mMaxTcount;
-  NxU32                mTcount;
-  const float          *mVertices;
-  NxU32Vector          mIndices;
-  NxU32Vector          mIds;
-  TriangleVector        mSplit;
-  NxU32                mEdgeCount;
-  RtEdge                 *mEdges;
-  EdgeMap               mEdgeMap;
-  NxF32                 mEpsilon;
-};
-
-// Creates a t-junction remover; destroy it with releaseRemoveTjunctions().
-RemoveTjunctions * createRemoveTjunctions(void)
-{
-  return static_cast< RemoveTjunctions *>( new MyRemoveTjunctions );
-}
-
-// Destroys an object obtained from createRemoveTjunctions().  The pointer is
-// cast back to the concrete type before deletion.
-void               releaseRemoveTjunctions(RemoveTjunctions *tj)
-{
-  delete static_cast< MyRemoveTjunctions *>(tj);
-}
-
-
-}; // end of namespace
-

+ 0 - 110
Engine/lib/convexDecomp/NvRemoveTjunctions.h

@@ -1,110 +0,0 @@
-#ifndef REMOVE_TJUNCTIONS_H
-
-#define REMOVE_TJUNCTIONS_H
-
-/*
-
-NvRemoveTjunctions.h : A code snippet to remove tjunctions from a triangle mesh.  This version is currently disabled as it appears to have a bug.
-
-*/
-
-
-#include "NvUserMemAlloc.h"
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-
-// Input/output descriptor for RemoveTjunctions::removeTjunctions().
-// The caller fills in the input section; the call fills in the output section.
-class RemoveTjunctionsDesc
-{
-public:
-  // Zeroes every field and selects the default epsilon.
-  RemoveTjunctionsDesc(void)
-  {
-    mVcount = 0;
-    mVertices = 0;
-    mTcount = 0;
-    mIndices = 0;
-    mIds = 0;
-    mTcountOut = 0;
-    mIndicesOut = 0;
-    mIdsOut = 0;
-    mEpsilon = 0.00000001f;
-    // Previously left uninitialised.  Nothing visible in this library reads
-    // it; it is given the same default as mEpsilon so it holds a defined value.
-    // NOTE(review): confirm the intended default.
-    mDistanceEpsilon = 0.00000001f;
-  }
-
-// input
-  NxF32        mEpsilon;          // tolerance used when testing a vertex against an edge
-  NxF32        mDistanceEpsilon;
-  NxU32        mVcount;  		// input vertex count.
-  const NxF32 *mVertices; 		// input vertices as NxF32s or...
-  NxU32        mTcount;    		// number of input triangles.
-  const NxU32 *mIndices;   		// triangle indices.
-  const NxU32 *mIds;       		// optional triangle Id numbers.
-// output..
-  NxU32        mTcountOut;  // number of output triangles.
-  const NxU32 *mIndicesOut; // output triangle indices
-  const NxU32 *mIdsOut;     // output retained id numbers.
-};
-
-// Removes t-junctions from an input mesh.  Does not generate any new data points, but may possibly produce additional triangles and new indices.
-// Create instances with createRemoveTjunctions() and destroy them with
-// releaseRemoveTjunctions().
-class RemoveTjunctions
-{
-public:
-
-   // Polymorphic interface: a virtual destructor keeps deletion through a
-   // base-class pointer well defined.
-   virtual ~RemoveTjunctions(void) { }
-
-   virtual NxU32 removeTjunctions(RemoveTjunctionsDesc &desc) =0; // returns number of triangles output and the descriptor is filled with the appropriate results.
-
-
-};
-
-RemoveTjunctions * createRemoveTjunctions(void);
-void               releaseRemoveTjunctions(RemoveTjunctions *tj);
-
-}; // end of namespace
-
-#endif

+ 0 - 189
Engine/lib/convexDecomp/NvSimpleTypes.h

@@ -1,189 +0,0 @@
-#ifndef NV_SIMPLE_TYPES_H
-
-#define NV_SIMPLE_TYPES_H
-
-/*
-
-NvSimpleTypes.h : Defines basic data types for integers and floats.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-
-#if defined(__APPLE__)
-   #include <sys/malloc.h>
-#else
-#if defined( __FreeBSD__)
-   #include <stdlib.h>
-#else
-   #include <malloc.h>
-#endif
-#endif
-#include <assert.h>
-
-#if defined(__APPLE__) || defined(__CELLOS_LV2__) || defined(LINUX)
-
-#ifndef stricmp
-#define stricmp(a, b) strcasecmp((a), (b))
-#define _stricmp(a, b) strcasecmp((a), (b))
-#endif
-
-#endif
-
-#if defined(WIN32)
-	typedef __int64				NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned __int64	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(LINUX)
-	typedef long long			NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned long long	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(__APPLE__)
-	typedef long long			NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned long long	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(__FreeBSD__)
-	typedef long long			NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned long long	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(__CELLOS_LV2__)
-	typedef long long			NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned long long	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(_XBOX)
-	typedef __int64				NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned __int64	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#elif defined(__PPCGEKKO__)
-	typedef long long			NxI64;
-	typedef signed int			NxI32;
-	typedef signed short		NxI16;
-	typedef signed char			NxI8;
-
-	typedef unsigned long long	NxU64;
-	typedef unsigned int		NxU32;
-	typedef unsigned short		NxU16;
-	typedef unsigned char		NxU8;
-
-	typedef float				NxF32;
-	typedef double				NxF64;
-
-#else
-	#error Unknown platform!
-#endif
-
-// Each macro has its own guard, so a translation unit that predefines one of
-// them still gets the default for the other.
-#ifndef NX_INLINE
-#define NX_INLINE inline
-#endif
-
-#ifndef NX_ASSERT
-#define NX_ASSERT assert
-#endif
-
-
-#endif

+ 0 - 224
Engine/lib/convexDecomp/NvSplitMesh.cpp

@@ -1,224 +0,0 @@
-/*
-
-NvSplitMesh.cpp : A code snippet to split a mesh into two separate meshes based on a slicing plane.
-
-*/
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#define SHOW_DEBUG 0
-#if SHOW_DEBUG
-#include "RenderDebug.h"
-#endif
-
-#include "NvSplitMesh.h"
-#include "NvFloatMath.h"
-#include "NvHashMap.h"
-
-#pragma warning(disable:4100)
-
-namespace CONVEX_DECOMPOSITION
-{
-
-
-typedef Array< NxU32 > NxU32Array;
-
-// SplitMesh: iSplitMesh implementation that clips every source triangle against
-// a plane, collecting the front pieces into the "left" mesh and the back pieces
-// into the "right" mesh. The index pointers handed out by splitMesh() refer to
-// the index lists owned by this object, so they are only usable until the next
-// splitMesh()/reset() call or until the object is destroyed.
-class SplitMesh : public iSplitMesh, public Memalloc
-{
-public:
-	SplitMesh(void)
-	{
-		mLeftVertices = 0;
-		mRightVertices = 0;
-	}
-
-	~SplitMesh(void)
-	{
-		reset();
-	}
-
-	// Releases both vertex-index pools (if any) and empties both index lists.
-	void reset(void)
-	{
-		if ( mLeftVertices )
-		{
-			fm_releaseVertexIndex(mLeftVertices);
-			mLeftVertices = 0;
-		}
-		if ( mRightVertices )
-		{
-			fm_releaseVertexIndex(mRightVertices);
-			mRightVertices = 0;
-		}
-		mLeftIndices.clear();
-		mRightIndices.clear();
-	}
-
-
-	// Splits 'source' with 'planeEquation'.
-	//   source        : input mesh, read as 3 indices per triangle and 3 floats per vertex
-	//   leftMesh      : receives the pieces reported as "front" by fm_planeTriIntersection
-	//   rightMesh     : receives the pieces reported as "back"
-	//   planeEquation : forwarded unchanged to fm_planeTriIntersection
-	//   precision     : forwarded to fm_createVertexIndex for both output vertex pools
-	// A side that receives no triangles gets mTcount == 0 and a null mIndices.
-	virtual void splitMesh(const NvSplitMesh &source,NvSplitMesh &leftMesh,NvSplitMesh &rightMesh,const NxF32 *planeEquation,NxF32 precision)
-	{
-		reset();
-
-		mLeftVertices 	= fm_createVertexIndex(precision,false);
-		mRightVertices 	= fm_createVertexIndex(precision,false);
-
-		for (NxU32 t=0; t<source.mTcount; t++)
-		{
-			const NxU32 *corner = &source.mIndices[t*3];
-
-			// gather the three corner positions into one contiguous 3x3 block
-			NxF32 source_tri[3*3];
-			for (NxU32 c=0; c<3; c++)
-			{
-				const NxF32 *p = &source.mVertices[corner[c]*3];
-				source_tri[c*3+0] = p[0];
-				source_tri[c*3+1] = p[1];
-				source_tri[c*3+2] = p[2];
-			}
-
-			NxF32 	front_tri[3*5];
-			NxF32 	back_tri[3*5];
-
-			// start from zero so a clipper that leaves a count untouched cannot feed garbage below
-			NxU32	fcount = 0;
-			NxU32	bcount = 0;
-
-			fm_planeTriIntersection(planeEquation,source_tri,sizeof(NxF32)*3,0.000001f,front_tri,fcount,back_tri,bcount);
-
-			if ( fcount )
-			{
-				addPolygon(mLeftVertices,mLeftIndices,front_tri,fcount,0xFFFFFF,0xFFFF00);
-			}
-			if ( bcount )
-			{
-				addPolygon(mRightVertices,mRightIndices,back_tri,bcount,0xFF8080,0x00FF00);
-			}
-		}
-
-		// Indexing element 0 of an empty index list is out of bounds, so a side
-		// without triangles reports a null index pointer instead.
-		leftMesh.mVcount 	= mLeftVertices->getVcount();
-		leftMesh.mVertices 	= mLeftVertices->getVerticesFloat();
-		leftMesh.mTcount	= mLeftIndices.size()/3;
-		leftMesh.mIndices	= mLeftIndices.size() ? &mLeftIndices[0] : 0;
-
-		rightMesh.mVcount	= mRightVertices->getVcount();
-		rightMesh.mVertices	= mRightVertices->getVerticesFloat();
-		rightMesh.mTcount	= mRightIndices.size()/3;
-		rightMesh.mIndices	= mRightIndices.size() ? &mRightIndices[0] : 0;
-
-	}
-
-
-	fm_VertexIndex	*mLeftVertices;
-	fm_VertexIndex	*mRightVertices;
- 	NxU32Array		 mLeftIndices;
- 	NxU32Array		 mRightIndices;
-
-private:
-	// Adds the corners of one clipped polygon to 'pool' and appends triangle
-	// (0,1,2) to 'indices'; when pcount == 4 the triangle (0,2,3) is appended too.
-	// The two colours are only consumed by the SHOW_DEBUG visualisation.
-	static void addPolygon(fm_VertexIndex *pool,NxU32Array &indices,NxF32 *poly,NxU32 pcount,NxU32 triColor,NxU32 quadColor)
-	{
-		bool newPos;
-		NxU32 c0 = pool->getIndex( &poly[0],newPos );
-		NxU32 c1 = pool->getIndex( &poly[3],newPos );
-		NxU32 c2 = pool->getIndex( &poly[6],newPos );
-		indices.pushBack(c0);
-		indices.pushBack(c1);
-		indices.pushBack(c2);
-		#if SHOW_DEBUG
-		NVSHARE::gRenderDebug->setCurrentColor(triColor);
-		NVSHARE::gRenderDebug->DebugTri(&poly[0],&poly[3],&poly[6]);
-		#else
-		(void)triColor;
-		(void)quadColor;
-		#endif
-		if ( pcount == 4 )
-		{
-			NxU32 c3 = pool->getIndex( &poly[9],newPos );
-			indices.pushBack(c0);
-			indices.pushBack(c2);
-			indices.pushBack(c3);
-			#if SHOW_DEBUG
-			NVSHARE::gRenderDebug->setCurrentColor(quadColor);
-			NVSHARE::gRenderDebug->DebugTri(&poly[0],&poly[6],&poly[9]);
-			#endif
-		}
-	}
-};
-
-// Factory: allocates a SplitMesh with MEMALLOC_NEW and returns it through its interface type.
-iSplitMesh *createSplitMesh(void)
-{
-	SplitMesh *created = MEMALLOC_NEW(SplitMesh);
-	iSplitMesh *asInterface = static_cast< iSplitMesh *>(created);
-	return asInterface;
-}
-
-// Destroys an object obtained from createSplitMesh(). The interface destructor is
-// protected, so the pointer is cast back to the concrete type before deleting.
-void        releaseSplitMesh(iSplitMesh *splitMesh)
-{
-	delete static_cast< SplitMesh *>(splitMesh);
-}
-
-}; // end of namespace

+ 0 - 88
Engine/lib/convexDecomp/NvSplitMesh.h

@@ -1,88 +0,0 @@
-#ifndef NV_SPLIT_MESH_H
-
-#define NV_SPLIT_MESH_H
-
-/*
-
-NvSplitMesh.h : A code snippet to split a mesh into two separate meshes based on a slicing plane.
-
-*/
-
-
-#include "NvUserMemAlloc.h"
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-namespace CONVEX_DECOMPOSITION
-{
-
-struct NvSplitMesh // Indexed triangle mesh described by raw pointers; the struct itself frees nothing
-{
-	NxU32	mVcount; // number of vertices
-	const NxF32	*mVertices; // vertex positions; SplitMesh::splitMesh reads 3 floats per vertex
-	NxU32	mTcount; // number of triangles
-	const NxU32	*mIndices; // vertex indices; SplitMesh::splitMesh reads 3 per triangle
-};
-
-
-class iSplitMesh // Abstract mesh splitter; obtain with createSplitMesh(), dispose with releaseSplitMesh()
-{
-public:
-	virtual void splitMesh(const NvSplitMesh &source,NvSplitMesh &leftMesh,NvSplitMesh &rightMesh,const NxF32 *planeEquation,NxF32 precision) = 0; // fills leftMesh/rightMesh from source cut by planeEquation
-protected:
-	virtual ~iSplitMesh(void) { }; // protected, so callers cannot delete through the interface pointer
-};
-
-iSplitMesh *createSplitMesh(void);
-void        releaseSplitMesh(iSplitMesh *splitMesh);
-
-
-}; // end of namespace
-
-#endif

+ 0 - 3464
Engine/lib/convexDecomp/NvStanHull.cpp

@@ -1,3464 +0,0 @@
-
-/*
-
-NvStanHull.cpp : A convex hull generator written by Stan Melax
-
-*/
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-#include <float.h>
-
-
-#include <stdarg.h>
-#include <setjmp.h>
-
-#include "NvStanHull.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-//*****************************************************
-//*** DARRAY.H
-//*****************************************************
-
-template <class Type> class ArrayRet;
-template <class Type> class Array // Growable array on MEMALLOC_MALLOC storage; elements are moved with plain assignment
-{
-	public:
-				Array(NxI32 s=0);
-				Array(Array<Type> &array);
-				Array(ArrayRet<Type> &array);
-				~Array();
-	void		allocate(NxI32 s);
-	void		SetSize(NxI32 s);
-	void		Pack();
-	Type&		Add(Type);
-	void		AddUnique(Type);
-	NxI32 		Contains(Type);
-	void		Insert(Type,NxI32);
-	NxI32			IndexOf(Type);
-	void		Remove(Type);
-	void		DelIndex(NxI32 i);
-	Type *		element; // storage block, NULL when nothing is allocated
-	NxI32			count; // number of elements in use
-	NxI32			array_size; // number of elements the storage block can hold
-	const Type	&operator[](NxI32 i) const { assert(i>=0 && i<count);  return element[i]; }
-	Type		&operator[](NxI32 i)  { assert(i>=0 && i<count);  return element[i]; }
-	Type		&Pop() { assert(count); count--;  return element[count]; } // returns the removed last element; storage is kept
-	Array<Type> &operator=(Array<Type> &array);
-	Array<Type> &operator=(ArrayRet<Type> &array);
-	// operator ArrayRet<Type> &() { return *(ArrayRet<Type> *)this;} // this worked but i suspect could be dangerous
-};
-
-template <class Type> class ArrayRet:public Array<Type> // tag type: assigning it to an Array transfers the storage instead of copying
-{
-};
-
-// Creates an empty array; a non-zero 's' reserves room for that many elements
-// up front while leaving count at 0.
-template <class Type> Array<Type>::Array(NxI32 s)
-	: element(NULL), count(0), array_size(0)
-{
-	if(s != 0)
-	{
-		allocate(s);
-	}
-}
-
-
-// Copy-constructs by appending every element of 'array' in order.
-template <class Type> Array<Type>::Array(Array<Type> &array)
-	: element(NULL), count(0), array_size(0)
-{
-	NxI32 i = 0;
-	while(i < array.count)
-	{
-		Add(array[i]);
-		++i;
-	}
-}
-
-
-// Constructs by taking over the storage of 'array', which is left empty.
-template <class Type> Array<Type>::Array(ArrayRet<Type> &array)
-{
-	// Start from a valid empty state: the assignment below releases any storage
-	// this object already owns, so the members must not be garbage.
-	count=0;
-	array_size = 0;
-	element = NULL;
-	*this = array;
-}
-
-// Transfers ownership of array's storage to this object and leaves 'array' empty.
-// Storage previously owned by this object is released first (it used to leak).
-template <class Type> Array<Type> &Array<Type>::operator=(ArrayRet<Type> &array)
-{
-	if(static_cast<Array<Type> *>(&array) == this)
-	{
-		return *this; // self-transfer would otherwise drop the storage and empty the array
-	}
-	if(element != NULL)
-	{
-		MEMALLOC_FREE(element);
-	}
-	count=array.count;
-	array_size = array.array_size;
-	element = array.element;
-	array.element=NULL;
-	array.count=0;
-	array.array_size=0;
-	return *this;
-}
-
-
-// Replaces the contents with a copy of 'array'. Existing storage is reused and
-// only grows when Add() needs more room.
-template <class Type> Array<Type> &Array<Type>::operator=(Array<Type> &array)
-{
-	if(this == &array)
-	{
-		return *this; // resetting count first would make a self-copy erase every element
-	}
-	count=0;
-	for(NxI32 i=0;i<array.count;i++)
-	{
-		Add(array[i]);
-	}
-	return *this;
-}
-
-// Frees the storage block, if any, and resets the bookkeeping fields.
-template <class Type> Array<Type>::~Array()
-{
-	if (element != NULL)
-	{
-		MEMALLOC_FREE(element);
-		element = NULL;
-	}
-	array_size = 0;
-	count = 0;
-}
-
-// Moves the array into a new storage block of exactly 's' elements, copying the
-// first 'count' elements by assignment and freeing the previous block.
-// 's' must be positive and at least 'count'.
-template <class Type> void Array<Type>::allocate(NxI32 s)
-{
-	assert(s>0);
-	assert(s>=count);
-	Type *previous = element;
-	array_size = s;
-	element = (Type *) MEMALLOC_MALLOC( sizeof(Type)*array_size );
-	assert(element);
-	NxI32 i = 0;
-	while(i < count)
-	{
-		element[i] = previous[i];
-		++i;
-	}
-	if(previous)
-	{
-		MEMALLOC_FREE(previous);
-	}
-}
-
-// Sets both the element count and the capacity to 's'. A size of zero releases
-// the storage block; any other size goes through allocate().
-template <class Type> void Array<Type>::SetSize(NxI32 s)
-{
-	if(s != 0)
-	{
-		allocate(s);
-	}
-	else
-	{
-		if(element)
-		{
-			MEMALLOC_FREE(element);
-			element = NULL;
-		}
-		array_size = 0;
-	}
-	count = s;
-}
-
-// Shrinks the capacity to the current element count. An empty array simply
-// releases its storage; allocate() rejects a size of zero, so it is not called
-// in that case.
-template <class Type> void Array<Type>::Pack()
-{
-	if(count == 0)
-	{
-		if(element)
-		{
-			MEMALLOC_FREE(element);
-			element = NULL;
-		}
-		array_size = 0;
-		return;
-	}
-	allocate(count);
-}
-
-// Appends 't' and returns a reference to the stored copy. When full, the
-// capacity doubles (or becomes 16 for an array with no storage yet).
-template <class Type> Type& Array<Type>::Add(Type t)
-{
-	assert(count<=array_size);
-	if(count==array_size)
-	{
-		NxI32 grown = 16;
-		if(array_size)
-		{
-			grown = array_size * 2;
-		}
-		allocate(grown);
-	}
-	Type &slot = element[count];
-	slot = t;
-	count++;
-	return slot;
-}
-
-// Returns how many stored elements compare equal to 't' (0 when absent).
-template <class Type> NxI32 Array<Type>::Contains(Type t)
-{
-	NxI32 matches = 0;
-	NxI32 i = 0;
-	while(i < count)
-	{
-		if(element[i] == t)
-		{
-			++matches;
-		}
-		++i;
-	}
-	return matches;
-}
-
-// Appends 't' only when no equal element is already stored.
-template <class Type> void Array<Type>::AddUnique(Type t)
-{
-	if(Contains(t))
-	{
-		return;
-	}
-	Add(t);
-}
-
-
-// Removes the element at index 'i' by shifting every later element down one
-// slot. Capacity is unchanged. 'i' must be a valid index; the lower bound is
-// now asserted as well, matching operator[].
-template <class Type> void Array<Type>::DelIndex(NxI32 i)
-{
-	assert(i>=0 && i<count);
-	count--;
-	while(i<count)
-	{
-		element[i] = element[i+1];
-		i++;
-	}
-}
-
-// Removes the first element equal to 't'. The caller is expected to pass a
-// value that is present exactly once (both facts are asserted). With asserts
-// compiled out, a missing value is now a no-op; it used to fall through to
-// DelIndex(count), which silently dropped the last element.
-template <class Type> void Array<Type>::Remove(Type t)
-{
-	NxI32 i;
-	for(i=0;i<count;i++)
-	{
-		if(element[i] == t)
-		{
-			break;
-		}
-	}
-	assert(i<count); // assert object t is in the array.
-	if(i>=count)
-	{
-		return;
-	}
-	DelIndex(i);
-	for(i=0;i<count;i++)
-	{
-		assert(element[i] != t);
-	}
-}
-
-// Inserts 't' at index 'k', shifting elements k..count-1 up by one slot.
-template <class Type> void Array<Type>::Insert(Type t,NxI32 k)
-{
-	NxI32 slot = count; // index of the new last slot once Add() has grown the array
-	Add(t);             // only used to make room; the value is overwritten below
-	for(; slot>k; --slot)
-	{
-		element[slot] = element[slot-1];
-	}
-	assert(slot==k);
-	element[k] = t;
-}
-
-
-// Returns the index of the first element equal to 't'. A missing value trips an
-// assert and, when asserts are compiled out, yields -1.
-template <class Type> NxI32 Array<Type>::IndexOf(Type t)
-{
-	NxI32 i = 0;
-	while(i < count)
-	{
-		if(element[i] == t)
-		{
-			return i;
-		}
-		++i;
-	}
-	assert(0);
-	return -1;
-}
-
-//****************************************************
-//** VECMATH.H
-//****************************************************
-#define PI (3.1415926535897932384626433832795f) // float literal; the extra digits exceed float precision
-
-#define DEG2RAD (PI / 180.0f) // multiply degrees by this to get radians
-#define RAD2DEG (180.0f / PI) // multiply radians by this to get degrees
-#define SQRT_OF_2 (1.4142135f)
-#define OFFSET(Class,Member)  (((char*) (&(((Class*)NULL)-> Member )))- ((char*)NULL)) // byte offset of Member via a null Class pointer; NOTE(review): offsetof is the standard spelling
-
-
-
-NxI32    argmin(NxF32 a[],NxI32 n);
-NxF32  sqr(NxF32 a);
-NxF32  clampf(NxF32 a) ;
-NxF32  Round(NxF32 a,NxF32 precision);
-NxF32  Interpolate(const NxF32 &f0,const NxF32 &f1,NxF32 alpha) ;
-
-// Exchanges the values of 'a' and 'b' through one temporary copy.
-template <class T>
-void Swap(T &a,T &b)
-{
-	T held = a;
-	a = b;
-	b = held;
-}
-
-
-
-// Returns a copy of 'a' when a>b, otherwise a copy of 'b' (so ties yield 'b').
-template <class T>
-T Max(const T &a,const T &b)
-{
-	if(a>b)
-	{
-		return a;
-	}
-	return b;
-}
-
-// Returns a copy of 'a' when a<b, otherwise a copy of 'b' (so ties yield 'b').
-template <class T>
-T Min(const T &a,const T &b)
-{
-	if(a<b)
-	{
-		return a;
-	}
-	return b;
-}
-
-//----------------------------------
-
-// Triple of integers addressable as x/y/z or by index 0..2.
-// The default constructor leaves the members uninitialised.
-class int3  : public Memalloc
-{
-public:
-	NxI32 x,y,z;
-	int3(){};
-	int3(NxI32 _x,NxI32 _y, NxI32 _z){x=_x;y=_y;z=_z;}
-	// Indexed access now asserts the range, like float2/float3/float4 do.
-	const NxI32& operator[](NxI32 i) const {assert(i>=0&&i<3);return (&x)[i];}
-	NxI32& operator[](NxI32 i) {assert(i>=0&&i<3);return (&x)[i];}
-};
-
-
-//-------- 2D --------
-
-// 2D float vector; default-constructs to (0,0). Indexed access reinterprets the
-// object as a float sequence.
-class float2  : public Memalloc
-{
-public:
-	NxF32 x,y;
-	float2() : x(0), y(0) {}
-	float2(NxF32 _x,NxF32 _y) : x(_x), y(_y) {}
-	NxF32& operator[](NxI32 i)
-	{
-		assert(i>=0&&i<2);
-		return reinterpret_cast<NxF32*>(this)[i];
-	}
-	const NxF32& operator[](NxI32 i) const
-	{
-		assert(i>=0&&i<2);
-		return reinterpret_cast<const NxF32*>(this)[i];
-	}
-};
-// component-wise difference
-inline float2 operator-( const float2& a, const float2& b )
-{
-	return float2(a.x-b.x, a.y-b.y);
-}
-// component-wise sum
-inline float2 operator+( const float2& a, const float2& b )
-{
-	return float2(a.x+b.x, a.y+b.y);
-}
-
-//--------- 3D ---------
-
-// 3D float vector; default-constructs to (0,0,0). Indexed access reinterprets
-// the object as a float sequence.
-class float3  : public Memalloc // 3D
-{
-public:
-	NxF32 x,y,z;
-	float3() : x(0), y(0), z(0) {}
-	float3(NxF32 _x,NxF32 _y,NxF32 _z) : x(_x), y(_y), z(_z) {}
-	//operator NxF32 *() { return &x;};
-	NxF32& operator[](NxI32 i)
-	{
-		assert(i>=0&&i<3);
-		return reinterpret_cast<NxF32*>(this)[i];
-	}
-	const NxF32& operator[](NxI32 i) const
-	{
-		assert(i>=0&&i<3);
-		return reinterpret_cast<const NxF32*>(this)[i];
-	}
-};
-
-
-float3& operator+=( float3 &a, const float3& b );
-float3& operator-=( float3 &a ,const float3& b );
-float3& operator*=( float3 &v ,const NxF32 s );
-float3& operator/=( float3 &v, const NxF32 s );
-
-NxF32  magnitude( const float3& v );
-float3 normalize( const float3& v );
-float3 safenormalize(const float3 &v);
-float3 vabs(const float3 &v);
-float3 operator+( const float3& a, const float3& b );
-float3 operator-( const float3& a, const float3& b );
-float3 operator-( const float3& v );
-float3 operator*( const float3& v, const NxF32 s );
-float3 operator*( const NxF32 s, const float3& v );
-float3 operator/( const float3& v, const NxF32 s );
-// Exact component-wise equality (no tolerance).
-inline NxI32 operator==( const float3 &a, const float3 &b )
-{
-	return (a.x==b.x && a.y==b.y && a.z==b.z);
-}
-// True when any component differs; the exact negation of operator==.
-inline NxI32 operator!=( const float3 &a, const float3 &b )
-{
-	return !(a.x==b.x && a.y==b.y && a.z==b.z);
-}
-// due to ambiguity and inconsistent standards there are no overloaded operators for mult such as va*vb.
-NxF32  dot( const float3& a, const float3& b );
-float3 cmul( const float3 &a, const float3 &b);
-float3 cross( const float3& a, const float3& b );
-float3 Interpolate(const float3 &v0,const float3 &v1,NxF32 alpha);
-float3 Round(const float3& a,NxF32 precision);
-float3	VectorMax(const float3 &a, const float3 &b);
-float3	VectorMin(const float3 &a, const float3 &b);
-
-
-
-// 3x3 matrix stored as three float3 rows; default-constructs to all zeros
-// because each row default-constructs to (0,0,0).
-class float3x3  : public Memalloc
-{
-public:
-	float3 x,y,z;  // the 3 rows of the Matrix
-
-	float3x3(){}
-	float3x3(NxF32 xx,NxF32 xy,NxF32 xz,
-	         NxF32 yx,NxF32 yy,NxF32 yz,
-	         NxF32 zx,NxF32 zy,NxF32 zz)
-		: x(xx,xy,xz), y(yx,yy,yz), z(zx,zy,zz)
-	{
-	}
-	float3x3(float3 _x,float3 _y,float3 _z) : x(_x), y(_y), z(_z) {}
-
-	// row access
-	float3& operator[](NxI32 i)
-	{
-		assert(i>=0&&i<3);
-		float3 *rows = &x;
-		return rows[i];
-	}
-	const float3& operator[](NxI32 i) const
-	{
-		assert(i>=0&&i<3);
-		const float3 *rows = &x;
-		return rows[i];
-	}
-
-	// element access by (row, column)
-	NxF32& operator()(NxI32 r, NxI32 c)
-	{
-		assert(r>=0&&r<3&&c>=0&&c<3);
-		float3 *rows = &x;
-		return rows[r][c];
-	}
-	const NxF32& operator()(NxI32 r, NxI32 c) const
-	{
-		assert(r>=0&&r<3&&c>=0&&c<3);
-		const float3 *rows = &x;
-		return rows[r][c];
-	}
-};
-float3x3 Transpose( const float3x3& m );
-float3   operator*( const float3& v  , const float3x3& m  );
-float3   operator*( const float3x3& m , const float3& v   );
-float3x3 operator*( const float3x3& m , const NxF32& s   );
-float3x3 operator*( const float3x3& ma, const float3x3& mb );
-float3x3 operator/( const float3x3& a, const NxF32& s ) ;
-float3x3 operator+( const float3x3& a, const float3x3& b );
-float3x3 operator-( const float3x3& a, const float3x3& b );
-float3x3 &operator+=( float3x3& a, const float3x3& b );
-float3x3 &operator-=( float3x3& a, const float3x3& b );
-float3x3 &operator*=( float3x3& a, const NxF32& s );
-NxF32    Determinant(const float3x3& m );
-float3x3 Inverse(const float3x3& a);  // its just 3x3 so we simply do that cofactor method
-
-
-//-------- 4D Math --------
-
-// 4D float vector; default-constructs to (0,0,0,0). xyz() views the first three
-// components as a float3 without copying.
-class float4  : public Memalloc
-{
-public:
-	NxF32 x,y,z,w;
-	float4() : x(0), y(0), z(0), w(0) {}
-	float4(NxF32 _x,NxF32 _y,NxF32 _z,NxF32 _w) : x(_x), y(_y), z(_z), w(_w) {}
-	float4(const float3 &v,NxF32 _w) : x(v.x), y(v.y), z(v.z), w(_w) {}
-	//operator NxF32 *() { return &x;};
-	NxF32& operator[](NxI32 i)
-	{
-		assert(i>=0&&i<4);
-		return reinterpret_cast<NxF32*>(this)[i];
-	}
-	const NxF32& operator[](NxI32 i) const
-	{
-		assert(i>=0&&i<4);
-		return reinterpret_cast<const NxF32*>(this)[i];
-	}
-	const float3& xyz() const { return *reinterpret_cast<const float3*>(this); }
-	float3&       xyz()       { return *reinterpret_cast<float3*>(this); }
-};
-
-
-struct D3DXMATRIX;
-
-// 4x4 matrix stored as four float4 rows. Converts implicitly to a pointer to its
-// first element and to a D3DXMATRIX pointer that aliases the same object.
-class float4x4  : public Memalloc
-{
-public:
-	float4 x,y,z,w;  // the 4 rows
-
-	float4x4(){}
-	float4x4(const float4 &_x, const float4 &_y, const float4 &_z, const float4 &_w)
-		: x(_x), y(_y), z(_z), w(_w)
-	{
-	}
-	float4x4(NxF32 m00, NxF32 m01, NxF32 m02, NxF32 m03,
-	         NxF32 m10, NxF32 m11, NxF32 m12, NxF32 m13,
-	         NxF32 m20, NxF32 m21, NxF32 m22, NxF32 m23,
-	         NxF32 m30, NxF32 m31, NxF32 m32, NxF32 m33 )
-		: x(m00,m01,m02,m03), y(m10,m11,m12,m13), z(m20,m21,m22,m23), w(m30,m31,m32,m33)
-	{
-	}
-
-	// element access by (row, column)
-	NxF32& operator()(NxI32 r, NxI32 c)
-	{
-		assert(r>=0&&r<4&&c>=0&&c<4);
-		float4 *rows = &x;
-		return rows[r][c];
-	}
-	const NxF32& operator()(NxI32 r, NxI32 c) const
-	{
-		assert(r>=0&&r<4&&c>=0&&c<4);
-		const float4 *rows = &x;
-		return rows[r][c];
-	}
-
-	operator       NxF32* ()       { return &(x.x); }
-	operator const NxF32* () const { return &(x.x); }
-	operator       struct D3DXMATRIX* ()       { return reinterpret_cast<struct D3DXMATRIX*>(this); }
-	operator const struct D3DXMATRIX* () const { return reinterpret_cast<const struct D3DXMATRIX*>(this); }
-};
-
-
-NxI32     operator==( const float4 &a, const float4 &b );
-float4 Homogenize(const float3 &v3,const NxF32 &w=1.0f); // Turns a 3D float3 4D vector4 by appending w
-float4 cmul( const float4 &a, const float4 &b);
-float4 operator*( const float4 &v, NxF32 s);
-float4 operator*( NxF32 s, const float4 &v);
-float4 operator+( const float4 &a, const float4 &b);
-float4 operator-( const float4 &a, const float4 &b);
-float4x4 operator*( const float4x4& a, const float4x4& b );
-float4 operator*( const float4& v, const float4x4& m );
-float4x4 Inverse(const float4x4 &m);
-float4x4 MatrixRigidInverse(const float4x4 &m);
-float4x4 MatrixTranspose(const float4x4 &m);
-float4x4 MatrixPerspectiveFov(NxF32 fovy, NxF32 Aspect, NxF32 zn, NxF32 zf );
-float4x4 MatrixTranslation(const float3 &t);
-float4x4 MatrixRotationZ(const NxF32 angle_radians);
-float4x4 MatrixLookAt(const float3& eye, const float3& at, const float3& up);
-NxI32     operator==( const float4x4 &a, const float4x4 &b );
-
-
-//-------- Quaternion ------------
-
-// Rotation quaternion stored as a float4: (x,y,z) is the vector part and w the
-// scalar part. Default-constructs to the identity rotation (0,0,0,1).
-class Quaternion :public float4
-{
- public:
-	Quaternion() { x = y = z = 0.0f; w = 1.0f; }
-	// Rotation of 't' radians about axis 'v'; the axis is normalized here.
-	Quaternion( float3 v, NxF32 t ) { v = normalize(v); w = cosf(t/2.0f); v = v*sinf(t/2.0f); x = v.x; y = v.y; z = v.z; }
-	Quaternion(NxF32 _x, NxF32 _y, NxF32 _z, NxF32 _w){x=_x;y=_y;z=_z;w=_w;}
-	// Rotation angle in radians. w is clamped to [-1,1] first: rounding can push
-	// a unit quaternion's w marginally outside that range, where acosf gives NaN.
-	NxF32 angle() const
-	{
-		NxF32 c = w;
-		if(c > 1.0f) c = 1.0f;
-		if(c < -1.0f) c = -1.0f;
-		return acosf(c)*2.0f;
-	}
-	// Rotation axis; (1,0,0) is returned when the angle is effectively zero.
-	float3 axis() const
-	{
-		const NxF32 theta = angle(); // evaluated once instead of twice
-		if(fabsf(theta)<0.0000001f) return float3(1,0,0);
-		return float3(x,y,z)*(1/sinf(theta/2.0f));
-	}
-	// Rows of the matrix returned by getmatrix().
-	float3 xdir() const { return float3( 1-2*(y*y+z*z),  2*(x*y+w*z),  2*(x*z-w*y) ); }
-	float3 ydir() const { return float3(   2*(x*y-w*z),1-2*(x*x+z*z),  2*(y*z+w*x) ); }
-	float3 zdir() const { return float3(   2*(x*z+w*y),  2*(y*z-w*x),1-2*(x*x+y*y) ); }
-	float3x3 getmatrix() const { return float3x3( xdir(), ydir(), zdir() ); }
-	operator float3x3() { return getmatrix(); }
-	void Normalize();
-};
-
-Quaternion& operator*=(Quaternion& a, NxF32 s );
-Quaternion	operator*( const Quaternion& a, NxF32 s );
-Quaternion	operator*( const Quaternion& a, const Quaternion& b);
-Quaternion	operator+( const Quaternion& a, const Quaternion& b );
-Quaternion	normalize( Quaternion a );
-NxF32		dot( const Quaternion &a, const Quaternion &b );
-float3		operator*( const Quaternion& q, const float3& v );
-float3		operator*( const float3& v, const Quaternion& q );
-Quaternion	slerp( Quaternion a, const Quaternion& b, NxF32 interp );
-Quaternion  Interpolate(const Quaternion &q0,const Quaternion &q1,NxF32 alpha);
-Quaternion  RotationArc(float3 v0, float3 v1 );  // returns quat q where q*v0=v1
-Quaternion  Inverse(const Quaternion &q);
-float4x4     MatrixFromQuatVec(const Quaternion &q, const float3 &v);
-
-
-//------ Euler Angle -----
-
-Quaternion YawPitchRoll( NxF32 yaw, NxF32 pitch, NxF32 roll );
-NxF32 Yaw( const Quaternion& q );
-NxF32 Pitch( const Quaternion& q );
-NxF32 Roll( Quaternion q );
-NxF32 Yaw( const float3& v );
-NxF32 Pitch( const float3& v );
-
-
-//------- Plane ----------
-
-class Plane // plane stored as a normal vector plus the D term of Ax+By+Cz+D=0
-{
-	public:
-	float3	normal;
-	NxF32	dist;   // distance below origin - the D from plane equation Ax+By+Cz+D=0
-			Plane(const float3 &n,NxF32 d):normal(n),dist(d){}
-			Plane():normal(),dist(0){}
-	void	Transform(const float3 &position, const Quaternion &orientation);
-};
-
-// Same plane with the normal and the distance both negated.
-inline Plane PlaneFlip(const Plane &plane)
-{
-	const float3 flippedNormal = -plane.normal;
-	return Plane(flippedNormal, -plane.dist);
-}
-// Exact equality of normal and distance (no tolerance).
-inline NxI32 operator==( const Plane &a, const Plane &b )
-{
-	return (a.normal==b.normal && a.dist==b.dist);
-}
-// True when the planes are exactly equal or exactly opposite-facing.
-inline NxI32 coplanar( const Plane &a, const Plane &b )
-{
-	if(a==b)
-	{
-		return 1;
-	}
-	return (a==PlaneFlip(b)) ? 1 : 0;
-}
-
-
-//--------- Utility Functions ------
-
-float3  PlaneLineIntersection(const Plane &plane, const float3 &p0, const float3 &p1);
-float3  PlaneProject(const Plane &plane, const float3 &point);
-float3  LineProject(const float3 &p0, const float3 &p1, const float3 &a);  // projects a onto infinite line p0p1
-NxF32   LineProjectTime(const float3 &p0, const float3 &p1, const float3 &a);
-float3  ThreePlaneIntersection(const Plane &p0,const Plane &p1, const Plane &p2);
-NxI32     PolyHit(const float3 *vert,const NxI32 n,const float3 &v0, const float3 &v1, float3 *impact=NULL, float3 *normal=NULL);
-NxI32     BoxInside(const float3 &p,const float3 &bmin, const float3 &bmax) ;
-NxI32     BoxIntersect(const float3 &v0, const float3 &v1, const float3 &bmin, const float3 &bmax, float3 *impact);
-NxF32   DistanceBetweenLines(const float3 &ustart, const float3 &udir, const float3 &vstart, const float3 &vdir, float3 *upoint=NULL, float3 *vpoint=NULL);
-float3  TriNormal(const float3 &v0, const float3 &v1, const float3 &v2);
-float3  NormalOf(const float3 *vert, const NxI32 n);
-Quaternion VirtualTrackBall(const float3 &cop, const float3 &cor, const float3 &dir0, const float3 &dir1);
-
-
-
-
-//*****************************************************
-// ** VECMATH.CPP
-//*****************************************************
-
-
-// a squared
-NxF32   sqr(NxF32 a)
-{
-	return a*a;
-}
-
-// clamps 'a' to the range [0,1]
-NxF32   clampf(NxF32 a)
-{
-	const NxF32 atLeastZero = Max(0.0f,a);
-	return Min(1.0f,atLeastZero);
-}
-
-
-// rounds 'a' to the nearest multiple of 'precision'
-NxF32 Round(NxF32 a,NxF32 precision)
-{
-	const NxF32 steps = floorf(0.5f+a/precision);
-	return steps*precision;
-}
-
-
-// linear blend: f0 at alpha 0, f1 at alpha 1
-NxF32 Interpolate(const NxF32 &f0,const NxF32 &f1,NxF32 alpha)
-{
-	const NxF32 keep = 1-alpha;
-	return f0*keep + f1*alpha;
-}
-
-
-// index of the smallest of the first n entries of 'a' (first one wins on ties; 0 when n<=1)
-NxI32     argmin(NxF32 a[],NxI32 n)
-{
-	NxI32 best = 0;
-	NxI32 i = 1;
-	while(i < n)
-	{
-		if(a[i] < a[best])
-		{
-			best = i;
-		}
-		++i;
-	}
-	return best;
-}
-
-
-
-//------------ float3 (3D) --------------
-
-
-
-// component-wise sum
-float3 operator+( const float3& a, const float3& b )
-{
-	float3 sum(a);
-	sum.x += b.x;
-	sum.y += b.y;
-	sum.z += b.z;
-	return sum;
-}
-
-
-// component-wise difference
-float3 operator-( const float3& a, const float3& b )
-{
-	float3 diff(a);
-	diff.x -= b.x;
-	diff.y -= b.y;
-	diff.z -= b.z;
-	return diff;
-}
-
-
-// negation
-float3 operator-( const float3& v )
-{
-	return float3( -v.x, -v.y, -v.z );
-}
-
-
-// vector scaled by s
-float3 operator*( const float3& v, NxF32 s )
-{
-	float3 scaled(v);
-	scaled.x *= s;
-	scaled.y *= s;
-	scaled.z *= s;
-	return scaled;
-}
-
-
-// scalar-first form of the scaling above
-float3 operator*( NxF32 s, const float3& v )
-{
-	return float3( v.x*s, v.y*s, v.z*s );
-}
-
-
-// vector multiplied by the reciprocal of s
-float3 operator/( const float3& v, NxF32 s )
-{
-	const NxF32 reciprocal = 1.0f/s;
-	return v*reciprocal;
-}
-
-// dot product
-NxF32  dot( const float3& a, const float3& b )
-{
-	NxF32 sum = a.x*b.x;
-	sum = sum + a.y*b.y;
-	sum = sum + a.z*b.z;
-	return sum;
-}
-
-// component-wise product
-float3 cmul( const float3 &v1, const float3 &v2)
-{
-	const NxF32 px = v1.x*v2.x;
-	const NxF32 py = v1.y*v2.y;
-	const NxF32 pz = v1.z*v2.z;
-	return float3(px, py, pz);
-}
-
-
-// cross product a x b
-float3 cross( const float3& a, const float3& b )
-{
-	const NxF32 cx = a.y*b.z - a.z*b.y;
-	const NxF32 cy = a.z*b.x - a.x*b.z;
-	const NxF32 cz = a.x*b.y - a.y*b.x;
-	return float3(cx, cy, cz);
-}
-
-
-
-
-// in-place component-wise add
-float3& operator+=( float3& a , const float3& b )
-{
-	a.x = a.x + b.x;
-	a.y = a.y + b.y;
-	a.z = a.z + b.z;
-	return a;
-}
-
-
-// in-place component-wise subtract
-float3& operator-=( float3& a , const float3& b )
-{
-	a.x = a.x - b.x;
-	a.y = a.y - b.y;
-	a.z = a.z - b.z;
-	return a;
-}
-
-
-// in-place scale by s
-float3& operator*=(float3& v , NxF32 s )
-{
-	v.x = v.x * s;
-	v.y = v.y * s;
-	v.z = v.z * s;
-	return v;
-}
-
-
-// in-place scale by the reciprocal of s
-float3& operator/=(float3& v , NxF32 s )
-{
-	const NxF32 reciprocal = 1.0f / s;
-	v.x = v.x * reciprocal;
-	v.y = v.y * reciprocal;
-	v.z = v.z * reciprocal;
-	return v;
-}
-
-// component-wise absolute value
-float3 vabs(const float3 &v)
-{
-	const NxF32 ax = fabsf(v.x);
-	const NxF32 ay = fabsf(v.y);
-	const NxF32 az = fabsf(v.z);
-	return float3(ax, ay, az);
-}
-
-
-// Euclidean length
-NxF32 magnitude( const float3& v )
-{
-	const NxF32 lengthSquared = sqr(v.x) + sqr( v.y)+ sqr(v.z);
-	return sqrtf(lengthSquared);
-}
-
-
-
-// Returns v scaled to unit length. A zero-length input is reported with printf
-// and an assert; if execution continues, a length of 0.1 is substituted, so the
-// result is the zero vector again.
-float3 normalize( const float3 &v )
-{
-	// this routine, normalize, is ok, provided magnitude works!!
-	NxF32 length = magnitude(v);
-	if (length==0)
-	{
-		printf("Cant normalize ZERO vector\n");
-		assert(0);// yes this could go here
-		length = 0.1f;
-	}
-	const NxF32 scale = 1/length;
-	return float3(v.x*scale, v.y*scale, v.z*scale);
-}
-
-// normalize() that falls back to (1,0,0) for a vector whose length is not positive
-float3 safenormalize(const float3 &v)
-{
-	const bool degenerate = (magnitude(v)<=0.0f);
-	return degenerate ? float3(1,0,0) : normalize(v);
-}
-
-// rounds each component to the nearest multiple of 'precision'
-float3 Round(const float3 &a,NxF32 precision)
-{
-	const NxF32 rx = Round(a.x,precision);
-	const NxF32 ry = Round(a.y,precision);
-	const NxF32 rz = Round(a.z,precision);
-	return float3(rx, ry, rz);
-}
-
-
-// linear blend: v0 at alpha 0, v1 at alpha 1
-float3 Interpolate(const float3 &v0,const float3 &v1,NxF32 alpha)
-{
-	const NxF32 keep = 1-alpha;
-	return v0*keep + v1*alpha;
-}
-
-// component-wise minimum
-float3 VectorMin(const float3 &a,const float3 &b)
-{
-	const NxF32 lx = Min(a.x,b.x);
-	const NxF32 ly = Min(a.y,b.y);
-	const NxF32 lz = Min(a.z,b.z);
-	return float3(lx, ly, lz);
-}
-// component-wise maximum
-float3 VectorMax(const float3 &a,const float3 &b)
-{
-	const NxF32 hx = Max(a.x,b.x);
-	const NxF32 hy = Max(a.y,b.y);
-	const NxF32 hz = Max(a.z,b.z);
-	return float3(hx, hy, hz);
-}
-
-// the statement v1*v2 is ambiguous since there are 3 types
-// of vector multiplication
-//  - componentwise (for example combining colors)
-//  - dot product
-//  - cross product
-// Therefore we never declare/implement this function.
-// So we will never see:  float3 operator*(float3 a,float3 b)
-
-
-
-
-//------------ float3x3 ---------------
-// Determinant of a 3x3 matrix: three positive and three negative triple products,
-// summed in the order written.
-NxF32 Determinant(const float3x3 &m)
-{
-	const NxF32 p0 = m.x.x*m.y.y*m.z.z;
-	const NxF32 p1 = m.y.x*m.z.y*m.x.z;
-	const NxF32 p2 = m.z.x*m.x.y*m.y.z;
-	const NxF32 n0 = m.x.x*m.z.y*m.y.z;
-	const NxF32 n1 = m.y.x*m.x.y*m.z.z;
-	const NxF32 n2 = m.z.x*m.y.y*m.x.z;
-	return p0 + p1 + p2 - n0 - n1 - n2;
-}
-
-// Inverse of a 3x3 matrix by cofactors: every 2x2 minor is divided by the
-// determinant and stored transposed. The determinant is asserted non-zero.
-float3x3 Inverse(const float3x3 &a)
-{
-	float3x3 result;
-	const NxF32 det = Determinant(a);
-	assert(det!=0);
-	for(NxI32 row=0; row<3; row++)
-	{
-		const NxI32 rowA = (row+1)%3;
-		const NxI32 rowB = (row+2)%3;
-		for(NxI32 col=0; col<3; col++)
-		{
-			const NxI32 colA = (col+1)%3;
-			const NxI32 colB = (col+2)%3;
-			const NxF32 minor = a[rowA][colA]*a[rowB][colB]-a[rowA][colB]*a[rowB][colA];
-			// indices swapped on the left-hand side to take the transpose
-			result[col][row] = minor/det;
-		}
-	}
-	// Matrix check=a*result; // Matrix 'check' should be the identity (or close to it)
-	return result;
-}
-
-
-float3x3 Transpose( const float3x3& m )
-{
-	return float3x3( float3(m.x.x,m.y.x,m.z.x),
-					float3(m.x.y,m.y.y,m.z.y),
-					float3(m.x.z,m.y.z,m.z.z));
-}
-
-
-// Row vector times matrix: each output component combines one column of m.
-float3 operator*(const float3& v , const float3x3 &m ) {
-	const NxF32 rx = (m.x.x*v.x + m.y.x*v.y + m.z.x*v.z);
-	const NxF32 ry = (m.x.y*v.x + m.y.y*v.y + m.z.y*v.z);
-	const NxF32 rz = (m.x.z*v.x + m.y.z*v.y + m.z.z*v.z);
-	return float3(rx,ry,rz);
-}
-// Matrix times column vector: each output component is a row of m dotted with v.
-float3 operator*(const float3x3 &m,const float3& v  ) {
-	const NxF32 rx = dot(m.x,v);
-	const NxF32 ry = dot(m.y,v);
-	const NxF32 rz = dot(m.z,v);
-	return float3(rx,ry,rz);
-}
-
-
-// Matrix product: every row of a is transformed by b (row-vector convention).
-float3x3 operator*( const float3x3& a, const float3x3& b )
-{
-	float3 row0 = a.x*b;
-	float3 row1 = a.y*b;
-	float3 row2 = a.z*b;
-	return float3x3(row0,row1,row2);
-}
-
-// Scales every row of a by s.
-float3x3 operator*( const float3x3& a, const NxF32& s )
-{
-	float3 row0 = a.x*s;
-	float3 row1 = a.y*s;
-	float3 row2 = a.z*s;
-	return float3x3(row0,row1,row2);
-}
-// Divides every row of a by s, implemented as one reciprocal followed
-// by multiplications.  s is not checked against zero.
-float3x3 operator/( const float3x3& a, const NxF32& s )
-{
-	const NxF32 reciprocal=1/s;
-	float3 row0 = a.x*reciprocal;
-	float3 row1 = a.y*reciprocal;
-	float3 row2 = a.z*reciprocal;
-	return float3x3(row0,row1,row2);
-}
-// Row-by-row sum of two matrices.
-float3x3 operator+( const float3x3& a, const float3x3& b )
-{
-	float3 row0 = a.x+b.x;
-	float3 row1 = a.y+b.y;
-	float3 row2 = a.z+b.z;
-	return float3x3(row0,row1,row2);
-}
-// Row-by-row difference of two matrices.
-float3x3 operator-( const float3x3& a, const float3x3& b )
-{
-	float3 row0 = a.x-b.x;
-	float3 row1 = a.y-b.y;
-	float3 row2 = a.z-b.z;
-	return float3x3(row0,row1,row2);
-}
-// Adds b into a in place, one row at a time; returns a for chaining.
-float3x3 &operator+=( float3x3& a, const float3x3& b )
-{
-	a.z+=b.z;
-	a.y+=b.y;
-	a.x+=b.x;
-	return a;
-}
-// Subtracts b from a in place, one row at a time; returns a for chaining.
-float3x3 &operator-=( float3x3& a, const float3x3& b )
-{
-	a.z-=b.z;
-	a.y-=b.y;
-	a.x-=b.x;
-	return a;
-}
-// Scales a in place by s, one row at a time; returns a for chaining.
-float3x3 &operator*=( float3x3& a, const NxF32& s )
-{
-	a.z*=s;
-	a.y*=s;
-	a.x*=s;
-	return a;
-}
-
-
-
-// Point shared by three planes.  The plane normals are packed into a
-// matrix, that matrix is inverted, and the negated plane distances are
-// pushed through the inverse.  Planes that do not meet in a single point
-// give a zero determinant inside Inverse().
-float3 ThreePlaneIntersection(const Plane &p0,const Plane &p1, const Plane &p2){
-	float3x3 normals(p0.normal,p1.normal,p2.normal);
-	float3x3 inverted = Inverse(Transpose(normals));
-	float3 dists(p0.dist,p1.dist,p2.dist);
-	return -dists * inverted;
-}
-
-
-//--------------- 4D ----------------
-
-// Row vector times 4x4 matrix, written as a weighted sum of the rows of m.
-float4   operator*( const float4&   v, const float4x4& m )
-{
-	float4 sum = v.x*m.x;
-	sum = sum + v.y*m.y;
-	sum = sum + v.z*m.z;
-	sum = sum + v.w*m.w;
-	return sum;
-}
-
-// Exact component-wise equality; returns 1 when all four components match, else 0.
-NxI32 operator==( const float4 &a, const float4 &b )
-{
-	if(!(a.x==b.x)) return 0;
-	if(!(a.y==b.y)) return 0;
-	if(!(a.z==b.z)) return 0;
-	if(!(a.w==b.w)) return 0;
-	return 1;
-}
-
-
-//  Don't implement m*v for now, since that might confuse us
-//  All our transforms are based on multiplying the "row" vector on the left
-//float4   operator*(const float4x4& m , const float4&   v )
-//{
-//	return float4(dot(v,m.x),dot(v,m.y),dot(v,m.z),dot(v,m.w));
-//}
-
-
-
-// Component-wise product of two 4-vectors.
-float4 cmul( const float4 &a, const float4 &b)
-{
-	const NxF32 px = a.x*b.x;
-	const NxF32 py = a.y*b.y;
-	const NxF32 pz = a.z*b.z;
-	const NxF32 pw = a.w*b.w;
-	return float4(px,py,pz,pw);
-}
-
-
-// 4-vector scaled by a scalar on the right.
-float4 operator*( const float4 &v, NxF32 s)
-{
-	const NxF32 sx = v.x*s;
-	const NxF32 sy = v.y*s;
-	const NxF32 sz = v.z*s;
-	const NxF32 sw = v.w*s;
-	return float4(sx,sy,sz,sw);
-}
-
-
-// 4-vector scaled by a scalar on the left.
-float4 operator*( NxF32 s, const float4 &v)
-{
-	const NxF32 sx = v.x*s;
-	const NxF32 sy = v.y*s;
-	const NxF32 sz = v.z*s;
-	const NxF32 sw = v.w*s;
-	return float4(sx,sy,sz,sw);
-}
-
-
-// Component-wise sum of two 4-vectors.
-float4 operator+( const float4 &a, const float4 &b)
-{
-	const NxF32 sx = a.x+b.x;
-	const NxF32 sy = a.y+b.y;
-	const NxF32 sz = a.z+b.z;
-	const NxF32 sw = a.w+b.w;
-	return float4(sx,sy,sz,sw);
-}
-
-
-
-// Component-wise difference of two 4-vectors.
-float4 operator-( const float4 &a, const float4 &b)
-{
-	const NxF32 dx = a.x-b.x;
-	const NxF32 dy = a.y-b.y;
-	const NxF32 dz = a.z-b.z;
-	const NxF32 dw = a.w-b.w;
-	return float4(dx,dy,dz,dw);
-}
-
-
-// Extends a 3-vector to a 4-vector by appending the supplied w.
-float4 Homogenize(const float3 &v3,const NxF32 &w)
-{
-	float4 extended(v3.x,v3.y,v3.z,w);
-	return extended;
-}
-
-
-
-// 4x4 matrix product: every row of a is transformed by b.
-float4x4 operator*( const float4x4& a, const float4x4& b )
-{
-	float4 row0 = a.x*b;
-	float4 row1 = a.y*b;
-	float4 row2 = a.z*b;
-	float4 row3 = a.w*b;
-	return float4x4(row0,row1,row2,row3);
-}
-
-// Returns m with rows and columns exchanged; each argument line below
-// is one column of m laid out as a row of the result.
-float4x4 MatrixTranspose(const float4x4 &m)
-{
-	float4x4 flipped(
-		m.x.x, m.y.x, m.z.x, m.w.x,
-		m.x.y, m.y.y, m.z.y, m.w.y,
-		m.x.z, m.y.z, m.z.z, m.w.z,
-		m.x.w, m.y.w, m.z.w, m.w.w );
-	return flipped;
-}
-
-// Inverse of a rigid (rotation + translation) transform: undo the
-// translation held in row w, then apply the transposed rotation.
-// Only valid when the upper 3x3 of m is a pure rotation.
-float4x4 MatrixRigidInverse(const float4x4 &m)
-{
-	float4x4 rotationOnly = m;
-	rotationOnly.w = float4(0,0,0,1);
-	float4x4 undoTranslation = MatrixTranslation(-m.w.xyz());
-	return undoTranslation * MatrixTranspose(rotationOnly);
-}
-
-
-// Perspective projection matrix built from a vertical field of view
-// (radians, as passed to tanf), an aspect ratio, and near/far distances.
-float4x4 MatrixPerspectiveFov(NxF32 fovy, NxF32 aspect, NxF32 zn, NxF32 zf )
-{
-	const NxF32 yScale = 1.0f/tanf(fovy/2.0f);
-	const NxF32 xScale = yScale / aspect ;
-	const NxF32 zScale = zf/(zn-zf);
-	const NxF32 zShift = zn*zf/(zn-zf);
-	return float4x4(
-		xScale, 0,      0,      0,
-		0,      yScale, 0,      0,
-		0,      0,      zScale, -1,
-		0,      0,      zShift, 0 );
-}
-
-
-
-// View matrix for a camera at 'eye' looking toward 'at'.  The camera's
-// own frame is assembled first (z points from 'at' back to 'eye') and
-// then inverted as a rigid transform.
-float4x4 MatrixLookAt(const float3& eye, const float3& at, const float3& up)
-{
-	float4x4 camera;
-	float3 backward = normalize(eye-at);
-	float3 right    = normalize(cross(up,backward));
-	float3 upward   = cross(backward,right);
-	camera.x.xyz() = right;
-	camera.y.xyz() = upward;
-	camera.z.xyz() = backward;
-	camera.w.xyz() = eye;
-	camera.w.w = 1.0f;
-	return MatrixRigidInverse(camera);
-}
-
-
-// Identity matrix with the translation t stored in the last row.
-float4x4 MatrixTranslation(const float3 &t)
-{
-	float4x4 shift(
-		1,   0,   0,   0,
-		0,   1,   0,   0,
-		0,   0,   1,   0,
-		t.x, t.y, t.z, 1 );
-	return shift;
-}
-
-
-// Rotation about the Z axis by the given angle in radians.
-float4x4 MatrixRotationZ(const NxF32 angle_radians)
-{
-	const NxF32 sine   = sinf(angle_radians);
-	const NxF32 cosine = cosf(angle_radians);
-	return float4x4(
-		cosine, sine,   0, 0,
-		-sine,  cosine, 0, 0,
-		0,      0,      1, 0,
-		0,      0,      0, 1 );
-}
-
-
-
-// Exact row-by-row equality; returns 1 when all four rows match, else 0.
-NxI32 operator==( const float4x4 &a, const float4x4 &b )
-{
-	if(!(a.x==b.x)) return 0;
-	if(!(a.y==b.y)) return 0;
-	if(!(a.z==b.z)) return 0;
-	if(!(a.w==b.w)) return 0;
-	return 1;
-}
-
-
-float4x4 Inverse(const float4x4 &m) /* General 4x4 inverse by cofactors.
-   The 16 cofactors are written straight into the result through dst,
-   the determinant is rebuilt from the first four of them, and every
-   entry is finally scaled by 1/det.  The determinant is never tested,
-   so a singular matrix divides by zero. */
-{
-	float4x4 d;
-	NxF32 *dst = &d.x.x; // flat view of the result; relies on d's 16 floats being contiguous
-	NxF32 tmp[12]; /* temp array for pairs */
-	NxF32 src[16]; /* array of transpose source matrix */
-	NxF32 det; /* determinant */
-	/* transpose matrix: src[i + 4*k] receives m(i,k) */
-	for ( NxI32 i = 0; i < 4; i++) {
-		src[i] = m(i,0) ;
-		src[i + 4] = m(i,1);
-		src[i + 8] = m(i,2);
-		src[i + 12] = m(i,3);
-	}
-	/* calculate pairs for first 8 elements (cofactors):
-	   products of two entries taken from src[8..15] */
-	tmp[0]  = src[10] * src[15];
-	tmp[1]  = src[11] * src[14];
-	tmp[2]  = src[9] * src[15];
-	tmp[3]  = src[11] * src[13];
-	tmp[4]  = src[9] * src[14];
-	tmp[5]  = src[10] * src[13];
-	tmp[6]  = src[8] * src[15];
-	tmp[7]  = src[11] * src[12];
-	tmp[8]  = src[8] * src[14];
-	tmp[9]  = src[10] * src[12];
-	tmp[10] = src[8] * src[13];
-	tmp[11] = src[9] * src[12];
-	/* calculate first 8 elements (cofactors):
-	   each dst[k] is a sum of positive terms minus a sum of negative terms */
-	dst[0]  = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];
-	dst[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];
-	dst[1]  = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];
-	dst[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];
-	dst[2]  = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];
-	dst[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];
-	dst[3]  = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];
-	dst[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];
-	dst[4]  = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];
-	dst[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];
-	dst[5]  = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];
-	dst[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];
-	dst[6]  = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];
-	dst[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];
-	dst[7]  = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];
-	dst[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];
-	/* calculate pairs for second 8 elements (cofactors):
-	   tmp is reused, now holding products of entries from src[0..7] */
-	tmp[0]  = src[2]*src[7];
-	tmp[1]  = src[3]*src[6];
-	tmp[2]  = src[1]*src[7];
-	tmp[3]  = src[3]*src[5];
-	tmp[4]  = src[1]*src[6];
-	tmp[5]  = src[2]*src[5];
-	tmp[6]  = src[0]*src[7];
-	tmp[7]  = src[3]*src[4];
-	tmp[8]  = src[0]*src[6];
-	tmp[9]  = src[2]*src[4];
-	tmp[10] = src[0]*src[5];
-	tmp[11] = src[1]*src[4];
-	/* calculate second 8 elements (cofactors) */
-	dst[8]  = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];
-	dst[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];
-	dst[9]  = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];
-	dst[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];
-	dst[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];
-	dst[10]-= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];
-	dst[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];
-	dst[11]-= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];
-	dst[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];
-	dst[12]-= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];
-	dst[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];
-	dst[13]-= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];
-	dst[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];
-	dst[14]-= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];
-	dst[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];
-	dst[15]-= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];
-	/* calculate determinant: first row of src against its four cofactors */
-	det=src[0]*dst[0]+src[1]*dst[1]+src[2]*dst[2]+src[3]*dst[3];
-	/* calculate matrix inverse: scale all cofactors by 1/det
-	   (NOTE(review): det == 0 is not guarded here) */
-	det = 1/det;
-	for ( NxI32 j = 0; j < 16; j++)
-	dst[j] *= det;
-	return d;
-}
-
-
-//--------- Quaternion --------------
-
-// Quaternion product (Hamilton product) of a and b.
-Quaternion operator*( const Quaternion& a, const Quaternion& b )
-{
-	Quaternion product;
-	product.x = a.w*b.x + a.x*b.w + a.y*b.z - a.z*b.y;
-	product.y = a.w*b.y - a.x*b.z + a.y*b.w + a.z*b.x;
-	product.z = a.w*b.z + a.x*b.y - a.y*b.x + a.z*b.w;
-	product.w = a.w*b.w - a.x*b.x - a.y*b.y - a.z*b.z;
-	return product;
-}
-
-
-// Scales all four quaternion components by b.
-Quaternion operator*( const Quaternion& a, NxF32 b )
-{
-	const NxF32 sx = a.x*b;
-	const NxF32 sy = a.y*b;
-	const NxF32 sz = a.z*b;
-	const NxF32 sw = a.w*b;
-	return Quaternion(sx, sy, sz, sw);
-}
-
-// Conjugate of q (vector part negated).  This equals the inverse only
-// for unit-length quaternions; no normalization is done here.
-Quaternion  Inverse(const Quaternion &q)
-{
-	Quaternion conjugate(-q.x,-q.y,-q.z,q.w);
-	return conjugate;
-}
-
-// Scales q in place by s; returns q for chaining.
-Quaternion& operator*=( Quaternion& q, const NxF32 s )
-{
-	q.w *= s;
-	q.z *= s;
-	q.y *= s;
-	q.x *= s;
-	return q;
-}
-// Rescales this quaternion to unit length.  A (near) zero-length
-// quaternion is reset to the identity rotation instead.
-void Quaternion::Normalize()
-{
-	const NxF32 length = sqrtf(sqr(w)+sqr(x)+sqr(y)+sqr(z));
-	if(!(length<0.000000001f))
-	{
-		(*this) *= (1.0f/length);
-		return;
-	}
-	w=1.0f;
-	x=y=z=0.0f;
-}
-
-// Rotates v by q.  The nine coefficients are the entries of the rotation
-// matrix that q represents (the original notes this is equivalent to
-// q.getmatrix() * v), expanded inline.
-float3 operator*( const Quaternion& q, const float3& v )
-{
-	const NxF32 xx = q.x*q.x;
-	const NxF32 yy = q.y*q.y;
-	const NxF32 zz = q.z*q.z;
-
-	const NxF32 xy = q.x*q.y;
-	const NxF32 xz = q.x*q.z;
-	const NxF32 xw = q.x*q.w;
-	const NxF32 yz = q.y*q.z;
-	const NxF32 yw = q.y*q.w;
-	const NxF32 zw = q.z*q.w;
-
-	const NxF32 outX = (1-2*(yy+zz))*v.x + (2*(xy-zw))*v.y + (2*(xz+yw))*v.z;
-	const NxF32 outY = (2*(xy+zw))*v.x + (1-2*(xx+zz))*v.y + (2*(yz-xw))*v.z;
-	const NxF32 outZ = (2*(xz-yw))*v.x + (2*(yz+xw))*v.y + (1-2*(xx+yy))*v.z;
-	return float3(outX,outY,outZ);
-}
-
-// Component-wise sum of two quaternions.
-Quaternion operator+( const Quaternion& a, const Quaternion& b )
-{
-	const NxF32 sx = a.x+b.x;
-	const NxF32 sy = a.y+b.y;
-	const NxF32 sz = a.z+b.z;
-	const NxF32 sw = a.w+b.w;
-	return Quaternion(sx, sy, sz, sw);
-}
-
-// Four-component dot product of two quaternions.
-NxF32 dot( const Quaternion &a,const Quaternion &b )
-{
-	const NxF32 sum = (a.w*b.w + a.x*b.x + a.y*b.y + a.z*b.z);
-	return sum;
-}
-
-// Unit-length copy of a.  A (near) zero-length input yields the
-// identity rotation.
-Quaternion normalize( Quaternion a )
-{
-	const NxF32 length = sqrtf(sqr(a.w)+sqr(a.x)+sqr(a.y)+sqr(a.z));
-	if(!(length<0.000000001))
-	{
-		return a * (1/length);
-	}
-	a.w=1;
-	a.x=a.y=a.z=0;
-	return a;
-}
-
-// Spherical linear interpolation from a (interp==0) to b (interp==1).
-// a is negated first when the two lie in opposite hemispheres, so the
-// interpolation follows the shorter arc.
-Quaternion slerp( Quaternion a, const Quaternion& b, NxF32 interp )
-{
-	if(dot(a,b) <0.0)
-	{
-		a.x=-a.x;
-		a.y=-a.y;
-		a.z=-a.z;
-		a.w=-a.w;
-	}
-	const NxF32 cosAngle = dot(a,b);
-	if(cosAngle>=1.0)
-	{
-		// already aligned; acosf would have nothing to interpolate over
-		return a;
-	}
-	const NxF32 angle = acosf(cosAngle);
-	if(angle==0.0f)
-	{
-		return(a);
-	}
-	const NxF32 sinAngle = sinf(angle);
-	const NxF32 weightA  = sinf(angle-interp*angle)/sinAngle;
-	const NxF32 weightB  = sinf(interp*angle)/sinAngle;
-	return a*weightA + b*weightB;
-}
-
-
-// Quaternion overload of Interpolate; forwards to slerp.
-Quaternion Interpolate(const Quaternion &q0,const Quaternion &q1,NxF32 alpha) {
-	Quaternion blended = slerp(q0,q1,alpha);
-	return blended;
-}
-
-
-// Builds an orientation from yaw, pitch and roll given in degrees.
-// Yaw turns about Z, pitch about X and roll about Y; the three
-// rotations are multiplied in that order.
-Quaternion YawPitchRoll( NxF32 yaw, NxF32 pitch, NxF32 roll )
-{
-	yaw   *= DEG2RAD;
-	pitch *= DEG2RAD;
-	roll  *= DEG2RAD;
-	Quaternion aboutZ(float3(0.0f,0.0f,1.0f),yaw);
-	Quaternion aboutX(float3(1.0f,0.0f,0.0f),pitch);
-	Quaternion aboutY(float3(0.0f,1.0f,0.0f),roll);
-	return aboutZ*aboutX*aboutY;
-}
-
-// Yaw of q in degrees, measured from the direction of q's rotated Y axis
-// projected onto the XY plane.  Returns 0 when that axis has no XY
-// component (atan2f would be undefined there).
-// The scratch vector is an ordinary local: the previous function-local
-// static made this routine non-reentrant and a data race under threads.
-NxF32 Yaw( const Quaternion& q )
-{
-	float3 v = q.ydir();
-	return (v.y==0.0&&v.x==0.0) ? 0.0f: atan2f(-v.x,v.y)*RAD2DEG;
-}
-
-// Pitch of q in degrees: elevation of q's rotated Y axis above the XY plane.
-// The scratch vector is an ordinary local: the previous function-local
-// static made this routine non-reentrant and a data race under threads.
-NxF32 Pitch( const Quaternion& q )
-{
-	float3 v = q.ydir();
-	return atan2f(v.z,sqrtf(sqr(v.x)+sqr(v.y)))*RAD2DEG;
-}
-
-// Roll of q in degrees.  Yaw and then pitch are rotated out of q, after
-// which the remaining tilt of the X axis is read back.
-NxF32 Roll( Quaternion q )
-{
-	Quaternion undoYaw(float3(0.0f,0.0f,1.0f),-Yaw(q)*DEG2RAD);
-	q = undoYaw  *q;
-	Quaternion undoPitch(float3(1.0f,0.0f,0.0f),-Pitch(q)*DEG2RAD);
-	q = undoPitch  *q;
-	return atan2f(-q.xdir().z,q.xdir().x)*RAD2DEG;
-}
-
-// Yaw in degrees of direction v within the XY plane; 0 when v has no
-// XY component.
-NxF32 Yaw( const float3& v )
-{
-	if(v.y==0.0&&v.x==0.0)
-	{
-		return 0.0f;
-	}
-	return atan2f(-v.x,v.y)*RAD2DEG;
-}
-
-// Pitch in degrees of direction v: its elevation above the XY plane.
-NxF32 Pitch( const float3& v )
-{
-	const NxF32 horizontal = sqrtf(sqr(v.x)+sqr(v.y));
-	return atan2f(v.z,horizontal)*RAD2DEG;
-}
-
-
-//------------- Plane --------------
-
-
-// Re-expresses this plane in the space defined by the given
-// position/orientation.  The normal is rotated by the inverse
-// orientation, and a point on the plane (-normal*dist) is moved into
-// that space to recover the new distance term.
-// Scratch values are plain locals (the former statics made the method
-// non-reentrant) and the inverse orientation is computed once.
-void Plane::Transform(const float3 &position, const Quaternion &orientation) {
-	Quaternion toLocal = Inverse(orientation);
-	float3 newnormal = toLocal*normal;
-	float3 origin = toLocal*(-normal*dist - position);
-
-	normal = newnormal;
-	dist = -dot(newnormal, origin);
-}
-
-
-
-
-//--------- utility functions -------------
-
-//        RotationArc()
-// Given two vectors v0 and v1 this function
-// returns quaternion q where q*v0==v1.
-// Routine taken from game programming gems.
-// Both inputs are normalized here.  Exactly opposite vectors have no
-// unique rotation axis, so a 180 degree turn about X is returned.
-// The result is built in a local instead of a function-local static,
-// which made the routine non-reentrant.
-Quaternion RotationArc(float3 v0,float3 v1){
-	v0 = normalize(v0);  // Comment these two lines out if you know its not needed.
-	v1 = normalize(v1);  // If vector is already unit length then why do it again?
-	float3  c = cross(v0,v1);
-	NxF32   d = dot(v0,v1);
-	if(d<=-1.0f) { return Quaternion(1,0,0,0);} // 180 about x axis
-	NxF32   s = sqrtf((1+d)*2);
-	Quaternion q;
-	q.x = c.x / s;
-	q.y = c.y / s;
-	q.z = c.z / s;
-	q.w = s /2.0f;
-	return q;
-}
-
-
-// Builds a 4x4 transform from orientation q and translation v.  The
-// rotation fills the upper three rows; v goes into the last row.
-float4x4 MatrixFromQuatVec(const Quaternion &q, const float3 &v)
-{
-	const NxF32 xx = q.x*q.x;
-	const NxF32 yy = q.y*q.y;
-	const NxF32 zz = q.z*q.z;
-
-	const NxF32 xy = q.x*q.y;
-	const NxF32 xz = q.x*q.z;
-	const NxF32 xw = q.x*q.w;
-	const NxF32 yz = q.y*q.z;
-	const NxF32 yw = q.y*q.w;
-	const NxF32 zw = q.z*q.w;
-
-	return float4x4(
-		1-2*(yy+zz), 2*(xy+zw),   2*(xz-yw),   0,
-		2*(xy-zw),   1-2*(xx+zz), 2*(yz+xw),   0,
-		2*(xz+yw),   2*(yz-xw),   1-2*(xx+yy), 0,
-		v.x,         v.y,         v.z,         1.0f );
-}
-
-
-// Returns the point where the infinite line through p0 and p1 crosses
-// the plane.  The line is parameterized as p0 + t*(p1-p0) and t is solved
-// from the plane equation.
-// NOTE: a line parallel to the plane makes dn zero and the division is
-// not guarded; callers must rule that case out.
-// The direction is a plain local (the former function-local static made
-// this routine non-reentrant).
-float3 PlaneLineIntersection(const Plane &plane, const float3 &p0, const float3 &p1)
-{
-	float3 dif = p1-p0;
-	NxF32 dn= dot(plane.normal,dif);
-	NxF32 t = -(plane.dist+dot(plane.normal,p0) )/dn;
-	return p0 + (dif*t);
-}
-
-// Moves 'point' along the plane normal by its signed plane distance,
-// landing it on the plane.
-float3 PlaneProject(const Plane &plane, const float3 &point)
-{
-	const NxF32 signedDistance = (dot(point,plane.normal)+plane.dist);
-	return point - plane.normal * signedDistance;
-}
-
-// Closest point to 'a' on the infinite line through p0 and p1.
-// p0 == p1 gives a zero-length direction and an unguarded division.
-float3 LineProject(const float3 &p0, const float3 &p1, const float3 &a)
-{
-	float3 direction = p1-p0;
-	const NxF32 lengthSquared = (sqr(direction.x)+sqr(direction.y)+sqr(direction.z));
-	const NxF32 param = dot(direction,(a-p0)) / lengthSquared;
-	return p0+ direction*param;
-}
-
-
-// Parameter t of the projection of 'a' onto the line p0 + t*(p1-p0):
-// 0 at p0, 1 at p1.  p0 == p1 gives an unguarded division by zero.
-NxF32 LineProjectTime(const float3 &p0, const float3 &p1, const float3 &a)
-{
-	float3 direction = p1-p0;
-	const NxF32 lengthSquared = (sqr(direction.x)+sqr(direction.y)+sqr(direction.z));
-	return dot(direction,(a-p0)) / lengthSquared;
-}
-
-
-
-// Unit normal of the triangle v0,v1,v2, taken from the cross product of
-// two consecutive edges.  A degenerate triangle (zero-length cross
-// product) yields the +X axis.
-float3 TriNormal(const float3 &v0, const float3 &v1, const float3 &v2)
-{
-	float3 firstEdge  = v1-v0;
-	float3 secondEdge = v2-v1;
-	float3 perpendicular = cross(firstEdge,secondEdge);
-	NxF32 length = magnitude(perpendicular);
-	if(length==0)
-	{
-		return float3(1,0,0);
-	}
-	return perpendicular*(1.0f/length);
-}
-
-
-
-// Returns 1 when p lies inside or on the axis-aligned box [bmin,bmax],
-// otherwise 0.  Axes are tested one at a time.
-NxI32 BoxInside(const float3 &p, const float3 &bmin, const float3 &bmax)
-{
-	if(!(p.x >= bmin.x && p.x <=bmax.x)) return 0;
-	if(!(p.y >= bmin.y && p.y <=bmax.y)) return 0;
-	if(!(p.z >= bmin.z && p.z <=bmax.z)) return 0;
-	return 1;
-}
-
-
-// Helper for BoxIntersect: evaluates the segment at parameter t for the
-// two coordinates that lie within a box face, and reports whether that
-// point falls inside the face's extents.  On a hit the two interpolated
-// coordinates are written to *outA / *outB.
-static NxI32 BoxFaceHit(NxF32 t,
-						NxF32 a0,NxF32 a1,NxF32 amin,NxF32 amax,
-						NxF32 b0,NxF32 b1,NxF32 bmin,NxF32 bmax,
-						NxF32 *outA,NxF32 *outB)
-{
-	NxF32 ca =  (1-t) *a0 + t*a1;
-	NxF32 cb =  (1-t) *b0 + t*b1;
-	if(ca>=amin && ca<=amax && cb>=bmin && cb<=bmax)
-	{
-		*outA = ca;
-		*outB = cb;
-		return 1;
-	}
-	return 0;
-}
-
-// Tests the segment v0->v1 against the axis-aligned box [bmin,bmax].
-// Returns 1 and stores the hit point in *impact when the segment starts
-// inside the box or enters it through one of its faces; returns 0
-// otherwise.  For each axis the min face is tried when the segment
-// crosses it in the increasing direction, else the max face when it
-// crosses that one in the decreasing direction.
-NxI32 BoxIntersect(const float3 &v0, const float3 &v1, const float3 &bmin, const float3 &bmax,float3 *impact)
-{
-	if(BoxInside(v0,bmin,bmax))
-	{
-		*impact=v0;
-		return 1;
-	}
-	NxF32 ca,cb;
-
-	// faces perpendicular to X: in-face coordinates are (y,z)
-	if(v0.x<=bmin.x && v1.x>=bmin.x)
-	{
-		NxF32 t = (bmin.x-v0.x)/(v1.x-v0.x);
-		if(BoxFaceHit(t, v0.y,v1.y,bmin.y,bmax.y, v0.z,v1.z,bmin.z,bmax.z, &ca,&cb))
-		{
-			impact->x = bmin.x;
-			impact->y = ca;
-			impact->z = cb;
-			return 1;
-		}
-	}
-	else if(v0.x >= bmax.x  &&  v1.x <= bmax.x)
-	{
-		NxF32 t = (bmax.x-v0.x)/(v1.x-v0.x);
-		if(BoxFaceHit(t, v0.y,v1.y,bmin.y,bmax.y, v0.z,v1.z,bmin.z,bmax.z, &ca,&cb))
-		{
-			impact->x = bmax.x;
-			impact->y = ca;
-			impact->z = cb;
-			return 1;
-		}
-	}
-
-	// faces perpendicular to Y: in-face coordinates are (x,z)
-	if(v0.y<=bmin.y && v1.y>=bmin.y)
-	{
-		NxF32 t = (bmin.y-v0.y)/(v1.y-v0.y);
-		if(BoxFaceHit(t, v0.x,v1.x,bmin.x,bmax.x, v0.z,v1.z,bmin.z,bmax.z, &ca,&cb))
-		{
-			impact->x = ca;
-			impact->y = bmin.y;
-			impact->z = cb;
-			return 1;
-		}
-	}
-	else if(v0.y >= bmax.y  &&  v1.y <= bmax.y)
-	{
-		NxF32 t = (bmax.y-v0.y)/(v1.y-v0.y);
-		if(BoxFaceHit(t, v0.x,v1.x,bmin.x,bmax.x, v0.z,v1.z,bmin.z,bmax.z, &ca,&cb))
-		{
-			impact->x = ca;
-			impact->y = bmax.y;
-			impact->z = cb;
-			return 1;
-		}
-	}
-
-	// faces perpendicular to Z: in-face coordinates are (x,y)
-	if(v0.z<=bmin.z && v1.z>=bmin.z)
-	{
-		NxF32 t = (bmin.z-v0.z)/(v1.z-v0.z);
-		if(BoxFaceHit(t, v0.x,v1.x,bmin.x,bmax.x, v0.y,v1.y,bmin.y,bmax.y, &ca,&cb))
-		{
-			impact->x = ca;
-			impact->y = cb;
-			impact->z = bmin.z;
-			return 1;
-		}
-	}
-	else if(v0.z >= bmax.z  &&  v1.z <= bmax.z)
-	{
-		NxF32 t = (bmax.z-v0.z)/(v1.z-v0.z);
-		if(BoxFaceHit(t, v0.x,v1.x,bmin.x,bmax.x, v0.y,v1.y,bmin.y,bmax.y, &ca,&cb))
-		{
-			impact->x = ca;
-			impact->y = cb;
-			impact->z = bmax.z;
-			return 1;
-		}
-	}
-	return 0;
-}
-
-
-// Distance between two infinite lines, each given by a start point and a
-// direction.  cp is the unit vector perpendicular to both lines; the
-// distance is the difference of the two lines' offsets along cp.
-// When upoint / vpoint are non-null they receive the closest point on the
-// u line / v line, found by intersecting that line with the plane that
-// contains the other line and cp.
-// Parallel lines give a zero cross product, which normalize() cannot
-// handle; callers must rule that case out.
-// cp is a plain local (the former function-local static made this
-// routine non-reentrant).
-NxF32 DistanceBetweenLines(const float3 &ustart, const float3 &udir, const float3 &vstart, const float3 &vdir, float3 *upoint, float3 *vpoint)
-{
-	float3 cp = normalize(cross(udir,vdir));
-
-	NxF32 distu = -dot(cp,ustart);
-	NxF32 distv = -dot(cp,vstart);
-	NxF32 dist = (NxF32)fabs(distu-distv);
-	if(upoint)
-	{
-		Plane plane;
-		plane.normal = normalize(cross(vdir,cp));
-		plane.dist = -dot(plane.normal,vstart);
-		*upoint = PlaneLineIntersection(plane,ustart,ustart+udir);
-	}
-	if(vpoint)
-	{
-		Plane plane;
-		plane.normal = normalize(cross(udir,cp));
-		plane.dist = -dot(plane.normal,ustart);
-		*vpoint = PlaneLineIntersection(plane,vstart,vstart+vdir);
-	}
-	return dist;
-}
-
-
-// Helper for VirtualTrackBall: maps one mouse ray onto the virtual
-// sphere around cor.  The ray from cop along dir is intersected with the
-// plane (nrml,dist) through cor, the offset from cor is scaled by
-// fudgefactor, and the point is then either clamped to the sphere's rim
-// (offset longer than 1) or lifted onto the sphere toward the viewer.
-static float3 TrackBallSpherePoint(const float3 &cop, const float3 &cor, const float3 &dir,
-								   const float3 &nrml, NxF32 dist, NxF32 fudgefactor)
-{
-	float3 p = PlaneLineIntersection(Plane(nrml,dist),cop,cop+dir);
-	p=p-cor;
-	p=p*fudgefactor;
-	NxF32 m = magnitude(p);
-	if(m>1)
-	{
-		p/=m;
-	}
-	else
-	{
-		p=p - (nrml * sqrtf(1-m*m));
-	}
-	return p;
-}
-
-// Track ball rotation (routine taken from game programming gems).
-//  cop   center of projection
-//  cor   center of rotation
-//  dir1  old mouse direction
-//  dir2  new mouse direction
-// Imagine a sphere around cor, find where dir1 and dir2 hit it, and
-// return the rotation that carries the first point onto the second.
-Quaternion VirtualTrackBall(const float3 &cop, const float3 &cor, const float3 &dir1, const float3 &dir2)
-{
-	// plane through cor facing the center of projection
-	float3 nrml = cor - cop;
-	NxF32 fudgefactor = 1.0f/(magnitude(nrml) * 0.25f); // since trackball proportional to distance from cop
-	nrml = normalize(nrml);
-	NxF32 dist = -dot(nrml,cor);
-
-	float3 u = TrackBallSpherePoint(cop,cor,dir1,nrml,dist,fudgefactor);
-	float3 v = TrackBallSpherePoint(cop,cor,dir2,nrml,dist,fudgefactor);
-	return RotationArc(u,v);
-}
-
-
-NxI32 countpolyhit=0; // number of PolyHit calls made so far
-
-// Tests the segment v0->v1 against the polygon vert[0..n-1].
-// Returns 1 on a hit and, when the pointers are non-null, stores the hit
-// point in *impact and the polygon's unit normal in *normal; returns 0
-// otherwise.  Only segments that start on or in front of the polygon's
-// plane and end on or behind it can hit.  A polygon whose summed normal
-// has zero length is reported as a miss.
-// Changes from the earlier version: the hit point is a plain local (the
-// function-local static made the routine non-reentrant), and a segment
-// with both endpoints exactly on the plane is rejected explicitly.  That
-// case used to evaluate 0/0 and was rejected only because the resulting
-// NaN failed the inside test.
-NxI32 PolyHit(const float3 *vert, const NxI32 n, const float3 &v0, const float3 &v1, float3 *impact, float3 *normal)
-{
-	countpolyhit++;
-	NxI32 i;
-	// polygon normal: sum of the cross products of consecutive edge pairs
-	float3 nrml(0,0,0);
-	for(i=0;i<n;i++)
-	{
-		NxI32 i1=(i+1)%n;
-		NxI32 i2=(i+2)%n;
-		nrml = nrml + cross(vert[i1]-vert[i],vert[i2]-vert[i1]);
-	}
-
-	NxF32 m = magnitude(nrml);
-	if(m==0.0)
-	{
-		return 0;
-	}
-	nrml = nrml * (1.0f/m);
-	NxF32 dist = -dot(nrml,vert[0]);
-	NxF32 d0,d1;
-	if((d0=dot(v0,nrml)+dist) <0  ||  (d1=dot(v1,nrml)+dist) >0)
-	{
-		return 0;
-	}
-	if(d0==d1)
-	{
-		// both endpoints lie on the plane: no single crossing point exists
-		return 0;
-	}
-
-	// By using the cached plane distances d0 and d1
-	// we can optimize the following:
-	//     the_point = planelineintersection(nrml,dist,v0,v1);
-	NxF32 a = d0/(d0-d1);
-	float3 the_point = v0*(1-a) + v1*a;
-
-
-	NxI32 inside=1;
-	for(NxI32 j=0;inside && j<n;j++)
-	{
-		// let inside = 0 if outside
-		float3 pp1,pp2,side;
-		pp1 = vert[j] ;
-		pp2 = vert[(j+1)%n];
-		side = cross((pp2-pp1),(the_point-pp1));
-		inside = (dot(nrml,side) >= 0.0);
-	}
-	if(inside)
-	{
-		if(normal){*normal=nrml;}
-		if(impact){*impact=the_point;}
-	}
-	return inside;
-}
-
-//****************************************************
-// HULL.H source code goes here
-//****************************************************
-// Plain result record filled in by ComputeHull and released with
-// ReleaseHull (both declared right after this class).
-class PHullResult
-{
-public:
-
-	// Starts out empty: every count is zero and no buffer is attached.
-	PHullResult(void)
-		: mVcount(0)
-		, mIndexCount(0)
-		, mFaceCount(0)
-		, mVertices(0)
-		, mIndices(0)
-	{
-	}
-
-	NxU32 mVcount;       // vertex count
-	NxU32 mIndexCount;   // index count
-	NxU32 mFaceCount;    // face count
-	NxF32       *mVertices;  // vertex data (presumably mVcount packed float triples -- confirm)
-	NxU32 *mIndices;     // index data
-};
-
-bool ComputeHull(NxU32 vcount,const NxF32 *vertices,PHullResult &result,NxU32 maxverts,NxF32 inflate);
-void ReleaseHull(PHullResult &result);
-
-//*****************************************************
-// HULL.cpp source code goes here
-//*****************************************************
-
-
-#define REAL3 float3 // vector type used by the hull code below
-#define REAL  NxF32 // scalar type used by the hull code below
-
-#define COPLANAR   (0) // PlaneTest result: point within planetestepsilon of the plane
-#define UNDER      (1) // PlaneTest result: point below the plane by more than planetestepsilon
-#define OVER       (2) // PlaneTest result: point above the plane by more than planetestepsilon
-#define SPLIT      (OVER|UNDER) // SplitTest ORs per-vertex results, so this means vertices on both sides
-#define PAPERWIDTH (0.001f) // initial value of planetestepsilon
-#define VOLUME_EPSILON (1e-20f) // not referenced in this part of the file
-
-NxF32 planetestepsilon = PAPERWIDTH; // tolerance PlaneTest uses to classify a point as COPLANAR
-
-class ConvexH : public Memalloc /* Convex polyhedron stored as vertices,
-   half-edges and facet planes.  AssertIntact and ConvexHCrop walk the
-   edge list assuming all half-edges of one facet are stored
-   consecutively. */
-{
-  public:
-	class HalfEdge
-	{
-	  public:
-		short ea;         // the other half of the edge (index into edges list)
-		NxU8 v;  // the vertex at the start of this edge (index into vertices list)
-		NxU8 p;  // the facet on which this edge lies (index into facets list)
-		HalfEdge(){} // leaves ea, v and p uninitialized
-		HalfEdge(short _ea,NxU8 _v, NxU8 _p):ea(_ea),v(_v),p(_p){}
-	};
-	Array<REAL3> vertices; // corner positions
-	Array<HalfEdge> edges; // half-edges, indexed by HalfEdge::ea
-	Array<Plane>  facets; // one plane per face, indexed by HalfEdge::p
-	ConvexH(NxI32 vertices_size,NxI32 edges_size,NxI32 facets_size); // sizes the three arrays; contents are left for the caller to fill
-};
-
-typedef ConvexH::HalfEdge HalfEdge;
-
-// Constructs each array with the requested size and then sets its
-// element count to that size, so the hull starts with that many
-// vertices, edges and facets for the caller to fill in.
-ConvexH::ConvexH(NxI32 vertices_size,NxI32 edges_size,NxI32 facets_size)
-	: vertices(vertices_size),
-	  edges(edges_size),
-	  facets(facets_size)
-{
-	facets.count   = facets_size;
-	edges.count    = edges_size;
-	vertices.count = vertices_size;
-}
-
-// Returns a newly allocated copy of *src.  The three element arrays are
-// duplicated with raw memcpy, so the element types are treated as plain
-// data.  The caller owns the returned hull.
-ConvexH *ConvexHDup(ConvexH *src)
-{
-	ConvexH *copy = MEMALLOC_NEW(ConvexH)(src->vertices.count,src->edges.count,src->facets.count);
-
-	memcpy(copy->facets.element,  src->facets.element,  sizeof(Plane)*src->facets.count);
-	memcpy(copy->edges.element,   src->edges.element,   sizeof(HalfEdge)*src->edges.count);
-	memcpy(copy->vertices.element,src->vertices.element,sizeof(float3)*src->vertices.count);
-	return copy;
-}
-
-
-// Classifies v against plane p: OVER or UNDER when its signed distance
-// exceeds planetestepsilon in that direction, otherwise COPLANAR.
-NxI32 PlaneTest(const Plane &p, const REAL3 &v) {
-	REAL signedDistance  = dot(v,p.normal)+p.dist;
-	if(signedDistance>planetestepsilon)
-	{
-		return OVER;
-	}
-	if(signedDistance<-planetestepsilon)
-	{
-		return UNDER;
-	}
-	return COPLANAR;
-}
-
-// ORs together the PlaneTest result of every vertex of the hull.  The
-// result is COPLANAR, UNDER, OVER, or SPLIT when vertices lie on both
-// sides of the plane.
-NxI32 SplitTest(ConvexH &convex,const Plane &plane) {
-	NxI32 sides=0;
-	NxI32 index=0;
-	while(index<convex.vertices.count) {
-		sides |= PlaneTest(plane,convex.vertices[index]);
-		index++;
-	}
-	return sides;
-}
-
-class VertFlag // per-vertex scratch record used by ConvexHCrop
-{
-public:
-	NxU8 planetest; // PlaneTest result of the vertex against the slice plane
-	NxU8 junk; // not referenced in the visible code (presumably padding)
-	NxU8 undermap; // index assigned to the vertex among the vertices kept under the plane
-	NxU8 overmap; // index assigned to the vertex among the vertices over the plane
-};
-class EdgeFlag // per-edge scratch record used by ConvexHCrop
-{
-public:
-	NxU8 planetest; // not written in the visible code
-	NxU8 fixes; // not written in the visible code
-	short undermap; // index of the edge in the cropped edge list, or -1 when the edge is dropped
-	short overmap; // not written in the visible code
-};
-class PlaneFlag // per-facet scratch record used by ConvexHCrop
-{
-public:
-	NxU8 undermap; // index of the facet in the cropped plane list (0 when the facet has nothing under the slice)
-	NxU8 overmap; // not written in the visible code
-};
-class Coplanar{ // one edge of the new face that ConvexHCrop builds on the slice plane
-public:
-	unsigned short ea; // index of the existing cropped edge that this new edge pairs with
-	NxU8 v0; // start vertex (ConvexHCrop stores its 'vin' here)
-	NxU8 v1; // end vertex (ConvexHCrop stores its 'vout' here)
-};
-
-NxI32 AssertIntact(ConvexH &convex) { /* Consistency check of a hull.
-   Returns 1 when every half-edge has a valid twin, every edge's start
-   vertex lies on its facet's plane, and each facet's winding agrees
-   with its plane normal; returns 0 on the first violation.  The asserts
-   additionally trap some of these violations in debug builds. */
-	NxI32 i;
-	NxI32 estart=0; // index of the first edge of the facet currently being walked
-	for(i=0;i<convex.edges.count;i++) { // pass 1: twin links
-		if(convex.edges[estart].p!= convex.edges[i].p) {
-			estart=i; // edge i starts a new facet
-		}
-		NxI32 inext = i+1;
-		if(inext>= convex.edges.count || convex.edges[inext].p != convex.edges[i].p) {
-			inext = estart; // wrap around to the facet's first edge
-		}
-		assert(convex.edges[inext].p == convex.edges[i].p);
-		NxI32 nb = convex.edges[i].ea; // twin half-edge
-		assert(nb!=255);
-		if(nb==255 || nb==-1) return 0; // 255 and -1 are treated as "no twin assigned"
-		assert(nb!=-1);
-		assert(i== convex.edges[nb].ea); // the twin must point back at this edge
-	}
-	for(i=0;i<convex.edges.count;i++) { // pass 2: geometry; estart is not reset and carries over from pass 1
-		assert(COPLANAR==PlaneTest(convex.facets[convex.edges[i].p],convex.vertices[convex.edges[i].v]));
-		if(COPLANAR!=PlaneTest(convex.facets[convex.edges[i].p],convex.vertices[convex.edges[i].v])) return 0;
-		if(convex.edges[estart].p!= convex.edges[i].p) {
-			estart=i;
-		}
-		NxI32 i1 = i+1; // next edge around the facet
-		if(i1>= convex.edges.count || convex.edges[i1].p != convex.edges[i].p) {
-			i1 = estart;
-		}
-		NxI32 i2 = i1+1; // the edge after that
-		if(i2>= convex.edges.count || convex.edges[i2].p != convex.edges[i].p) {
-			i2 = estart;
-		}
-		if(i==i2) continue; // i sliced tangent to an edge and created 2 meaningless edges
-		REAL3 localnormal = TriNormal(convex.vertices[convex.edges[i ].v],
-			                           convex.vertices[convex.edges[i1].v],
-			                           convex.vertices[convex.edges[i2].v]);
-		//assert(dot(localnormal,convex.facets[convex.edges[i].p].normal)>0);//Commented out on Stan Melax' advice
-		if(dot(localnormal,convex.facets[convex.edges[i].p].normal)<=0)return 0; // corner winds against the facet normal
-	}
-	return 1;
-}
-
-ConvexH *ConvexHCrop(ConvexH &convex,const Plane &slice)
-{
-	NxI32 i;
-	NxI32 vertcountunder=0;
-	NxI32 vertcountover =0;
-	static Array<NxI32> vertscoplanar;  // existing vertex members of convex that are coplanar
-	vertscoplanar.count=0;
-	static Array<NxI32> edgesplit;  // existing edges that members of convex that cross the splitplane
-	edgesplit.count=0;
-
-	assert(convex.edges.count<480);
-
-	EdgeFlag  edgeflag[512];
-	VertFlag  vertflag[256];
-	PlaneFlag planeflag[128];
-	HalfEdge  tmpunderedges[512];
-	Plane	  tmpunderplanes[128];
-	Coplanar coplanaredges[512];
-	NxI32 coplanaredges_num=0;
-
-	Array<REAL3> createdverts;
-	// do the side-of-plane tests
-	for(i=0;i<convex.vertices.count;i++) {
-		vertflag[i].planetest = (NxU8)PlaneTest(slice,convex.vertices[i]);
-		if(vertflag[i].planetest == COPLANAR) {
-			// ? vertscoplanar.Add(i);
-			vertflag[i].undermap = (NxU8)vertcountunder++;
-			vertflag[i].overmap  = (NxU8)vertcountover++;
-		}
-		else if(vertflag[i].planetest == UNDER)	{
-			vertflag[i].undermap = (NxU8)vertcountunder++;
-		}
-		else {
-			assert(vertflag[i].planetest == OVER);
-			vertflag[i].overmap  = (NxU8)vertcountover++;
-			vertflag[i].undermap = (NxU8)-1; // for debugging purposes
-		}
-	}
-
-	NxI32 under_edge_count =0;
-	NxI32 underplanescount=0;
-	NxI32 e0=0;
-
-	for(NxI32 currentplane=0; currentplane<convex.facets.count; currentplane++) {
-		NxI32 estart =e0;
-		NxI32 enextface=0;
-		NxI32 planeside = 0;
-		NxI32 e1 = e0+1;
-		NxI32 vout=-1;
-		NxI32 vin =-1;
-		NxI32 coplanaredge = -1;
-		do{
-
-			if(e1 >= convex.edges.count || convex.edges[e1].p!=currentplane) {
-				enextface = e1;
-				e1=estart;
-			}
-			HalfEdge &edge0 = convex.edges[e0];
-			HalfEdge &edge1 = convex.edges[e1];
-			HalfEdge &edgea = convex.edges[edge0.ea];
-
-
-			planeside |= vertflag[edge0.v].planetest;
-			//if((vertflag[edge0.v].planetest & vertflag[edge1.v].planetest)  == COPLANAR) {
-			//	assert(ecop==-1);
-			//	ecop=e;
-			//}
-
-
-			if(vertflag[edge0.v].planetest == OVER && vertflag[edge1.v].planetest == OVER){
-				// both endpoints over plane
-				edgeflag[e0].undermap  = -1;
-			}
-			else if((vertflag[edge0.v].planetest | vertflag[edge1.v].planetest)  == UNDER) {
-				// at least one endpoint under, the other coplanar or under
-
-				edgeflag[e0].undermap = (short)under_edge_count;
-				tmpunderedges[under_edge_count].v = (NxU8)vertflag[edge0.v].undermap;
-				tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-				if(edge0.ea < e0) {
-					// connect the neighbors
-					assert(edgeflag[edge0.ea].undermap !=-1);
-					tmpunderedges[under_edge_count].ea = edgeflag[edge0.ea].undermap;
-					tmpunderedges[edgeflag[edge0.ea].undermap].ea = (short)under_edge_count;
-				}
-				under_edge_count++;
-			}
-			else if((vertflag[edge0.v].planetest | vertflag[edge1.v].planetest)  == COPLANAR) {
-				// both endpoints coplanar
-				// must check a 3rd point to see if UNDER
-				NxI32 e2 = e1+1;
-				if(e2>=convex.edges.count || convex.edges[e2].p!=currentplane) {
-					e2 = estart;
-				}
-				assert(convex.edges[e2].p==currentplane);
-				HalfEdge &edge2 = convex.edges[e2];
-				if(vertflag[edge2.v].planetest==UNDER) {
-
-					edgeflag[e0].undermap = (short)under_edge_count;
-					tmpunderedges[under_edge_count].v = (NxU8)vertflag[edge0.v].undermap;
-					tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-					tmpunderedges[under_edge_count].ea = -1;
-					// make sure this edge is added to the "coplanar" list
-					coplanaredge = under_edge_count;
-					vout = vertflag[edge0.v].undermap;
-					vin  = vertflag[edge1.v].undermap;
-					under_edge_count++;
-				}
-				else {
-					edgeflag[e0].undermap = -1;
-				}
-			}
-			else if(vertflag[edge0.v].planetest == UNDER && vertflag[edge1.v].planetest == OVER) {
-				// first is under 2nd is over
-
-				edgeflag[e0].undermap = (short) under_edge_count;
-				tmpunderedges[under_edge_count].v = (NxU8)vertflag[edge0.v].undermap;
-				tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-				if(edge0.ea < e0) {
-					assert(edgeflag[edge0.ea].undermap !=-1);
-					// connect the neighbors
-					tmpunderedges[under_edge_count].ea = edgeflag[edge0.ea].undermap;
-					tmpunderedges[edgeflag[edge0.ea].undermap].ea = (short)under_edge_count;
-					vout = tmpunderedges[edgeflag[edge0.ea].undermap].v;
-				}
-				else {
-					Plane &p0 = convex.facets[edge0.p];
-					Plane &pa = convex.facets[edgea.p];
-					createdverts.Add(ThreePlaneIntersection(p0,pa,slice));
-					//createdverts.Add(PlaneProject(slice,PlaneLineIntersection(slice,convex.vertices[edge0.v],convex.vertices[edgea.v])));
-					//createdverts.Add(PlaneLineIntersection(slice,convex.vertices[edge0.v],convex.vertices[edgea.v]));
-					vout = vertcountunder++;
-				}
-				under_edge_count++;
-				/// hmmm something to think about: i might be able to output this edge regarless of
-				// wheter or not we know v-in yet.  ok i;ll try this now:
-				tmpunderedges[under_edge_count].v = (NxU8)vout;
-				tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-				tmpunderedges[under_edge_count].ea = -1;
-				coplanaredge = under_edge_count;
-				under_edge_count++;
-
-				if(vin!=-1) {
-					// we previously processed an edge  where we came under
-					// now we know about vout as well
-
-					// ADD THIS EDGE TO THE LIST OF EDGES THAT NEED NEIGHBOR ON PARTITION PLANE!!
-				}
-
-			}
-			else if(vertflag[edge0.v].planetest == COPLANAR && vertflag[edge1.v].planetest == OVER) {
-				// first is coplanar 2nd is over
-
-				edgeflag[e0].undermap = -1;
-				vout = vertflag[edge0.v].undermap;
-				// I hate this but i have to make sure part of this face is UNDER before ouputting this vert
-				NxI32 k=estart;
-				assert(edge0.p == currentplane);
-				while(!(planeside&UNDER) && k<convex.edges.count && convex.edges[k].p==edge0.p) {
-					planeside |= vertflag[convex.edges[k].v].planetest;
-					k++;
-				}
-				if(planeside&UNDER){
-					tmpunderedges[under_edge_count].v = (NxU8)vout;
-					tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-					tmpunderedges[under_edge_count].ea = -1;
-					coplanaredge = under_edge_count; // hmmm should make a note of the edge # for later on
-					under_edge_count++;
-
-				}
-			}
-			else if(vertflag[edge0.v].planetest == OVER && vertflag[edge1.v].planetest == UNDER) {
-				// first is over next is under
-				// new vertex!!!
-				if (vin!=-1) return NULL;
-				if(e0<edge0.ea) {
-					Plane &p0 = convex.facets[edge0.p];
-					Plane &pa = convex.facets[edgea.p];
-					createdverts.Add(ThreePlaneIntersection(p0,pa,slice));
-					//createdverts.Add(PlaneLineIntersection(slice,convex.vertices[edge0.v],convex.vertices[edgea.v]));
-					//createdverts.Add(PlaneProject(slice,PlaneLineIntersection(slice,convex.vertices[edge0.v],convex.vertices[edgea.v])));
-					vin = vertcountunder++;
-				}
-				else {
-					// find the new vertex that was created by edge[edge0.ea]
-					NxI32 nea = edgeflag[edge0.ea].undermap;
-					assert(tmpunderedges[nea].p==tmpunderedges[nea+1].p);
-					vin = tmpunderedges[nea+1].v;
-					assert(vin < vertcountunder);
-				}
-				if(vout!=-1) {
-					// we previously processed an edge  where we went over
-					// now we know vin too
-					// ADD THIS EDGE TO THE LIST OF EDGES THAT NEED NEIGHBOR ON PARTITION PLANE!!
-				}
-				// output edge
-				tmpunderedges[under_edge_count].v = (NxU8)vin;
-				tmpunderedges[under_edge_count].p = (NxU8)underplanescount;
-				edgeflag[e0].undermap = (short)under_edge_count;
-				if(e0>edge0.ea) {
-					assert(edgeflag[edge0.ea].undermap !=-1);
-					// connect the neighbors
-					tmpunderedges[under_edge_count].ea = edgeflag[edge0.ea].undermap;
-					tmpunderedges[edgeflag[edge0.ea].undermap].ea = (short)under_edge_count;
-				}
-				assert(edgeflag[e0].undermap == under_edge_count);
-				under_edge_count++;
-			}
-			else if(vertflag[edge0.v].planetest == OVER && vertflag[edge1.v].planetest == COPLANAR) {
-				// first is over next is coplanar
-
-				edgeflag[e0].undermap = -1;
-				vin = vertflag[edge1.v].undermap;
-				if (vin==-1) return NULL;
-				if(vout!=-1) {
-					// we previously processed an edge  where we came under
-					// now we know both endpoints
-					// ADD THIS EDGE TO THE LIST OF EDGES THAT NEED NEIGHBOR ON PARTITION PLANE!!
-				}
-
-			}
-			else {
-				assert(0);
-			}
-
-
-			e0=e1;
-			e1++; // do the modulo at the beginning of the loop
-
-		} while(e0!=estart) ;
-		e0 = enextface;
-		if(planeside&UNDER) {
-			planeflag[currentplane].undermap = (NxU8)underplanescount;
-			tmpunderplanes[underplanescount] = convex.facets[currentplane];
-			underplanescount++;
-		}
-		else {
-			planeflag[currentplane].undermap = 0;
-		}
-		if(vout>=0 && (planeside&UNDER)) {
-			assert(vin>=0);
-			assert(coplanaredge>=0);
-			assert(coplanaredge!=511);
-			coplanaredges[coplanaredges_num].ea = (short)coplanaredge;
-			coplanaredges[coplanaredges_num].v0 = (NxU8)vin;
-			coplanaredges[coplanaredges_num].v1 = (NxU8)vout;
-			coplanaredges_num++;
-		}
-	}
-
-	// add the new plane to the mix:
-	if(coplanaredges_num>0) {
-		tmpunderplanes[underplanescount++]=slice;
-	}
-	for(i=0;i<coplanaredges_num-1;i++) {
-		if(coplanaredges[i].v1 != coplanaredges[i+1].v0) {
-			NxI32 j = 0;
-			for(j=i+2;j<coplanaredges_num;j++) {
-				if(coplanaredges[i].v1 == coplanaredges[j].v0) {
-					Coplanar tmp = coplanaredges[i+1];
-					coplanaredges[i+1] = coplanaredges[j];
-					coplanaredges[j] = tmp;
-					break;
-				}
-			}
-			if(j>=coplanaredges_num)
-			{
-				// assert(j<coplanaredges_num);
-				return NULL;
-			}
-		}
-	}
-	ConvexH *punder = MEMALLOC_NEW(ConvexH)(vertcountunder,under_edge_count+coplanaredges_num,underplanescount);
-
-	ConvexH &under = *punder;
-	NxI32 k=0;
-	for(i=0;i<convex.vertices.count;i++) {
-		if(vertflag[i].planetest != OVER){
-			under.vertices[k++] = convex.vertices[i];
-		}
-	}
-	i=0;
-	while(k<vertcountunder) {
-		under.vertices[k++] = createdverts[i++];
-	}
-	assert(i==createdverts.count);
-
-	for(i=0;i<coplanaredges_num;i++) {
-		under.edges[under_edge_count+i].p  = (NxU8)(underplanescount-1);
-		under.edges[under_edge_count+i].ea = coplanaredges[i].ea;
-		tmpunderedges[coplanaredges[i].ea].ea = (short)(under_edge_count+i);
-		under.edges[under_edge_count+i].v  = coplanaredges[i].v0;
-	}
-
-	memcpy(under.edges.element,tmpunderedges,sizeof(HalfEdge)*under_edge_count);
-	memcpy(under.facets.element,tmpunderplanes,sizeof(Plane)*underplanescount);
-	return punder;
-}
-
-
-NxF32 minadjangle = 3.0f;  // in degrees  - result wont have two adjacent facets within this angle of each other.
-
-// Chooses which of `planes` should be used next to crop `convex`.
-// Each plane is scored as dmax/(dmax-dmin), where dmax/dmin are the largest and
-// smallest signed distances of the hull vertices to the plane (both start at 0).
-// A plane scores 0 when it equals an existing facet, or when it is within
-// `minadjangle` degrees of a facet that has a vertex on its negative side.
-// Returns the index of the best scoring plane, or -1 if no score exceeds `epsilon`.
-static NxI32 candidateplane(Plane *planes,NxI32 planes_count,ConvexH *convex,NxF32 epsilon)
-{
-	NxI32 best = -1;
-	REAL bestscore = 0;
-	const NxF32 maxdot_minang = cosf(DEG2RAD*minadjangle);
-	for(NxI32 pi=0;pi<planes_count;pi++)
-	{
-		Plane &cand = planes[pi];
-		NxF32 dmax=0;
-		NxF32 dmin=0;
-		for(NxI32 vi=0;vi<convex->vertices.count;vi++)
-		{
-			dmax = Max(dmax,dot(convex->vertices[vi],cand.normal)+cand.dist);
-			dmin = Min(dmin,dot(convex->vertices[vi],cand.normal)+cand.dist);
-		}
-		NxF32 span = dmax-dmin;
-		if(span<planetestepsilon) span=1.0f; // shouldn't happen.
-		NxF32 score = dmax/span;
-		if(score<=bestscore) continue; // cannot beat the current best, skip the facet checks
-		for(NxI32 fi=0;fi<convex->facets.count;fi++)
-		{
-			if(cand==convex->facets[fi])
-			{
-				score=0;
-				continue;
-			}
-			if(!(dot(cand.normal,convex->facets[fi].normal)>maxdot_minang)) continue;
-			// nearly parallel facet: reject the candidate if it would cut into this facet
-			for(NxI32 ei=0;ei<convex->edges.count;ei++)
-			{
-				if(convex->edges[ei].p!=fi) continue;
-				if(dot(convex->vertices[convex->edges[ei].v],cand.normal)+cand.dist<0)
-				{
-					score=0; // so this plane wont get selected.
-					break;
-				}
-			}
-		}
-		if(score>bestscore)
-		{
-			best=pi;
-			bestscore=score;
-		}
-	}
-	return (bestscore>epsilon)?best:-1;
-}
-
-
-
-// Returns the index of the element of p[0..count) with the largest dot product
-// against `dir`. Ties keep the lowest index. `count` must be non-zero.
-template<class T>
-inline NxI32 maxdir(const T *p,NxI32 count,const T &dir)
-{
-	assert(count);
-	NxI32 best=0;
-	NxI32 i=1;
-	while(i<count)
-	{
-		if(dot(p[i],dir)>dot(p[best],dir))
-		{
-			best=i;
-		}
-		++i;
-	}
-	return best;
-}
-
-
-// Like maxdir(), but only considers indices i for which allow[i] is non-zero.
-// Asserts that at least one index is allowed; returns -1 otherwise when asserts are off.
-template<class T>
-NxI32 maxdirfiltered(const T *p,NxI32 count,const T &dir,Array<NxI32> &allow)
-{
-	assert(count);
-	NxI32 best=-1;
-	for(NxI32 i=0;i<count;i++)
-	{
-		if(!allow[i]) continue;
-		if(best==-1 || dot(p[i],dir)>dot(p[best],dir))
-		{
-			best=i;
-		}
-	}
-	assert(best!=-1);
-	return best;
-}
-
-// Returns a unit vector perpendicular to `v`: the longer of v x Z and v x Y, normalized.
-float3 orth(const float3 &v)
-{
-	const float3 crossz=cross(v,float3(0,0,1));
-	const float3 crossy=cross(v,float3(0,1,0));
-	if(magnitude(crossz)>magnitude(crossy))
-	{
-		return normalize(crossz);
-	}
-	return normalize(crossy);
-}
-
-
-// Finds a support vertex in direction `dir` that stays the maximum when `dir`
-// is perturbed slightly around a cone (offset scale 0.025, 45 degree steps,
-// refined in 5 degree steps where the answer changes).
-// allow[] is used as per-vertex state: 0 = rejected, 3 = already accepted as
-// stable, any other non-zero value = still a candidate. A vertex that fails the
-// stability test has its allow[] entry cleared and the search restarts.
-// NOTE(review): if every entry of allow[] becomes 0, maxdirfiltered() returns -1
-// when asserts are compiled out and allow[m] is then indexed with -1 - confirm
-// callers can never exhaust the candidate set.
-template<class T>
-NxI32 maxdirsterid(const T *p,NxI32 count,const T &dir,Array<NxI32> &allow)
-{
-	NxI32 m=-1;
-	while(m==-1)
-	{
-		m = maxdirfiltered(p,count,dir,allow);
-		if(allow[m]==3) return m; // already validated by an earlier call
-		// (u,v) span the plane perpendicular to dir; used to tilt the probe direction
-		T u = orth(dir);
-		T v = cross(u,dir);
-		NxI32 ma=-1;
-		for(NxF32 x = 0.0f ; x<= 360.0f ; x+= 45.0f)
-		{
-			NxF32 s = sinf(DEG2RAD*(x));
-			NxF32 c = cosf(DEG2RAD*(x));
-			NxI32 mb = maxdirfiltered(p,count,dir+(u*s+v*c)*0.025f,allow);
-			// two consecutive probes agree with the unperturbed answer: accept m
-			if(ma==m && mb==m)
-			{
-				allow[m]=3;
-				return m;
-			}
-			if(ma!=-1 && ma!=mb)  // Yuck - this is really ugly
-			{
-				// the answer changed inside this 45 degree step: rescan it in 5 degree steps
-				NxI32 mc = ma;
-				for(NxF32 xx = x-40.0f ; xx <= x ; xx+= 5.0f)
-				{
-					NxF32 s = sinf(DEG2RAD*(xx));
-					NxF32 c = cosf(DEG2RAD*(xx));
-					NxI32 md = maxdirfiltered(p,count,dir+(u*s+v*c)*0.025f,allow);
-					if(mc==m && md==m)
-					{
-						allow[m]=3;
-						return m;
-					}
-					mc=md;
-				}
-			}
-			ma=mb;
-		}
-		// m was never stable: reject it and retry with the remaining candidates
-		allow[m]=0;
-		m=-1;
-	}
-	assert(0);
-	return m;
-}
-
-
-
-
-// Component-wise equality of two int3 values; returns 1 when all three match, else 0.
-NxI32 operator ==(const int3 &a,const int3 &b)
-{
-	if(a[0]!=b[0]) return 0;
-	if(a[1]!=b[1]) return 0;
-	if(a[2]!=b[2]) return 0;
-	return 1;
-}
-
-// Rotates the components one step to the left: (a0,a1,a2) -> (a1,a2,a0).
-int3 roll3(int3 a)
-{
-	const NxI32 first=a[0];
-	const NxI32 second=a[1];
-	const NxI32 third=a[2];
-	a[0]=second;
-	a[1]=third;
-	a[2]=first;
-	return a;
-}
-// Non-zero when `a` and `b` are the same index triple up to cyclic rotation
-// (i.e. the same triangle with the same winding).
-NxI32 isa(const int3 &a,const int3 &b)
-{
-	if(a==b) return true;
-	if(roll3(a)==b) return true;
-	return (a==roll3(b)) ? true : false;
-}
-// Non-zero when `a` and `b` are back-to-back: the same triangle with opposite winding.
-NxI32 b2b(const int3 &a,const int3 &b)
-{
-	const int3 reversed(b[2],b[1],b[0]);
-	return isa(a,reversed);
-}
-// Non-zero when point `p` lies more than `epsilon` in front of triangle `t`,
-// measured along TriNormal of the triangle's vertices from its first vertex.
-NxI32 above(float3* vertices,const int3& t, const float3 &p, NxF32 epsilon)
-{
-	const float3 normal=TriNormal(vertices[t[0]],vertices[t[1]],vertices[t[2]]);
-	const float3 offset=p-vertices[t[0]];
-	return (dot(normal,offset) > epsilon); // EPSILON???
-}
-// Returns 1 when triangle `t` contains the directed edge a->b (in winding order), else 0.
-NxI32 hasedge(const int3 &t, NxI32 a,NxI32 b)
-{
-	if(t[0]==a && t[1]==b) return 1;
-	if(t[1]==a && t[2]==b) return 1;
-	if(t[2]==a && t[0]==b) return 1;
-	return 0;
-}
-// Non-zero when vertex index `v` is one of the three corners of triangle `t`.
-NxI32 hasvert(const int3 &t, NxI32 v)
-{
-	bool found=false;
-	for(NxI32 i=0;i<3;i++)
-	{
-		if(t[i]==v) found=true;
-	}
-	return found;
-}
-// Returns 1 when some edge of `b`, taken in reverse direction, is a directed edge
-// of `a` (the two triangles are consistently-wound neighbours), else 0.
-NxI32 shareedge(const int3 &a,const int3 &b)
-{
-	if(hasedge(a,b[1],b[0])) return 1;
-	if(hasedge(a,b[2],b[1])) return 1;
-	if(hasedge(a,b[0],b[2])) return 1;
-	return 0;
-}
-
-class Tri;
-
-// File-global registry of every live triangle; a Tri's `id` is its slot in this array.
-// Slots of destroyed triangles are set to NULL rather than removed.
-static Array<Tri*> tris; // djs: For heaven's sake!!!!
-
-// A hull triangle: three vertex indices (the int3 base) plus adjacency and
-// extrusion bookkeeping. Construction registers the triangle in `tris`;
-// destruction clears its slot.
-class Tri : public int3
-{
-public:
-	int3 n;      // ids (indices into `tris`) of the neighbouring triangles; see neib()
-	NxI32 id;    // this triangle's index in `tris`
-	NxI32 vmax;  // index of the candidate vertex to extrude towards, or -1 if none/unknown
-	NxF32 rise;  // distance of vertex `vmax` along the triangle normal
-	Tri(NxI32 a,NxI32 b,NxI32 c):int3(a,b,c),n(-1,-1,-1)
-	{
-		id = tris.count;
-		tris.Add(this);
-		vmax=-1;
-		rise = 0.0f;
-	}
-	~Tri()
-	{
-		assert(tris[id]==this);
-		tris[id]=NULL;
-	}
-	// Reference to the neighbour id stored for the edge between vertices a and b.
-	NxI32 &neib(NxI32 a,NxI32 b);
-};
-
-
-// Returns a reference to the neighbour slot for the edge joining vertices `a`
-// and `b` (in either direction). The slot is the entry of `n` at the position
-// of the third vertex. Asserts, and returns a reference to a static -1, when
-// the triangle has no such edge.
-NxI32 &Tri::neib(NxI32 a,NxI32 b)
-{
-	static NxI32 er=-1;
-	for(NxI32 k=0;k<3;k++)
-	{
-		const NxI32 cur =(*this)[k];
-		const NxI32 next=(*this)[(k+1)%3];
-		const bool forward =(cur==a && next==b);
-		const bool backward=(cur==b && next==a);
-		if(forward || backward)
-		{
-			return n[(k+2)%3];
-		}
-	}
-	assert(0);
-	return er;
-}
-// Stitches together the outer neighbours of two triangles `s` and `t` so
-// that, for each edge of `s`, the neighbour of `s` across that edge and the
-// neighbour of `t` across the same edge point at each other instead of at s/t.
-// Neither `s` nor `t` is modified or deleted here.
-// NOTE(review): presumably s and t are back-to-back (same vertices, opposite
-// winding), as in removeb2b() - confirm against callers.
-void b2bfix(Tri* s,Tri*t)
-{
-	NxI32 i;
-	for(i=0;i<3;i++)
-	{
-		NxI32 i1=(i+1)%3;
-		NxI32 i2=(i+2)%3;
-		// (a,b) is the edge of s opposite vertex i
-		NxI32 a = (*s)[i1];
-		NxI32 b = (*s)[i2];
-		assert(tris[s->neib(a,b)]->neib(b,a) == s->id);
-		assert(tris[t->neib(a,b)]->neib(b,a) == t->id);
-		tris[s->neib(a,b)]->neib(b,a) = t->neib(b,a);
-		tris[t->neib(b,a)]->neib(a,b) = s->neib(a,b);
-	}
-}
-
-// Removes the pair of triangles `s` and `t`: first reconnects their
-// neighbours to each other via b2bfix(), then destroys both (their slots in
-// the global `tris` array become NULL).
-void removeb2b(Tri* s,Tri*t)
-{
-	b2bfix(s,t);
-	delete s;
-	delete t;
-}
-
-// Replaces triangle `t0` by three new triangles that connect each of its edges
-// to vertex `v`, wiring up adjacency, then deletes `t0`.
-// The new triangles take ids n, n+1, n+2 where n is tris.count on entry; the
-// statement order matters because each Tri constructor appends to `tris`.
-// If the outer neighbour of a new triangle already contains `v`, the pair is
-// removed again via removeb2b().
-void extrude(Tri *t0,NxI32 v)
-{
-	int3 t= *t0;
-	NxI32 n = tris.count;
-	Tri* ta = MEMALLOC_NEW(Tri)(v,t[1],t[2]);
-	ta->n = int3(t0->n[0],n+1,n+2);
-	tris[t0->n[0]]->neib(t[1],t[2]) = n+0; // old neighbour now borders ta
-	Tri* tb = MEMALLOC_NEW(Tri)(v,t[2],t[0]);
-	tb->n = int3(t0->n[1],n+2,n+0);
-	tris[t0->n[1]]->neib(t[2],t[0]) = n+1; // old neighbour now borders tb
-	Tri* tc = MEMALLOC_NEW(Tri)(v,t[0],t[1]);
-	tc->n = int3(t0->n[2],n+0,n+1);
-	tris[t0->n[2]]->neib(t[0],t[1]) = n+2; // old neighbour now borders tc
-	if(hasvert(*tris[ta->n[0]],v)) removeb2b(ta,tris[ta->n[0]]);
-	if(hasvert(*tris[tb->n[0]],v)) removeb2b(tb,tris[tb->n[0]]);
-	if(hasvert(*tris[tc->n[0]],v)) removeb2b(tc,tris[tc->n[0]]);
-	delete t0;
-
-}
-
-// Returns the live triangle in the global `tris` array with the largest `rise`,
-// provided that rise exceeds `epsilon`; otherwise returns NULL.
-// Also returns NULL when `tris` is empty or contains only NULL slots (the
-// previous version dereferenced a NULL pointer in that situation).
-Tri *extrudable(NxF32 epsilon)
-{
-	Tri *best=NULL;
-	for(NxI32 i=0;i<tris.count;i++)
-	{
-		Tri *cand=tris[i];
-		if(!cand) continue; // slot of a deleted triangle
-		if(!best || best->rise<cand->rise)
-		{
-			best = cand;
-		}
-	}
-	return (best && best->rise >epsilon)?best:NULL ;
-}
-
-// Four integers addressable either by name (x,y,z,w) or by index 0..3.
-// Indexing relies on the four members being laid out contiguously after `x`.
-class int4
-{
-public:
-	NxI32 x,y,z,w;
-	// Leaves all members uninitialized.
-	int4() {}
-	int4(NxI32 _x,NxI32 _y, NxI32 _z,NxI32 _w)
-	{
-		x=_x;
-		y=_y;
-		z=_z;
-		w=_w;
-	}
-	const NxI32& operator[](NxI32 i) const { return (&x)[i]; }
-	NxI32& operator[](NxI32 i) { return (&x)[i]; }
-};
-
-
-
-// True when the tetrahedron (p0,p1,p2,p3) is not degenerate: the triangle
-// (p0,p1,p2) has a cross product of magnitude at least VOLUME_EPSILON and p3 is
-// farther than VOLUME_EPSILON from that triangle's plane.
-bool hasVolume(float3 *verts, NxI32 p0, NxI32 p1, NxI32 p2, NxI32 p3)
-{
-	const float3 &origin = verts[p0];
-	float3 facecross = cross(verts[p1]-origin, verts[p2]-origin);
-	if (magnitude(facecross) < VOLUME_EPSILON && magnitude(facecross) > -VOLUME_EPSILON)
-	{
-		// Almost collinear or otherwise very close to each other
-		return false;
-	}
-	NxF32 height = dot(normalize(facecross), verts[p3]-origin);
-	if (height > VOLUME_EPSILON) return true;
-	return (height < -VOLUME_EPSILON); // Returns true iff volume is significantly non-zero
-}
-
-// Selects four vertex indices forming an initial tetrahedron for the hull build.
-// Returns int4(-1,-1,-1,-1) when no four distinct extreme vertices can be found.
-// The order of the result is fixed up at the end so that
-// dot(v3-v0, cross(v1-v0, v2-v0)) is not negative.
-// `allow` is passed through to maxdirsterid(), which may modify it.
-int4 FindSimplex(float3 *verts,NxI32 verts_count,Array<NxI32> &allow)
-{
-	float3 basis[3];
-	// first axis: two extreme points along a slightly off-axis direction
-	basis[0] = float3( 0.01f, 0.02f, 1.0f );
-	NxI32 p0 = maxdirsterid(verts,verts_count, basis[0],allow);
-	NxI32	p1 = maxdirsterid(verts,verts_count,-basis[0],allow);
-	basis[0] = verts[p0]-verts[p1];
-	if(p0==p1 || basis[0]==float3(0,0,0))
-		return int4(-1,-1,-1,-1);
-	// second axis: the longer of two vectors perpendicular to the p0-p1 line
-	basis[1] = cross(float3(     1, 0.02f, 0),basis[0]);
-	basis[2] = cross(float3(-0.02f,     1, 0),basis[0]);
-	basis[1] = normalize( (magnitude(basis[1])>magnitude(basis[2])) ? basis[1]:basis[2]);
-	NxI32 p2 = maxdirsterid(verts,verts_count,basis[1],allow);
-	if(p2 == p0 || p2 == p1)
-	{
-		// try the opposite direction before giving up
-		p2 = maxdirsterid(verts,verts_count,-basis[1],allow);
-	}
-	if(p2 == p0 || p2 == p1)
-		return int4(-1,-1,-1,-1);
-	// third axis: normal of the plane through p0, p1, p2
-	basis[1] = verts[p2] - verts[p0];
-	basis[2] = normalize(cross(basis[1],basis[0]));
-	NxI32 p3 = maxdirsterid(verts,verts_count,basis[2],allow);
-	if(p3==p0||p3==p1||p3==p2||!hasVolume(verts, p0, p1, p2, p3)) p3 = maxdirsterid(verts,verts_count,-basis[2],allow);
-	if(p3==p0||p3==p1||p3==p2)
-		return int4(-1,-1,-1,-1);
-	assert(!(p0==p1||p0==p2||p0==p3||p1==p2||p1==p3||p2==p3));
-	// enforce a consistent orientation of the tetrahedron
-	if(dot(verts[p3]-verts[p0],cross(verts[p1]-verts[p0],verts[p2]-verts[p0])) <0) {Swap(p2,p3);}
-	return int4(p0,p1,p2,p3);
-}
-#pragma warning(push)
-#pragma warning(disable:4706)
-// Builds the convex hull of `verts` as a set of triangles left in the
-// file-global `tris` array (the caller is responsible for consuming and
-// deleting them). Starts from the tetrahedron chosen by FindSimplex() and
-// repeatedly extrudes towards the vertex with the largest rise.
-// `vlimit` caps the number of hull vertices; 0 means effectively unlimited.
-// Returns 1 on success, 0 when there are fewer than 4 vertices or no initial
-// simplex could be found (in which case no triangles were created here).
-NxI32 calchullgen(float3 *verts,NxI32 verts_count, NxI32 vlimit)
-{
-	if(verts_count <4) return 0;
-	if(vlimit==0) vlimit=1000000000;
-	NxI32 j;
-	float3 bmin(*verts),bmax(*verts);
-	Array<NxI32> isextreme(verts_count);
-	Array<NxI32> allow(verts_count);
-	for(j=0;j<verts_count;j++)
-	{
-		allow.Add(1);
-		isextreme.Add(0);
-		bmin = VectorMin(bmin,verts[j]);
-		bmax = VectorMax(bmax,verts[j]);
-	}
-	// tolerance is relative to the bounding box diagonal
-	NxF32 epsilon = magnitude(bmax-bmin) * 0.001f;
-
-
-	int4 p = FindSimplex(verts,verts_count,allow);
-	if(p.x==-1) return 0; // simplex failed
-
-
-
-	float3 center = (verts[p[0]]+verts[p[1]]+verts[p[2]]+verts[p[3]]) /4.0f;  // a valid interior point
-	// the four faces of the initial tetrahedron; t->n holds neighbour ids,
-	// which are valid because these are created as ids 0..3
-	// NOTE(review): that relies on `tris` being empty on entry - confirm callers always reset it
-	Tri *t0 = MEMALLOC_NEW(Tri)(p[2],p[3],p[1]); t0->n=int3(2,3,1);
-	Tri *t1 = MEMALLOC_NEW(Tri)(p[3],p[2],p[0]); t1->n=int3(3,2,0);
-	Tri *t2 = MEMALLOC_NEW(Tri)(p[0],p[1],p[3]); t2->n=int3(0,1,3);
-	Tri *t3 = MEMALLOC_NEW(Tri)(p[1],p[0],p[2]); t3->n=int3(1,0,2);
-	isextreme[p[0]]=isextreme[p[1]]=isextreme[p[2]]=isextreme[p[3]]=1;
-
-	// compute the farthest vertex (and its rise) for each initial face
-	for(j=0;j<tris.count;j++)
-	{
-		Tri *t=tris[j];
-		assert(t);
-		assert(t->vmax<0);
-		float3 n=TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
-		t->vmax = maxdirsterid(verts,verts_count,n,allow);
-		t->rise = dot(n,verts[t->vmax]-verts[(*t)[0]]);
-	}
-	Tri *te;
-	vlimit-=4; // the simplex already used four vertices
-	while(vlimit >0 && (te=extrudable(epsilon)))
-	{
-		int3 ti=*te;
-		NxI32 v=te->vmax;
-		assert(!isextreme[v]);  // wtf we've already done this vertex
-		isextreme[v]=1;
-		//if(v==p0 || v==p1 || v==p2 || v==p3) continue; // done these already
-		// extrude every triangle that vertex v is in front of
-		j=tris.count;
-		while(j--) {
-			if(!tris[j]) continue;
-			int3 t=*tris[j];
-			if(above(verts,t,verts[v],0.01f*epsilon))
-			{
-				extrude(tris[j],v);
-			}
-		}
-		// now check for those degenerate cases where we have a flipped triangle or a really skinny triangle
-		j=tris.count;
-		while(j--)
-		{
-			if(!tris[j]) continue;
-			if(!hasvert(*tris[j],v)) break;
-			int3 nt=*tris[j];
-			if(above(verts,nt,center,0.01f*epsilon)  || magnitude(cross(verts[nt[1]]-verts[nt[0]],verts[nt[2]]-verts[nt[1]]))< epsilon*epsilon*0.1f )
-			{
-				Tri *nb = tris[tris[j]->n[0]];
-				assert(nb);assert(!hasvert(*nb,v));assert(nb->id<j);
-				extrude(nb,v);
-				j=tris.count; // restart the scan, new triangles were appended
-			}
-		}
-		// assign vmax/rise to the triangles created in this iteration
-		j=tris.count;
-		while(j--)
-		{
-			Tri *t=tris[j];
-			if(!t) continue;
-			if(t->vmax>=0) break;
-			float3 n=TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
-			t->vmax = maxdirsterid(verts,verts_count,n,allow);
-			if(isextreme[t->vmax])
-			{
-				t->vmax=-1; // already done that vertex - algorithm needs to be able to terminate.
-			}
-			else
-			{
-				t->rise = dot(n,verts[t->vmax]-verts[(*t)[0]]);
-			}
-		}
-		vlimit --;
-	}
-	return 1;
-}
-#pragma warning(pop)
-
-// Computes the convex hull of `verts` and returns it as a flat triangle index list.
-// On success returns 1, sets tris_count to the number of triangles and tris_out
-// to a buffer of 3*tris_count indices whose ownership passes to the caller
-// (it is detached from the local Array so it is not released here).
-// Returns 0, leaving the outputs untouched, when calchullgen() fails.
-NxI32 calchull(float3 *verts,NxI32 verts_count, NxI32 *&tris_out, NxI32 &tris_count,NxI32 vlimit)
-{
-	NxI32 rc=calchullgen(verts,verts_count,  vlimit) ;
-	if(!rc) return 0;
-	Array<NxI32> ts;
-	// flatten every live triangle into ts and destroy it
-	for(NxI32 i=0;i<tris.count;i++)if(tris[i])
-	{
-		for(NxI32 j=0;j<3;j++)ts.Add((*tris[i])[j]);
-		delete tris[i];
-	}
-	tris_count = ts.count/3;
-	tris_out   = ts.element;
-	// detach the buffer from ts so it survives this function
-	ts.element=NULL; ts.count=ts.array_size=0;
-	// please reset here, otherwise, we get a nice virtual function call (R6025) error with NxCooking library
-	tris.SetSize( 0 );
-	return 1;
-}
-
-// Squared magnitude of cross(v0-v1, v2-v0); used to compare triangle sizes
-// without taking a square root.
-static NxF32 area2(const float3 &v0,const float3 &v1,const float3 &v2)
-{
-	const float3 edge_a = v0-v1;
-	const float3 edge_b = v2-v0;
-	const float3 normal = cross(edge_a,edge_b);
-	return dot(normal,normal);
-}
-// Computes the convex hull of `verts` and returns it as a list of planes,
-// adding bevel planes across edges whose adjacent faces differ by more than
-// `bevangle` degrees.
-//   planes   - output; cleared first, then filled with the face planes followed
-//              by any bevel plane not within `minadjangle` of an existing plane.
-//   vlimit   - forwarded to calchullgen().
-// Returns 1 on success and 0 on failure. On every return path the global `tris`
-// array has been emptied of live triangles; previously the degenerate-bevel
-// failure path returned with the triangles still allocated and registered,
-// leaking them and leaving stale state for the next hull build.
-NxI32 calchullpbev(float3 *verts,NxI32 verts_count,NxI32 vlimit, Array<Plane> &planes,NxF32 bevangle)
-{
-	NxI32 i,j;
-	Array<Plane> bplanes;
-	planes.count=0;
-	NxI32 rc = calchullgen(verts,verts_count,vlimit);
-	if(!rc) return 0;
-	extern NxF32 minadjangle; // default is 3.0f;  // in degrees  - result wont have two adjacent facets within this angle of each other.
-	NxF32 maxdot_minang = cosf(DEG2RAD*minadjangle);
-	// pass 1: collect a bevel plane for every sharp edge (each edge visited once)
-	for(i=0;i<tris.count;i++)if(tris[i])
-	{
-		Plane p;
-		Tri *t = tris[i];
-		p.normal = TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
-		p.dist   = -dot(p.normal, verts[(*t)[0]]);
-		for(j=0;j<3;j++)
-		{
-			if(t->n[j]<t->id) continue;
-			Tri *s = tris[t->n[j]];
-			REAL3 snormal = TriNormal(verts[(*s)[0]],verts[(*s)[1]],verts[(*s)[2]]);
-			if(dot(snormal,p.normal)>=cos(bevangle*DEG2RAD)) continue;
-			REAL3 e = verts[(*t)[(j+2)%3]] - verts[(*t)[(j+1)%3]];
-			REAL3 n = (e!=REAL3(0,0,0))? cross(snormal,e)+cross(e,p.normal) : snormal+p.normal;
-			assert(n!=REAL3(0,0,0));
-			if(n==REAL3(0,0,0))
-			{
-				// degenerate bevel direction: release every remaining triangle
-				// before bailing out so the global registry is left empty
-				for(NxI32 k=0;k<tris.count;k++)if(tris[k])
-				{
-					delete tris[k];
-				}
-				tris.count = 0;
-				return 0;
-			}
-			n=normalize(n);
-			bplanes.Add(Plane(n,-dot(n,verts[maxdir(verts,verts_count,n)])));
-		}
-	}
-	// pass 2: among nearly-parallel faces keep only the larger triangle
-	// (the Tri destructor NULLs the slot, which the loop condition re-checks)
-	for(i=0;i<tris.count;i++)if(tris[i])for(j=i+1;j<tris.count;j++)if(tris[i] && tris[j])
-	{
-		Tri *ti = tris[i];
-		Tri *tj = tris[j];
-		REAL3 ni = TriNormal(verts[(*ti)[0]],verts[(*ti)[1]],verts[(*ti)[2]]);
-		REAL3 nj = TriNormal(verts[(*tj)[0]],verts[(*tj)[1]],verts[(*tj)[2]]);
-		if(dot(ni,nj)>maxdot_minang)
-		{
-			// somebody has to die, keep the biggest triangle
-			if( area2(verts[(*ti)[0]],verts[(*ti)[1]],verts[(*ti)[2]]) < area2(verts[(*tj)[0]],verts[(*tj)[1]],verts[(*tj)[2]]))
-			{
-				delete tris[i];
-			}
-			else
-			{
-				delete tris[j];
-			}
-		}
-	}
-	// pass 3: emit one plane per surviving triangle
-	for(i=0;i<tris.count;i++)if(tris[i])
-	{
-		Plane p;
-		Tri *t = tris[i];
-		p.normal = TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
-		p.dist   = -dot(p.normal, verts[(*t)[0]]);
-		planes.Add(p);
-	}
-	// pass 4: append bevel planes that are not too close to an existing plane
-	for(i=0;i<bplanes.count;i++)
-	{
-		for(j=0;j<planes.count;j++)
-		{
-			if(dot(bplanes[i].normal,planes[j].normal)>maxdot_minang) break;
-		}
-		if(j==planes.count)
-		{
-			planes.Add(bplanes[i]);
-		}
-	}
-	for(i=0;i<tris.count;i++)if(tris[i])
-	{
-		delete tris[i];
-	}
-	tris.count = 0; //bad place to do the tris.SetSize(0) fix, this line is executed many times, and will result in a whole lot of allocations if the array is totally cleared here
-	return 1;
-}
-
-// Allocates a ConvexH describing the unit cube [0,1]^3 with 8 vertices,
-// 24 half-edges and 6 facets. The caller owns the returned object.
-// Vertex index bits encode the corner: bit 2 = x, bit 1 = y, bit 0 = z.
-// NOTE(review): the HalfEdge arguments appear to be (adjacent edge, vertex, facet),
-// judging by the .ea/.v/.p fields used elsewhere in this file - confirm against HalfEdge.
-ConvexH *test_cube()
-{
-	ConvexH *convex = MEMALLOC_NEW(ConvexH)(8,24,6);
-	convex->vertices[0] = REAL3(0,0,0);
-	convex->vertices[1] = REAL3(0,0,1);
-	convex->vertices[2] = REAL3(0,1,0);
-	convex->vertices[3] = REAL3(0,1,1);
-	convex->vertices[4] = REAL3(1,0,0);
-	convex->vertices[5] = REAL3(1,0,1);
-	convex->vertices[6] = REAL3(1,1,0);
-	convex->vertices[7] = REAL3(1,1,1);
-
-	// facets in the order -x, +x, -y, +y, -z, +z
-	convex->facets[0] = Plane(REAL3(-1,0,0),0);
-	convex->facets[1] = Plane(REAL3(1,0,0),-1);
-	convex->facets[2] = Plane(REAL3(0,-1,0),0);
-	convex->facets[3] = Plane(REAL3(0,1,0),-1);
-	convex->facets[4] = Plane(REAL3(0,0,-1),0);
-	convex->facets[5] = Plane(REAL3(0,0,1),-1);
-
-	// four consecutive half-edges per facet
-	convex->edges[0 ] = HalfEdge(11,0,0);
-	convex->edges[1 ] = HalfEdge(23,1,0);
-	convex->edges[2 ] = HalfEdge(15,3,0);
-	convex->edges[3 ] = HalfEdge(16,2,0);
-
-	convex->edges[4 ] = HalfEdge(13,6,1);
-	convex->edges[5 ] = HalfEdge(21,7,1);
-	convex->edges[6 ] = HalfEdge( 9,5,1);
-	convex->edges[7 ] = HalfEdge(18,4,1);
-
-	convex->edges[8 ] = HalfEdge(19,0,2);
-	convex->edges[9 ] = HalfEdge( 6,4,2);
-	convex->edges[10] = HalfEdge(20,5,2);
-	convex->edges[11] = HalfEdge( 0,1,2);
-
-	convex->edges[12] = HalfEdge(22,3,3);
-	convex->edges[13] = HalfEdge( 4,7,3);
-	convex->edges[14] = HalfEdge(17,6,3);
-	convex->edges[15] = HalfEdge( 2,2,3);
-
-	convex->edges[16] = HalfEdge( 3,0,4);
-	convex->edges[17] = HalfEdge(14,2,4);
-	convex->edges[18] = HalfEdge( 7,6,4);
-	convex->edges[19] = HalfEdge( 8,4,4);
-
-	convex->edges[20] = HalfEdge(10,1,5);
-	convex->edges[21] = HalfEdge( 5,5,5);
-	convex->edges[22] = HalfEdge(12,7,5);
-	convex->edges[23] = HalfEdge( 1,3,5);
-
-
-	return convex;
-}
-
-// Allocates an axis-aligned box hull spanning bmin..bmax. The topology (edges)
-// comes from test_cube(); only vertex positions and facet planes are replaced.
-// The caller owns the returned object.
-ConvexH *ConvexHMakeCube(const REAL3 &bmin, const REAL3 &bmax)
-{
-	ConvexH *box = test_cube();
-	// corner index bits select min/max per axis: bit 2 = x, bit 1 = y, bit 0 = z
-	for(NxI32 corner=0;corner<8;corner++)
-	{
-		box->vertices[corner] = REAL3((corner&4)?bmax.x:bmin.x,
-		                              (corner&2)?bmax.y:bmin.y,
-		                              (corner&1)?bmax.z:bmin.z);
-	}
-
-	// x faces
-	box->facets[0] = Plane(REAL3(-1,0,0), bmin.x);
-	box->facets[1] = Plane(REAL3(1,0,0), -bmax.x);
-	// y faces
-	box->facets[2] = Plane(REAL3(0,-1,0), bmin.y);
-	box->facets[3] = Plane(REAL3(0,1,0), -bmax.y);
-	// z faces
-	box->facets[4] = Plane(REAL3(0,0,-1), bmin.z);
-	box->facets[5] = Plane(REAL3(0,0,1), -bmax.z);
-	return box;
-}
-
-
-// Builds an inflated convex hull by starting from an enlarged bounding box and
-// cropping it with up to `maxplanes` of the given planes, choosing the next
-// plane with candidateplane().
-//   planes/planes_count - candidate planes; NOTE: each plane's dist is modified
-//                         in place (reduced by `inflate`).
-//   verts/verts_count   - input points, used only for the bounding box.
-//   verts_out           - receives the hull's vertex buffer (detached from the
-//                         ConvexH, so the caller owns it); verts_count_out its size.
-//   faces_out           - receives an allocated index buffer laid out as
-//                         [faceCount, n0, v.., n1, v.., ...]; faces_count_out its length.
-// Returns 1 on success, 0 when fewer than 4 vertices are supplied.
-// A cropped hull that fails AssertIntact() is now deleted before falling back to
-// the previous hull; it used to be leaked.
-static NxI32 overhull(Plane *planes,NxI32 planes_count,float3 *verts, NxI32 verts_count,NxI32 maxplanes,
-			 float3 *&verts_out, NxI32 &verts_count_out,  NxI32 *&faces_out, NxI32 &faces_count_out ,NxF32 inflate)
-{
-	NxI32 i,j;
-	if (verts_count < 4) return 0;
-	maxplanes = Min(maxplanes,planes_count);
-	float3 bmin(verts[0]),bmax(verts[0]);
-	for(i=0;i<verts_count;i++)
-	{
-		bmin = VectorMin(bmin,verts[i]);
-		bmax = VectorMax(bmax,verts[i]);
-	}
-	NxF32 diameter = magnitude(bmax-bmin);
-//	inflate *=diameter;   // RELATIVE INFLATION
-	bmin -= float3(inflate*2.5f,inflate*2.5f,inflate*2.5f);
-	bmax += float3(inflate*2.5f,inflate*2.5f,inflate*2.5f);
-	// 2 is from the formula:
-	// D = d*|n1+n2|/(1-n1 dot n2), where d is "inflate" and
-	// n1 and n2 are the normals of two planes at bevelAngle to each other
-	// for 120 degrees, D is 2d
-
-	//bmin -= float3(inflate,inflate,inflate);
-	//bmax += float3(inflate,inflate,inflate);
-	for(i=0;i<planes_count;i++)
-	{
-		planes[i].dist -= inflate;
-	}
-	float3 emin = bmin; // VectorMin(bmin,float3(0,0,0));
-	float3 emax = bmax; // VectorMax(bmax,float3(0,0,0));
-	NxF32 epsilon  = 0.01f; // size of object is taken into account within candidate plane function.  Used to multiply here by magnitude(emax-emin)
-	planetestepsilon = magnitude(emax-emin) * PAPERWIDTH;
-	// todo: add bounding cube planes to force bevel. or try instead not adding the diameter expansion ??? must think.
-	// ConvexH *convex = ConvexHMakeCube(bmin - float3(diameter,diameter,diameter),bmax+float3(diameter,diameter,diameter));
-	NxF32 maxdot_minang = cosf(DEG2RAD*minadjangle);
-	// push a box face outwards by half the diameter when some plane is nearly
-	// parallel to it, so that the plane rather than the box face bounds the hull
-	for(j=0;j<6;j++)
-	{
-		float3 n(0,0,0);
-		n[j/2] = (j%2)? 1.0f : -1.0f;
-		for(i=0;i<planes_count;i++)
-		{
-			if(dot(n,planes[i].normal)> maxdot_minang)
-			{
-				(*((j%2)?&bmax:&bmin)) += n * (diameter*0.5f);
-				break;
-			}
-		}
-	}
-	ConvexH *c = ConvexHMakeCube(REAL3(bmin),REAL3(bmax));
-	NxI32 k;
-	while(maxplanes-- && (k=candidateplane(planes,planes_count,c,epsilon))>=0)
-	{
-		ConvexH *cropped = ConvexHCrop(*c,planes[k]);
-		if(cropped==NULL) break; // crop failed: keep the last good hull.  might want to debug this case better!!!
-		if(!AssertIntact(*cropped))
-		{
-			// invalid result: discard it and keep the last good hull.  might want to debug this case better too!!!
-			delete cropped;
-			break;
-		}
-		delete c;
-		c = cropped;
-	}
-
-	assert(AssertIntact(*c));
-	//return c;
-	faces_out = (NxI32*)MEMALLOC_MALLOC(sizeof(NxI32)*(1+c->facets.count+c->edges.count));     // new NxI32[1+c->facets.count+c->edges.count];
-	faces_count_out=0;
-	i=0;
-	faces_out[faces_count_out++]=-1; // placeholder for the face count, filled in below
-	k=0;
-	while(i<c->edges.count)
-	{
-		// consecutive edges with the same facet index form one polygon
-		j=1;
-		while(j+i<c->edges.count && c->edges[i].p==c->edges[i+j].p) { j++; }
-		faces_out[faces_count_out++]=j;
-		while(j--)
-		{
-			faces_out[faces_count_out++] = c->edges[i].v;
-			i++;
-		}
-		k++;
-	}
-	faces_out[0]=k; // number of faces.
-	assert(k==c->facets.count);
-	assert(faces_count_out == 1+c->facets.count+c->edges.count);
-	verts_out = c->vertices.element; // new float3[c->vertices.count];
-	verts_count_out = c->vertices.count;
-	for(i=0;i<c->vertices.count;i++)
-	{
-		verts_out[i] = float3(c->vertices[i]);
-	}
-	// detach the vertex buffer so deleting c does not release it
-	c->vertices.count=c->vertices.array_size=0;	c->vertices.element=NULL;
-	delete c;
-	return 1;
-}
-
-// Convenience wrapper: derives the (bevelled) hull planes of `verts` with
-// calchullpbev() and feeds them to overhull(). Returns 0 when there are no
-// vertices or plane generation fails, otherwise overhull()'s result.
-static NxI32 overhullv(float3 *verts, NxI32 verts_count,NxI32 maxplanes,
-			 float3 *&verts_out, NxI32 &verts_count_out,  NxI32 *&faces_out, NxI32 &faces_count_out ,NxF32 inflate,NxF32 bevangle,NxI32 vlimit)
-{
-	if(!verts_count) return 0;
-	extern NxI32 calchullpbev(float3 *verts,NxI32 verts_count,NxI32 vlimit, Array<Plane> &planes,NxF32 bevangle) ;
-	Array<Plane> hullplanes;
-	if(!calchullpbev(verts,verts_count,vlimit,hullplanes,bevangle))
-	{
-		return 0;
-	}
-	return overhull(hullplanes.element,hullplanes.count,verts,verts_count,maxplanes,verts_out,verts_count_out,faces_out,faces_count_out,inflate);
-}
-
-
-//*****************************************************
-//*****************************************************
-
-
-// Computes a triangulated convex hull of `vcount` packed xyz floats.
-//   inflate == 0 : plain hull via calchull(); result.mVertices aliases the
-//                  caller's `vertices` buffer (it is not copied).
-//   inflate != 0 : inflated hull via overhullv() (35 planes max, 120 degree
-//                  bevel angle); result.mVertices is a newly produced buffer.
-// In both cases result.mIndices is a buffer handed over to `result`.
-// Returns false, leaving `result` untouched, when hull generation fails.
-bool ComputeHull(NxU32 vcount,const NxF32 *vertices,PHullResult &result,NxU32 vlimit,NxF32 inflate)
-{
-
-	NxI32 index_count;
-	NxI32 *faces;
-	float3 *verts_out;
-	NxI32     verts_count_out;
-
-	if(inflate==0.0f)
-	{
-		NxI32  *tris_out;
-		NxI32    tris_count;
-		NxI32 ret = calchull( (float3 *) vertices, (NxI32) vcount, tris_out, tris_count, vlimit );
-		if(!ret) return false;
-		result.mIndexCount = (NxU32) (tris_count*3);
-		result.mFaceCount  = (NxU32) tris_count;
-		result.mVertices   = (NxF32*) vertices;
-		result.mVcount     = (NxU32) vcount;
-		result.mIndices    = (NxU32 *) tris_out;
-		return true;
-	}
-
-	NxI32 ret = overhullv((float3*)vertices,vcount,35,verts_out,verts_count_out,faces,index_count,inflate,120.0f,vlimit);
-	if(!ret) {
-		// here `tris` is still the file-global Array<Tri*> (the local below is not yet declared)
-		tris.SetSize(0); //have to set the size to 0 in order to protect from a "pure virtual function call" problem
-		return false;
-	}
-
-	// NOTE: this local deliberately shadows the global `tris` for the rest of the function
-	Array<int3> tris;
-	NxI32 n=faces[0];
-	NxI32 k=1;
-	// fan-triangulate each polygon of the [count, n0, v.., n1, v.., ...] face list
-	for(NxI32 i=0;i<n;i++)
-	{
-		NxI32 pn = faces[k++];
-		for(NxI32 j=2;j<pn;j++) tris.Add(int3(faces[k],faces[k+j-1],faces[k+j]));
-		k+=pn;
-	}
-	assert(tris.count == index_count-1-(n*3));
-	MEMALLOC_FREE(faces);	// PT: I added that. Is it ok ?
-
-	result.mIndexCount = (NxU32) (tris.count*3);
-	result.mFaceCount  = (NxU32) tris.count;
-	result.mVertices   = (NxF32*) verts_out;
-	result.mVcount     = (NxU32) verts_count_out;
-	result.mIndices    = (NxU32 *) tris.element;
-	// detach the index buffer from the local array so it outlives this function
-	tris.element=NULL; tris.count = tris.array_size=0;
-	CONVEX_DECOMPOSITION::tris.SetSize(0); //have to set the size to 0 in order to protect from a "pure virtual function call" problem
-
-	return true;
-}
-
-
-// Frees the index and vertex buffers held by `result` and resets it to an empty
-// state. All counts, including mFaceCount (previously left stale), are zeroed and
-// both pointers are cleared exactly once.
-// Callers must make sure result.mVertices does not alias a buffer they intend to
-// free themselves (ComputeHull() with inflate == 0 aliases its input).
-void ReleaseHull(PHullResult &result)
-{
-	MEMALLOC_FREE(result.mIndices);	// PT: I added that. Is it ok ?
-	MEMALLOC_FREE(result.mVertices);	// PT: I added that. Is it ok ?
-	result.mVcount     = 0;
-	result.mIndexCount = 0;
-	result.mFaceCount  = 0;
-	result.mIndices    = 0;
-	result.mVertices   = 0;
-}
-
-
-
-//****** HULLLIB source code
-
-
-// Builds a convex hull for the point cloud described by `desc`.
-// Steps: clean up / normalize the input vertices, rescale them, compute the hull
-// (optionally inflated by desc.mSkinWidth when QF_SKIN_WIDTH is set), drop unused
-// vertices, then copy the result into newly allocated buffers in `result`
-// either as a triangle list (QF_TRIANGLES) or as polygons stored as
-// [3, i0, i1, i2] groups. QF_REVERSE_ORDER flips the winding of every face.
-// Returns QE_OK on success and QE_FAIL otherwise; on failure `result` is not written.
-// Buffers placed in `result` are released with ReleaseResult().
-HullError HullLibrary::CreateConvexHull(const HullDesc       &desc,           // describes the input request
-																				HullResult           &result)         // contains the result
-{
-	HullError ret = QE_FAIL;
-
-
-	PHullResult hr;
-
-	// the scratch buffer must hold at least 8 vertices because CleanupVertices()
-	// may replace a degenerate cloud with a box
-	NxU32 vcount = desc.mVcount;
-	if ( vcount < 8 ) vcount = 8;
-
-	NxF32 *vsource  = (NxF32 *) MEMALLOC_MALLOC( sizeof(NxF32)*vcount*3 );
-
-
-	NxF32 scale[3];
-
-	NxU32 ovcount;
-
-	bool ok = CleanupVertices(desc.mVcount,desc.mVertices, desc.mVertexStride, ovcount, vsource, desc.mNormalEpsilon, scale ); // normalize point cloud, remove duplicates!
-
-	if ( ok )
-	{
-
-
-		{
-			// undo the normalization applied by CleanupVertices()
-			for (NxU32 i=0; i<ovcount; i++)
-			{
-				NxF32 *v = &vsource[i*3];
-				v[0]*=scale[0];
-				v[1]*=scale[1];
-				v[2]*=scale[2];
-			}
-		}
-
-		NxF32 skinwidth = 0;
-		if ( desc.HasHullFlag(QF_SKIN_WIDTH) )
-			skinwidth = desc.mSkinWidth;
-
-		ok = ComputeHull(ovcount,vsource,hr,desc.mMaxVertices,skinwidth);
-
-		if ( ok )
-		{
-
-			// re-index triangle mesh so it refers to only used vertices, rebuild a new vertex table.
-			NxF32 *vscratch = (NxF32 *) MEMALLOC_MALLOC( sizeof(NxF32)*hr.mVcount*3);
-			BringOutYourDead(hr.mVertices,hr.mVcount, vscratch, ovcount, hr.mIndices, hr.mIndexCount );
-
-			ret = QE_OK;
-
-			if ( desc.HasHullFlag(QF_TRIANGLES) ) // if he wants the results as triangle!
-			{
-				result.mPolygons          = false;
-				result.mNumOutputVertices = ovcount;
-				result.mOutputVertices    = (NxF32 *)MEMALLOC_MALLOC( sizeof(NxF32)*ovcount*3);
-				result.mNumFaces          = hr.mFaceCount;
-				result.mNumIndices        = hr.mIndexCount;
-
-				result.mIndices           = (NxU32 *) MEMALLOC_MALLOC( sizeof(NxU32)*hr.mIndexCount);
-
-				memcpy(result.mOutputVertices, vscratch, sizeof(NxF32)*3*ovcount );
-
-  			if ( desc.HasHullFlag(QF_REVERSE_ORDER) )
-				{
-
-					const NxU32 *source = hr.mIndices;
-								NxU32 *dest   = result.mIndices;
-
-					for (NxU32 i=0; i<hr.mFaceCount; i++)
-					{
-						dest[0] = source[2];
-						dest[1] = source[1];
-						dest[2] = source[0];
-						dest+=3;
-						source+=3;
-					}
-
-				}
-				else
-				{
-					memcpy(result.mIndices, hr.mIndices, sizeof(NxU32)*hr.mIndexCount);
-				}
-			}
-			else
-			{
-				result.mPolygons          = true;
-				result.mNumOutputVertices = ovcount;
-				result.mOutputVertices    = (NxF32 *)MEMALLOC_MALLOC( sizeof(NxF32)*ovcount*3);
-				result.mNumFaces          = hr.mFaceCount;
-				// one extra slot per face for the leading vertex count
-				result.mNumIndices        = hr.mIndexCount+hr.mFaceCount;
-				result.mIndices           = (NxU32 *) MEMALLOC_MALLOC( sizeof(NxU32)*result.mNumIndices);
-				memcpy(result.mOutputVertices, vscratch, sizeof(NxF32)*3*ovcount );
-
-				{
-					const NxU32 *source = hr.mIndices;
-								NxU32 *dest   = result.mIndices;
-					for (NxU32 i=0; i<hr.mFaceCount; i++)
-					{
-						dest[0] = 3;
-						if ( desc.HasHullFlag(QF_REVERSE_ORDER) )
-						{
-							dest[1] = source[2];
-							dest[2] = source[1];
-							dest[3] = source[0];
-						}
-						else
-						{
-							dest[1] = source[0];
-							dest[2] = source[1];
-							dest[3] = source[2];
-						}
-
-						dest+=4;
-						source+=3;
-					}
-				}
-			}
-			// ReleaseHull frees memory for hr.mVertices, which can be the
-			// same pointer as vsource, so be sure to set it to NULL if necessary
-			if ( hr.mVertices == vsource) vsource = NULL;
-
-			ReleaseHull(hr);
-
-			if ( vscratch )
-			{
-				MEMALLOC_FREE(vscratch);
-			}
-		}
-	}
-
-	// this pointer is usually freed in ReleaseHull()
-	if ( vsource )
-	{
-		MEMALLOC_FREE(vsource);
-	}
-
-
-	return ret;
-}
-
-
-
-// Frees the vertex and index buffers of a HullResult produced by
-// CreateConvexHull() and clears the two pointers. Safe to call when either
-// pointer is already null. Always returns QE_OK.
-HullError HullLibrary::ReleaseResult(HullResult &result) // release memory allocated for this result, we are done with it.
-{
-	if ( result.mOutputVertices != 0 )
-	{
-		MEMALLOC_FREE(result.mOutputVertices);
-		result.mOutputVertices = 0;
-	}
-
-	if ( result.mIndices != 0 )
-	{
-		MEMALLOC_FREE(result.mIndices);
-		result.mIndices = 0;
-	}
-
-	return QE_OK;
-}
-
-
-// Appends the point (x,y,z) to the packed xyz array `p` at slot `vcount`
-// and increments `vcount`. The caller guarantees that `p` has room.
-static void AddPoint(NxU32 &vcount,NxF32 *p,NxF32 x,NxF32 y,NxF32 z)
-{
-	const NxU32 base = vcount*3;
-	p[base+0] = x;
-	p[base+1] = y;
-	p[base+2] = z;
-	++vcount;
-}
-
-
-// Squared Euclidean distance between the point (px,py,pz) and the xyz triple at p2.
-NxF32 GetDist(NxF32 px,NxF32 py,NxF32 pz,const NxF32 *p2)
-{
-	const NxF32 ex = px - p2[0];
-	const NxF32 ey = py - p2[1];
-	const NxF32 ez = pz - p2[2];
-	return ex*ex+ey*ey+ez*ez;
-}
-
-
-
-// Normalizes and de-duplicates an input point cloud.
-//   svcount/svertices/stride - input points; `stride` is the byte distance between points.
-//   vcount        - output: number of points written to `vertices`.
-//   vertices      - output buffer of packed xyz floats; must hold at least
-//                   max(svcount, 8) points because a box may be written instead.
-//   normalepsilon - per-axis tolerance (in normalized units) under which two points are merged.
-//   scale         - optional output: the bounding box extents; when non-NULL the
-//                   stored points are divided by these extents (the caller multiplies back).
-// When the cloud is degenerate (an extent below EPSILON, or fewer than 3 points),
-// before or after merging, the output is replaced by the 8 corners of a small box
-// around the cloud's centre.
-// Returns false only when svcount is 0.
-// NOTE: the EPSILON macro defined below stays defined for the rest of the translation unit.
-bool  HullLibrary::CleanupVertices(NxU32 svcount,
-																const NxF32 *svertices,
-																NxU32 stride,
-																NxU32 &vcount,       // output number of vertices
-																NxF32 *vertices,                 // location to store the results.
-																NxF32  normalepsilon,
-																NxF32 *scale)
-{
-	if ( svcount == 0 ) return false;
-
-
-	#define EPSILON 0.000001f // close enough to consider two floating point numbers to be 'the same'.
-
-	vcount = 0;
-
-	// only initialized (and only read) when `scale` is non-NULL
-	NxF32 recip[3];
-
-	if ( scale )
-	{
-		scale[0] = 1;
-		scale[1] = 1;
-		scale[2] = 1;
-	}
-
-	NxF32 bmin[3] = {  FLT_MAX,  FLT_MAX,  FLT_MAX };
-	NxF32 bmax[3] = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
-
-	const char *vtx = (const char *) svertices;
-
-	{
-		// bounding box of the raw input
-		for (NxU32 i=0; i<svcount; i++)
-		{
-			const NxF32 *p = (const NxF32 *) vtx;
-
-			vtx+=stride;
-
-			for (NxI32 j=0; j<3; j++)
-			{
-				if ( p[j] < bmin[j] ) bmin[j] = p[j];
-				if ( p[j] > bmax[j] ) bmax[j] = p[j];
-			}
-		}
-	}
-
-	NxF32 dx = bmax[0] - bmin[0];
-	NxF32 dy = bmax[1] - bmin[1];
-	NxF32 dz = bmax[2] - bmin[2];
-
-	NxF32 center[3];
-
-	center[0] = dx*0.5f + bmin[0];
-	center[1] = dy*0.5f + bmin[1];
-	center[2] = dz*0.5f + bmin[2];
-
-	if ( dx < EPSILON || dy < EPSILON || dz < EPSILON || svcount < 3 )
-	{
-		// degenerate input: emit a box instead of the points
-
-		NxF32 len = FLT_MAX;
-
-		if ( dx > EPSILON && dx < len ) len = dx;
-		if ( dy > EPSILON && dy < len ) len = dy;
-		if ( dz > EPSILON && dz < len ) len = dz;
-
-		if ( len == FLT_MAX )
-		{
-			dx = dy = dz = 0.01f; // one centimeter
-		}
-		else
-		{
-			if ( dx < EPSILON ) dx = len * 0.05f; // 1/20th the shortest non-zero edge.
-			if ( dy < EPSILON ) dy = len * 0.05f;
-			if ( dz < EPSILON ) dz = len * 0.05f;
-		}
-
-		NxF32 x1 = center[0] - dx;
-		NxF32 x2 = center[0] + dx;
-
-		NxF32 y1 = center[1] - dy;
-		NxF32 y2 = center[1] + dy;
-
-		NxF32 z1 = center[2] - dz;
-		NxF32 z2 = center[2] + dz;
-
-		AddPoint(vcount,vertices,x1,y1,z1);
-		AddPoint(vcount,vertices,x2,y1,z1);
-		AddPoint(vcount,vertices,x2,y2,z1);
-		AddPoint(vcount,vertices,x1,y2,z1);
-		AddPoint(vcount,vertices,x1,y1,z2);
-		AddPoint(vcount,vertices,x2,y1,z2);
-		AddPoint(vcount,vertices,x2,y2,z2);
-		AddPoint(vcount,vertices,x1,y2,z2);
-
-		return true; // return cube
-
-
-	}
-	else
-	{
-		if ( scale )
-		{
-			scale[0] = dx;
-			scale[1] = dy;
-			scale[2] = dz;
-
-			recip[0] = 1 / dx;
-			recip[1] = 1 / dy;
-			recip[2] = 1 / dz;
-
-			// move the centre into the same normalized space as the points
-			center[0]*=recip[0];
-			center[1]*=recip[1];
-			center[2]*=recip[2];
-
-		}
-
-	}
-
-
-
-	vtx = (const char *) svertices;
-
-	// merge pass: each input point is compared against every point kept so far
-	for (NxU32 i=0; i<svcount; i++)
-	{
-
-		const NxF32 *p = (const NxF32 *)vtx;
-		vtx+=stride;
-
-		NxF32 px = p[0];
-		NxF32 py = p[1];
-		NxF32 pz = p[2];
-
-		if ( scale )
-		{
-			px = px*recip[0]; // normalize
-			py = py*recip[1]; // normalize
-			pz = pz*recip[2]; // normalize
-		}
-
-		{
-			NxU32 j;
-
-			for (j=0; j<vcount; j++)
-			{
-				NxF32 *v = &vertices[j*3];
-
-				NxF32 x = v[0];
-				NxF32 y = v[1];
-				NxF32 z = v[2];
-
-				// these shadow the outer dx/dy/dz (bounding box extents)
-				NxF32 dx = fabsf(x - px );
-				NxF32 dy = fabsf(y - py );
-				NxF32 dz = fabsf(z - pz );
-
-				if ( dx < normalepsilon && dy < normalepsilon && dz < normalepsilon )
-				{
-					// ok, it is close enough to the old one
-					// now let us see if it is further from the center of the point cloud than the one we already recorded.
-					// in which case we keep this one instead.
-
-					NxF32 dist1 = GetDist(px,py,pz,center);
-					NxF32 dist2 = GetDist(v[0],v[1],v[2],center);
-
-					if ( dist1 > dist2 )
-					{
-						v[0] = px;
-						v[1] = py;
-						v[2] = pz;
-					}
-
-					break;
-				}
-			}
-
-			// no existing point was close enough: keep this one as a new vertex
-			if ( j == vcount )
-			{
-				NxF32 *dest = &vertices[vcount*3];
-				dest[0] = px;
-				dest[1] = py;
-				dest[2] = pz;
-				vcount++;
-			}
-		}
-	}
-
-	// ok..now make sure we didn't prune so many vertices it is now invalid.
-	{
-		// these shadow the function-level bmin/bmax/dx/dy/dz
-		NxF32 bmin[3] = {  FLT_MAX,  FLT_MAX,  FLT_MAX };
-		NxF32 bmax[3] = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
-
-		for (NxU32 i=0; i<vcount; i++)
-		{
-			const NxF32 *p = &vertices[i*3];
-			for (NxI32 j=0; j<3; j++)
-			{
-				if ( p[j] < bmin[j] ) bmin[j] = p[j];
-				if ( p[j] > bmax[j] ) bmax[j] = p[j];
-			}
-		}
-
-		NxF32 dx = bmax[0] - bmin[0];
-		NxF32 dy = bmax[1] - bmin[1];
-		NxF32 dz = bmax[2] - bmin[2];
-
-		if ( dx < EPSILON || dy < EPSILON || dz < EPSILON || vcount < 3)
-		{
-			NxF32 cx = dx*0.5f + bmin[0];
-			NxF32 cy = dy*0.5f + bmin[1];
-			NxF32 cz = dz*0.5f + bmin[2];
-
-			NxF32 len = FLT_MAX;
-
-			if ( dx >= EPSILON && dx < len ) len = dx;
-			if ( dy >= EPSILON && dy < len ) len = dy;
-			if ( dz >= EPSILON && dz < len ) len = dz;
-
-			if ( len == FLT_MAX )
-			{
-				dx = dy = dz = 0.01f; // one centimeter
-			}
-			else
-			{
-				if ( dx < EPSILON ) dx = len * 0.05f; // 1/20th the shortest non-zero edge.
-				if ( dy < EPSILON ) dy = len * 0.05f;
-				if ( dz < EPSILON ) dz = len * 0.05f;
-			}
-
-			NxF32 x1 = cx - dx;
-			NxF32 x2 = cx + dx;
-
-			NxF32 y1 = cy - dy;
-			NxF32 y2 = cy + dy;
-
-			NxF32 z1 = cz - dz;
-			NxF32 z2 = cz + dz;
-
-			vcount = 0; // add box
-
-			AddPoint(vcount,vertices,x1,y1,z1);
-			AddPoint(vcount,vertices,x2,y1,z1);
-			AddPoint(vcount,vertices,x2,y2,z1);
-			AddPoint(vcount,vertices,x1,y2,z1);
-			AddPoint(vcount,vertices,x1,y1,z2);
-			AddPoint(vcount,vertices,x2,y1,z2);
-			AddPoint(vcount,vertices,x2,y2,z2);
-			AddPoint(vcount,vertices,x1,y2,z2);
-
-			return true;
-		}
-	}
-
-	return true;
-}
-
-void HullLibrary::BringOutYourDead(const NxF32 *verts,NxU32 vcount, NxF32 *overts,NxU32 &ocount,NxU32 *indices,NxU32 indexcount)
-{
-	NxU32 *used = (NxU32 *)MEMALLOC_MALLOC(sizeof(NxU32)*vcount);
-	memset(used,0,sizeof(NxU32)*vcount);
-
-	ocount = 0;
-
-	for (NxU32 i=0; i<indexcount; i++)
-	{
-		NxU32 v = indices[i]; // original array index
-
-		assert( v < vcount );
-
-		if ( used[v] ) // if already remapped
-		{
-			indices[i] = used[v]-1; // index to new array
-		}
-		else
-		{
-
-			indices[i] = ocount;      // new index mapping
-
-			overts[ocount*3+0] = verts[v*3+0]; // copy old vert to new vert array
-			overts[ocount*3+1] = verts[v*3+1];
-			overts[ocount*3+2] = verts[v*3+2];
-
-			ocount++; // increment output vert count
-
-			assert( ocount <= vcount );
-
-			used[v] = ocount; // assign new index remapping
-		}
-	}
-
-	MEMALLOC_FREE(used);
-}
-
-//==================================================================================
-HullError HullLibrary::CreateTriangleMesh(HullResult &answer,ConvexHullTriangleInterface *iface)
-{
-	HullError ret = QE_FAIL;
-
-
-	const NxF32 *p            = answer.mOutputVertices;
-	const NxU32   *idx = answer.mIndices;
-	NxU32 fcount       = answer.mNumFaces;
-
-	if ( p && idx && fcount )
-	{
-		ret = QE_OK;
-
-		for (NxU32 i=0; i<fcount; i++)
-		{
-			NxU32 pcount = *idx++;
-
-			NxU32 i1 = *idx++;
-			NxU32 i2 = *idx++;
-			NxU32 i3 = *idx++;
-
-			const NxF32 *p1 = &p[i1*3];
-			const NxF32 *p2 = &p[i2*3];
-			const NxF32 *p3 = &p[i3*3];
-
-			AddConvexTriangle(iface,p1,p2,p3);
-
-			pcount-=3;
-			while ( pcount )
-			{
-				i3 = *idx++;
-				p2 = p3;
-				p3 = &p[i3*3];
-
-				AddConvexTriangle(iface,p1,p2,p3);
-				pcount--;
-			}
-
-		}
-	}
-
-	return ret;
-}
-
-//==================================================================================
-void HullLibrary::AddConvexTriangle(ConvexHullTriangleInterface *callback,const NxF32 *p1,const NxF32 *p2,const NxF32 *p3)
-{
-	ConvexHullVertex v1,v2,v3;
-
-	#define TSCALE1 (1.0f/4.0f)
-
-	v1.mPos[0] = p1[0];
-	v1.mPos[1] = p1[1];
-	v1.mPos[2] = p1[2];
-
-	v2.mPos[0] = p2[0];
-	v2.mPos[1] = p2[1];
-	v2.mPos[2] = p2[2];
-
-	v3.mPos[0] = p3[0];
-	v3.mPos[1] = p3[1];
-	v3.mPos[2] = p3[2];
-
-	NxF32 n[3];
-	ComputeNormal(n,p1,p2,p3);
-
-	v1.mNormal[0] = n[0];
-	v1.mNormal[1] = n[1];
-	v1.mNormal[2] = n[2];
-
-	v2.mNormal[0] = n[0];
-	v2.mNormal[1] = n[1];
-	v2.mNormal[2] = n[2];
-
-	v3.mNormal[0] = n[0];
-	v3.mNormal[1] = n[1];
-	v3.mNormal[2] = n[2];
-
-	const NxF32 *tp1 = p1;
-	const NxF32 *tp2 = p2;
-	const NxF32 *tp3 = p3;
-
-	NxI32 i1 = 0;
-	NxI32 i2 = 0;
-
-	NxF32 nx = fabsf(n[0]);
-	NxF32 ny = fabsf(n[1]);
-	NxF32 nz = fabsf(n[2]);
-
-	if ( nx <= ny && nx <= nz )
-		i1 = 0;
-	if ( ny <= nx && ny <= nz )
-		i1 = 1;
-	if ( nz <= nx && nz <= ny )
-		i1 = 2;
-
-	switch ( i1 )
-	{
-		case 0:
-			if ( ny < nz )
-				i2 = 1;
-			else
-				i2 = 2;
-			break;
-		case 1:
-			if ( nx < nz )
-				i2 = 0;
-			else
-				i2 = 2;
-			break;
-		case 2:
-			if ( nx < ny )
-				i2 = 0;
-			else
-				i2 = 1;
-			break;
-	}
-
-	v1.mTexel[0] = tp1[i1]*TSCALE1;
-	v1.mTexel[1] = tp1[i2]*TSCALE1;
-
-	v2.mTexel[0] = tp2[i1]*TSCALE1;
-	v2.mTexel[1] = tp2[i2]*TSCALE1;
-
-	v3.mTexel[0] = tp3[i1]*TSCALE1;
-	v3.mTexel[1] = tp3[i2]*TSCALE1;
-
-	callback->ConvexHullTriangle(v3,v2,v1);
-}
-
-//==================================================================================
-NxF32 HullLibrary::ComputeNormal(NxF32 *n,const NxF32 *A,const NxF32 *B,const NxF32 *C)
-{
-	NxF32 vx,vy,vz,wx,wy,wz,vw_x,vw_y,vw_z,mag;
-
-	vx = (B[0] - C[0]);
-	vy = (B[1] - C[1]);
-	vz = (B[2] - C[2]);
-
-	wx = (A[0] - B[0]);
-	wy = (A[1] - B[1]);
-	wz = (A[2] - B[2]);
-
-	vw_x = vy * wz - vz * wy;
-	vw_y = vz * wx - vx * wz;
-	vw_z = vx * wy - vy * wx;
-
-	mag = sqrtf((vw_x * vw_x) + (vw_y * vw_y) + (vw_z * vw_z));
-
-	if ( mag < 0.000001f )
-	{
-		mag = 0;
-	}
-	else
-	{
-		mag = 1.0f/mag;
-	}
-
-	n[0] = vw_x * mag;
-	n[1] = vw_y * mag;
-	n[2] = vw_z * mag;
-
-	return mag;
-}
-
-}; // End of namespace

+ 0 - 201
Engine/lib/convexDecomp/NvStanHull.h

@@ -1,201 +0,0 @@
-#ifndef NV_STAN_HULL_H
-
-#define NV_STAN_HULL_H
-
-/*
-
-NvStanHull.h : A convex hull generator written by Stan Melax
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#include "NvUserMemAlloc.h"
-
-namespace CONVEX_DECOMPOSITION
-{
-
-class HullResult
-{
-public:
-	HullResult(void)
-	{
-		mPolygons = true;
-		mNumOutputVertices = 0;
-		mOutputVertices = 0;
-		mNumFaces = 0;
-		mNumIndices = 0;
-		mIndices = 0;
-	}
-	bool             mPolygons;                  // true if indices represents polygons, false indices are triangles
-	NxU32            mNumOutputVertices;         // number of vertices in the output hull
-	NxF32           *mOutputVertices;            // array of vertices, 3 floats each x,y,z
-	NxU32            mNumFaces;                  // the number of faces produced
-	NxU32            mNumIndices;                // the total number of indices
-	NxU32           *mIndices;                   // pointer to indices.
-
-// If triangles, then indices are array indexes into the vertex list.
-// If polygons, indices are in the form (number of points in face) (p1, p2, p3, ..) etc..
-};
-
-enum HullFlag
-{
-	QF_TRIANGLES         = (1<<0),             // report results as triangles, not polygons.
-	QF_REVERSE_ORDER     = (1<<1),             // reverse order of the triangle indices.
-	QF_SKIN_WIDTH        = (1<<2),             // extrude hull based on this skin width
-	QF_DEFAULT           = (QF_TRIANGLES | QF_SKIN_WIDTH)
-};
-
-
-class HullDesc
-{
-public:
-	HullDesc(void)
-	{
-		mFlags          = QF_DEFAULT;
-		mVcount         = 0;
-		mVertices       = 0;
-		mVertexStride   = 0;
-		mNormalEpsilon  = 0.001f;
-		mMaxVertices = 4096; // maximum number of points to be considered for a convex hull.
-		mSkinWidth = 0.01f; // default is one centimeter
-	};
-
-	HullDesc(HullFlag flag,
-						 NxU32 vcount,
-						 const NxF32 *vertices,
-						 NxU32 stride)
-	{
-		mFlags          = flag;
-		mVcount         = vcount;
-		mVertices       = vertices;
-		mVertexStride   = stride;
-		mNormalEpsilon  = 0.001f;
-		mMaxVertices    = 4096;
-		mSkinWidth = 0.01f; // default is one centimeter
-	}
-
-	bool HasHullFlag(HullFlag flag) const
-	{
-		if ( mFlags & flag ) return true;
-		return false;
-	}
-
-	void SetHullFlag(HullFlag flag)
-	{
-		mFlags|=flag;
-	}
-
-	void ClearHullFlag(HullFlag flag)
-	{
-		mFlags&=~flag;
-	}
-
-	NxU32      mFlags;           // flags to use when generating the convex hull.
-	NxU32      mVcount;          // number of vertices in the input point cloud
-	const NxF32      *mVertices;        // the array of vertices.
-	NxU32      mVertexStride;    // the stride of each vertex, in bytes.
-	NxF32             mNormalEpsilon;   // the epsilon for removing duplicates.  This is a normalized value, if normalized bit is on.
-	NxF32             mSkinWidth;
-	NxU32      mMaxVertices;               // maximum number of vertices to be considered for the hull!
-};
-
-enum HullError
-{
-	QE_OK,            // success!
-	QE_FAIL,           // failed.
-	QE_NOT_READY,
-};
-
-// This class is used when converting a convex hull into a triangle mesh.
-class ConvexHullVertex
-{
-public:
-	NxF32         mPos[3];
-	NxF32         mNormal[3];
-	NxF32         mTexel[2];
-};
-
-// A virtual interface to receive the triangles from the convex hull.
-class ConvexHullTriangleInterface
-{
-public:
-	virtual void ConvexHullTriangle(const ConvexHullVertex &v1,const ConvexHullVertex &v2,const ConvexHullVertex &v3) = 0;
-};
-
-
-class HullLibrary
-{
-public:
-
-	HullError CreateConvexHull(const HullDesc       &desc,           // describes the input request
-															HullResult           &result);        // contains the resulst
-
-	HullError ReleaseResult(HullResult &result); // release memory allocated for this result, we are done with it.
-
-	HullError CreateTriangleMesh(HullResult &answer,ConvexHullTriangleInterface *iface);
-private:
-	NxF32 ComputeNormal(NxF32 *n,const NxF32 *A,const NxF32 *B,const NxF32 *C);
-	void AddConvexTriangle(ConvexHullTriangleInterface *callback,const NxF32 *p1,const NxF32 *p2,const NxF32 *p3);
-
-	void BringOutYourDead(const NxF32 *verts,NxU32 vcount, NxF32 *overts,NxU32 &ocount,NxU32 *indices,NxU32 indexcount);
-
-	bool    CleanupVertices(NxU32 svcount,
-													const NxF32 *svertices,
-													NxU32 stride,
-													NxU32 &vcount,       // output number of vertices
-													NxF32 *vertices,                 // location to store the results.
-													NxF32  normalepsilon,
-													NxF32 *scale);
-};
-
-}; // end of namespace
-
-#endif

+ 0 - 511
Engine/lib/convexDecomp/NvThreadConfig.cpp

@@ -1,511 +0,0 @@
-/*
-
-NvThreadConfig.cpp : A simple wrapper class to define threading and mutex locks.
-
-*/
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#include <cassert>
-#include "NvThreadConfig.h"
-
-#if defined(WIN32)
-
-#define _WIN32_WINNT 0x400
-#include <windows.h>
-
-#pragma comment(lib,"winmm.lib")
-
-//	#ifndef _WIN32_WINNT
-
-//	#endif
-//	#include <windows.h>
-//#include <winbase.h>
-#endif
-
-#if defined(_XBOX)
-	#include <xtl.h>
-#endif
-
-#if defined(__linux__) || defined( __APPLE__ ) || defined( __FreeBSD__)
-	//#include <sys/time.h>
-	#include <time.h>
-	#include <unistd.h>
-	#include <errno.h>
-	#define __stdcall
-#endif
-
-#if defined( __APPLE__ ) || defined( __FreeBSD__)
-
-   #include <sys/time.h>
-#endif
-
-#if defined(__APPLE__) || defined(__linux__) || defined( __FreeBSD__)
-
-	#include <pthread.h>
-#endif
-
-#if defined( __APPLE__ ) || defined( __FreeBSD__)
-
-   #define PTHREAD_MUTEX_RECURSIVE_NP PTHREAD_MUTEX_RECURSIVE
-#endif
-
-
-#ifdef	NDEBUG
-#define VERIFY( x ) (x)
-#else
-#define VERIFY( x ) assert((x))
-#endif
-
-namespace CONVEX_DECOMPOSITION
-{
-
-NxU32 tc_timeGetTime(void)
-{
-   #if defined(__linux__)
-      struct timespec ts;
-      clock_gettime(CLOCK_REALTIME, &ts);
-      return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
-    #elif defined( __APPLE__ ) || defined( __FreeBSD__)
-
-      struct timeval tp;
-      gettimeofday(&tp, (struct timezone *)0);
-      return tp.tv_sec * 1000 + tp.tv_usec / 1000;
-   #elif defined( _XBOX )
-      return GetTickCount();
-   #else
-      return timeGetTime();
-   #endif
-}
-
-void   tc_sleep(NxU32 ms)
-{
-   #if defined(__linux__) || defined( __APPLE__ ) ||  defined( __FreeBSD__)
-      usleep(ms * 1000);
-   #else
-      Sleep(ms);
-   #endif
-}
-
-void tc_spinloop()
-{
-
-   #if defined( _XBOX )
-      // Pause would do nothing on the Xbox. Threads are not scheduled.
-   #elif defined( _WIN64 )
-      YieldProcessor( );
-   #elif defined( __APPLE__ )
-      pthread_yield_np();
-   #elif defined(__linux__)  || defined(__FreeBSD__)
-      #if defined(_POSIX_PRIORITY_SCHEDULING)
-         sched_yield();
-      #else
-         asm("pause");
-      #endif
-   #elif
-      __asm { pause };
-   #endif
-}
-
-void tc_interlockedExchange(void *dest, const int64_t exchange)
-{
-   #if defined( __linux__ ) || defined( __APPLE__ ) ||  defined( __FreeBSD__)
-
-	  // not working
-	  assert(false);
-	  //__sync_lock_test_and_set((int64_t*)dest, exchange);
-#elif defined( _XBOX ) || defined( _WIN64 )
-   InterlockedExchange((volatile LONG *)dest, exchange);
-   #else
-      __asm
-      {
-         mov      ebx, dword ptr [exchange]
-         mov      ecx, dword ptr [exchange + 4]
-         mov      edi, dest
-         mov      eax, dword ptr [edi]
-         mov      edx, dword ptr [edi + 4]
-         jmp      start
-      retry:
-         pause
-      start:
-         lock cmpxchg8b [edi]
-         jnz      retry
-      };
-   #endif
-}
-
-NxI32 tc_interlockedCompareExchange(void *dest, NxI32 exchange, NxI32 compare)
-{
-   #if defined( __linux__ ) || defined( __APPLE__ ) ||  defined( __FreeBSD__)
-
-	  // not working
-	  assert(false);
-	  return 0;
-	  //return __sync_val_compare_and_swap((uintptr_t*)dest, exchange, compare);
-	  //return __sync_bool_compare_and_swap((uintptr_t*)dest, exchange, compare);
-   #elif defined( _XBOX ) || defined( _WIN64 )
-     return InterlockedCompareExchange((volatile LONG *)dest, exchange, compare);
-   #else
-      char _ret;
-      //
-      __asm
-      {
-         mov      edx, [dest]
-         mov      eax, [compare]
-         mov      ecx, [exchange]
-
-         lock cmpxchg [edx], ecx
-
-         setz    al
-         mov     byte ptr [_ret], al
-      }
-      //
-      return _ret;
-   #endif
-}
-
-NxI32 tc_interlockedCompareExchange(void *dest, const NxI32 exchange1, const NxI32 exchange2, const NxI32 compare1, const NxI32 compare2)
-{
-   #if defined( __linux__ ) || defined( __APPLE__ ) || defined( __FreeBSD__)
-	  // not working
-      assert(false);
-	  return 0;
-	  //uint64_t exchange = ((uint64_t)exchange1 << 32) | (uint64_t)exchange2;
-	  //uint64_t compare = ((uint64_t)compare1 << 32) | (uint64_t)compare2;
-	  //return __sync_bool_compare_and_swap((int64_t*)dest, exchange, compare);
-   #elif defined( _XBOX ) || defined( _WIN64 )
-     assert(false);
-     return 0;
-   #else
-      char _ret;
-      //
-      __asm
-      {
-         mov     ebx, [exchange1]
-         mov     ecx, [exchange2]
-         mov     edi, [dest]
-         mov     eax, [compare1]
-         mov     edx, [compare2]
-         lock cmpxchg8b [edi]
-         setz    al
-         mov     byte ptr [_ret], al
-      }
-      //
-      return _ret;
-   #endif
-}
-
-class MyThreadMutex : public ThreadMutex
-{
-public:
-  MyThreadMutex(void)
-  {
-    #if defined(WIN32) || defined(_XBOX)
-  	InitializeCriticalSection(&m_Mutex);
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-  	pthread_mutexattr_t mutexAttr;  // Mutex Attribute
-  	VERIFY( pthread_mutexattr_init(&mutexAttr) == 0 );
-  	VERIFY( pthread_mutexattr_settype(&mutexAttr, PTHREAD_MUTEX_RECURSIVE_NP) == 0 );
-  	VERIFY( pthread_mutex_init(&m_Mutex, &mutexAttr) == 0 );
-  	VERIFY( pthread_mutexattr_destroy(&mutexAttr) == 0 );
-    #endif
-  }
-
-  ~MyThreadMutex(void)
-  {
-    #if defined(WIN32) || defined(_XBOX)
-  	DeleteCriticalSection(&m_Mutex);
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-  	VERIFY( pthread_mutex_destroy(&m_Mutex) == 0 );
-    #endif
-  }
-
-  void lock(void)
-  {
-    #if defined(WIN32) || defined(_XBOX)
-  	EnterCriticalSection(&m_Mutex);
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-  	VERIFY( pthread_mutex_lock(&m_Mutex) == 0 );
-    #endif
-  }
-
-  bool tryLock(void)
-  {
-    #if defined(WIN32) || defined(_XBOX)
-  	bool bRet = false;
-  	//assert(("TryEnterCriticalSection seems to not work on XP???", 0));
-  	bRet = TryEnterCriticalSection(&m_Mutex) ? true : false;
-  	return bRet;
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-  	NxI32 result = pthread_mutex_trylock(&m_Mutex);
-  	return (result == 0);
-    #endif
-  }
-
-  void unlock(void)
-  {
-    #if defined(WIN32) || defined(_XBOX)
-  	LeaveCriticalSection(&m_Mutex);
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-  	VERIFY( pthread_mutex_unlock(&m_Mutex) == 0 );
-    #endif
-  }
-
-private:
-  #if defined(WIN32) || defined(_XBOX)
-	CRITICAL_SECTION m_Mutex;
-	#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-	pthread_mutex_t  m_Mutex;
-	#endif
-};
-
-ThreadMutex * tc_createThreadMutex(void)
-{
-  MyThreadMutex *m = new MyThreadMutex;
-  return static_cast< ThreadMutex *>(m);
-}
-
-void          tc_releaseThreadMutex(ThreadMutex *tm)
-{
-  MyThreadMutex *m = static_cast< MyThreadMutex *>(tm);
-  delete m;
-}
-
-#if defined(WIN32) || defined(_XBOX)
-static unsigned long __stdcall _ThreadWorkerFunc(LPVOID arg);
-#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-static void* _ThreadWorkerFunc(void* arg);
-#endif
-
-class MyThread : public Thread
-{
-public:
-  MyThread(ThreadInterface *iface)
-  {
-    mInterface = iface;
-	#if defined(WIN32) || defined(_XBOX)
-   	  mThread     = CreateThread(0, 0, _ThreadWorkerFunc, this, 0, 0);
-    #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-	  VERIFY( pthread_create(&mThread, NULL, _ThreadWorkerFunc, this) == 0 );
-	#endif
-  }
-
-  ~MyThread(void)
-  {
-	#if defined(WIN32) || defined(_XBOX)
-      if ( mThread )
-      {
-        CloseHandle(mThread);
-        mThread = 0;
-      }
-	#endif
-  }
-
-  void onJobExecute(void)
-  {
-    mInterface->threadMain();
-  }
-
-private:
-  ThreadInterface *mInterface;
-  #if defined(WIN32) || defined(_XBOX)
-    HANDLE           mThread;
-  #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-    pthread_t mThread;
-  #endif
-};
-
-
-Thread      * tc_createThread(ThreadInterface *tinterface)
-{
-  MyThread *m = new MyThread(tinterface);
-  return static_cast< Thread *>(m);
-}
-
-void          tc_releaseThread(Thread *t)
-{
-  MyThread *m = static_cast<MyThread *>(t);
-  delete m;
-}
-
-#if defined(WIN32) || defined(_XBOX)
-static unsigned long __stdcall _ThreadWorkerFunc(LPVOID arg)
-#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-static void* _ThreadWorkerFunc(void* arg)
-#endif
-{
-  MyThread *worker = (MyThread *) arg;
-	worker->onJobExecute();
-  return 0;
-}
-
-
-class MyThreadEvent : public ThreadEvent
-{
-public:
-  MyThreadEvent(void)
-  {
-	#if defined(WIN32) || defined(_XBOX)
-      mEvent = ::CreateEventA(NULL,TRUE,TRUE,"ThreadEvent");
-	#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-	  pthread_mutexattr_t mutexAttr;  // Mutex Attribute
-	  VERIFY( pthread_mutexattr_init(&mutexAttr) == 0 );
-	  VERIFY( pthread_mutexattr_settype(&mutexAttr, PTHREAD_MUTEX_RECURSIVE_NP) == 0 );
-	  VERIFY( pthread_mutex_init(&mEventMutex, &mutexAttr) == 0 );
-	  VERIFY( pthread_mutexattr_destroy(&mutexAttr) == 0 );
-	  VERIFY( pthread_cond_init(&mEvent, NULL) == 0 );
-	#endif
-  }
-
-  ~MyThreadEvent(void)
-  {
-	#if defined(WIN32) || defined(_XBOX)
-    if ( mEvent )
-    {
-      ::CloseHandle(mEvent);
-    }
-	#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-	  VERIFY( pthread_cond_destroy(&mEvent) == 0 );
-	  VERIFY( pthread_mutex_destroy(&mEventMutex) == 0 );
-	#endif
-  }
-
-  virtual void setEvent(void)  // signal the event
-  {
-	#if defined(WIN32) || defined(_XBOX)
-    if ( mEvent )
-    {
-      ::SetEvent(mEvent);
-    }
-	#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-	  VERIFY( pthread_mutex_lock(&mEventMutex) == 0 );
-	  VERIFY( pthread_cond_signal(&mEvent) == 0 );
-	  VERIFY( pthread_mutex_unlock(&mEventMutex) == 0 );
-	#endif
-  }
-
-  void resetEvent(void)
-  {
-	#if defined(WIN32) || defined(_XBOX)
-    if ( mEvent )
-    {
-      ::ResetEvent(mEvent);
-    }
-	#endif
-  }
-
-  virtual void waitForSingleObject(NxU32 ms)
-  {
-	#if defined(WIN32) || defined(_XBOX)
-    if ( mEvent )
-    {
-      ::WaitForSingleObject(mEvent,ms);
-    }
-	#elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-      VERIFY( pthread_mutex_lock(&mEventMutex) == 0 );
-	  if (ms == 0xffffffff)
-	  {
-		  VERIFY( pthread_cond_wait(&mEvent, &mEventMutex) == 0 );
-	  }
-	  else
-	  {
-	     struct timespec ts;
-        #ifdef __APPLE__
-        struct timeval tp;
-        gettimeofday(&tp, (struct timezone *)0);
-        ts.tv_nsec = tp.tv_usec * 1000;
-        ts.tv_sec = tp.tv_sec;
-        #else
-	     clock_gettime(CLOCK_REALTIME, &ts);
-        #endif
-	     ts.tv_nsec += ms * 1000000;
-	     ts.tv_sec += ts.tv_nsec / 1000000000;
-	     ts.tv_nsec %= 1000000000;
-		  NxI32 result = pthread_cond_timedwait(&mEvent, &mEventMutex, &ts);
-		  assert(result == 0 || result == ETIMEDOUT);
-	  }
-	  VERIFY( pthread_mutex_unlock(&mEventMutex) == 0 );
-	#endif
-  }
-
-private:
-  #if defined(WIN32) || defined(_XBOX)
-    HANDLE mEvent;
-  #elif defined(__APPLE__) || defined(__linux__) ||  defined( __FreeBSD__)
-
-    pthread_mutex_t mEventMutex;
-    pthread_cond_t mEvent;
-  #endif
-};
-
-ThreadEvent * tc_createThreadEvent(void)
-{
-  MyThreadEvent *m = new MyThreadEvent;
-  return static_cast<ThreadEvent *>(m);
-}
-
-void  tc_releaseThreadEvent(ThreadEvent *t)
-{
-  MyThreadEvent *m = static_cast< MyThreadEvent *>(t);
-  delete m;
-}
-
-}; // end of namespace

+ 0 - 119
Engine/lib/convexDecomp/NvThreadConfig.h

@@ -1,119 +0,0 @@
-#ifndef NV_THREAD_CONFIG_H
-
-#define NV_THREAD_CONFIG_H
-
-#include "NvUserMemAlloc.h"
-
-/*
-
-NvThreadConfig.h : A simple wrapper class to define threading and mutex locks.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#ifdef _MSC_VER
-typedef __int64 int64_t;
-#else
-#include <stdint.h>
-#endif
-
-namespace CONVEX_DECOMPOSITION
-{
-
-NxU32 tc_timeGetTime(void);
-void     tc_sleep(NxU32 ms);
-
-void     tc_spinloop();
-void     tc_interlockedExchange(void *dest, const int64_t exchange);
-NxI32      tc_interlockedCompareExchange(void *dest, NxI32 exchange, NxI32 compare);
-NxI32      tc_interlockedCompareExchange(void *dest, const NxI32 exchange1, const NxI32 exchange2, const NxI32 compare1, const NxI32 compare2);
-
-class ThreadMutex
-{
-public:
-  virtual void lock(void) = 0;
-  virtual void unlock(void) = 0;
-  virtual bool tryLock(void) = 0;
-};
-
-
-ThreadMutex * tc_createThreadMutex(void);
-void          tc_releaseThreadMutex(ThreadMutex *tm);
-
-class ThreadInterface
-{
-public:
-  virtual void threadMain(void) = 0;
-};
-
-class Thread
-{
-public:
-};
-
-Thread      * tc_createThread(ThreadInterface *tinterface);
-void          tc_releaseThread(Thread *t);
-
-class ThreadEvent
-{
-public:
-  virtual void setEvent(void) = 0; // signal the event
-  virtual void resetEvent(void) = 0;
-  virtual void waitForSingleObject(NxU32 ms) = 0;
-};
-
-ThreadEvent * tc_createThreadEvent(void);
-void          tc_releaseThreadEvent(ThreadEvent *t);
-
-}; // end of namespace
-
-
-#endif

+ 0 - 81
Engine/lib/convexDecomp/NvUserMemAlloc.h

@@ -1,81 +0,0 @@
-#ifndef NV_USER_MEMALLOC_H
-
-#define NV_USER_MEMALLOC_H
-
-#include "NvSimpleTypes.h"
-
-/*
-
-NvUserMemAlloc.h : Modify these macros to change the default memory allocation behavior of the convex decomposition code.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#ifndef MEMALLOC_NEW // the defaults below are skipped when MEMALLOC_NEW is already defined; an overriding includer must then define all four macros
-#define MEMALLOC_NEW(x) new x // object allocation: expands to a plain 'new' expression
-#define MEMALLOC_MALLOC(x) ::malloc(x) // raw allocation of x bytes through the global malloc
-#define MEMALLOC_FREE(x) ::free(x) // releases a block through the global free
-#define MEMALLOC_REALLOC(x,y) ::realloc(x,y) // resizes block x to y bytes through the global realloc
-#endif
-
-namespace CONVEX_DECOMPOSITION
-{
-
-class Memalloc // empty class: it declares no members of its own
-{
-public:
-};
-
-}; // end of namespace
-
-
-
-#endif

+ 0 - 38
Engine/lib/convexDecomp/readme.txt

@@ -1,38 +0,0 @@
-The ConvexDecomposition library was written by John W. Ratcliff mailto:[email protected]
-
-What is Convex Decomposition?
-
-Convex Decomposition is when you take an arbitrarily complex triangle mesh and sub-divide it into
-a collection of discrete compound pieces (each represented as a convex hull) to approximate
-the original shape of the object.
-
-This is required since few physics engines can treat arbitrary triangle mesh objects as dynamic
-objects.  Even those engines which can handle this use case incur a huge performance and memory
-penalty to do so.
-
-By breaking a complex triangle mesh up into a discrete number of convex components you can greatly
-improve performance for dynamic simulations.
-
---------------------------------------------------------------------------------
-
-This code is released under the MIT license.
-
-The code is functional but could use the following improvements:
-
-(1) The convex hull generator, originally written by Stan Melax, could use some major code cleanup.
-
-(2) The code to remove T-junctions appears to have a bug in it.  This code was working fine before,
-	but I haven't had time to debug why it stopped working.
-
-(3) Island generation once the mesh has been split is currently disabled due to the fact that the
-	Remove Tjunctions functionality has a bug in it.
-
-(4) The code to perform a raycast against a triangle mesh does not currently use any acceleration
-	data structures.
-
-(5) When a split is performed, the surface that got split is not 'capped'.  This causes a problem
-	if you use a high recursion depth on your convex decomposition.  It will cause the object to
-	be modelled as if it had a hollow interior.  A lot of work was done to solve this problem, but
-	it hasn't been integrated into this code drop yet.
-
-

+ 0 - 852
Engine/lib/convexDecomp/wavefront.cpp

@@ -1,852 +0,0 @@
-/*
-
-wavefront.cpp : A very small code snippet to read a Wavefront OBJ file into memory.
-
-*/
-
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-#ifndef __PPCGEKKO__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <ctype.h>
-
-#include "wavefront.h"
-
-#include <vector>
-
-typedef std::vector< NxI32 > IntVector;
-typedef std::vector< NxF32 > FloatVector;
-
-#pragma warning(disable:4996)
-
-namespace WAVEFRONT
-{
-
-
-/*******************************************************************/
-/******************** InParser.h  ********************************/
-/*******************************************************************/
-class InPlaceParserInterface
-{
-public:
-	virtual NxI32 ParseLine(NxI32 lineno,NxI32 argc,const char **argv) =0;  // receives the argc tokens of line 'lineno'. NOTE: the parser never aborts on the returned value; Parse() only remembers the last non-zero result
-};
-
-enum SeparatorType
-{
-	ST_DATA,        // is data
-	ST_HARD,        // is a hard separator (emitted as its own one-character token)
-	ST_SOFT,        // is a soft separator (skipped like whitespace, never emitted as a token)
-	ST_EOS          // is a comment symbol, and everything past this character should be ignored
-};
-
-// Destructive, in-place tokenizer.  The source text is either a caller-owned
-// buffer (SetSourceData) or a file loaded into a buffer this object owns
-// (SetFile).  Every byte value is classified through the 256-entry mHard table.
-// All table lookups go through 'unsigned char' so that bytes >= 0x80 cannot
-// produce a negative index on platforms where plain 'char' is signed.
-class InPlaceParser
-{
-public:
-	InPlaceParser(void)
-	{
-		Init();
-	}
-
-	// Parses the caller-owned buffer 'data' of 'len' bytes; the buffer is modified in place.
-	InPlaceParser(char *data,NxI32 len)
-	{
-		Init();
-		SetSourceData(data,len);
-	}
-
-	// Parses the contents of file 'fname' (see SetFile).
-	InPlaceParser(const char *fname)
-	{
-		Init();
-		SetFile(fname);
-	}
-
-	~InPlaceParser(void);
-
-	// Resets the parser to its default state: no data, '"' as quote character,
-	// NUL as end-of-string and space/tab/CR/LF as soft separators.
-	void Init(void)
-	{
-		mQuoteChar = 34; // '"'
-		mData = 0;
-		mLen  = 0;
-		mMyAlloc = false;
-		for (NxI32 i=0; i<256; i++)
-		{
-			mHard[i] = ST_DATA;
-			// mHardString holds a one-character, zero-terminated string for every byte value
-			mHardString[i*2] = i;
-			mHardString[i*2+1] = 0;
-		}
-		mHard[0]  = ST_EOS;
-		mHard[32] = ST_SOFT; // space
-		mHard[9]  = ST_SOFT; // tab
-		mHard[13] = ST_SOFT; // carriage return
-		mHard[10] = ST_SOFT; // line feed
-	}
-
-	void SetFile(const char *fname); // use this file as source data to parse.
-
-	// Switches to a caller-owned buffer.  A buffer previously loaded by SetFile()
-	// is released first so it is not leaked.
-	void SetSourceData(char *data,NxI32 len)
-	{
-		if ( mMyAlloc )
-		{
-			free(mData);
-		}
-		mData = data;
-		mLen  = len;
-		mMyAlloc = false;
-	}
-
-	NxI32  Parse(InPlaceParserInterface *callback); // tokenizes every line and returns the last non-zero callback result (0 if there was none)
-
-	NxI32 ProcessLine(NxI32 lineno,char *line,InPlaceParserInterface *callback);
-
-	const char ** GetArglist(char *source,NxI32 &count); // convert source string into an arg list, this is a destructive parse.
-
-	void SetHardSeparator(char c) // add a hard separator
-	{
-		mHard[(unsigned char)c] = ST_HARD;
-	}
-
-	void SetHard(char c) // add a hard separator (same as SetHardSeparator)
-	{
-		SetHardSeparator(c);
-	}
-
-
-	void SetCommentSymbol(char c) // comment character, treated as 'end of string'
-	{
-		mHard[(unsigned char)c] = ST_EOS;
-	}
-
-	void ClearHardSeparator(char c) // turn the character back into ordinary data
-	{
-		mHard[(unsigned char)c] = ST_DATA;
-	}
-
-
-	void DefaultSymbols(void); // set up default symbols for hard separator and comment symbol of the '#' character.
-
-	bool EOS(char c) // true when 'c' ends the current line (NUL or a comment symbol)
-	{
-		return mHard[(unsigned char)c] == ST_EOS;
-	}
-
-	void SetQuoteChar(char c)
-	{
-		mQuoteChar = c;
-	}
-
-private:
-
-
-	inline char * AddHard(NxI32 &argc,const char **argv,char *foo);
-	inline bool   IsHard(char c);
-	inline char * SkipSpaces(char *foo);
-	inline bool   IsWhiteSpace(char c);
-	inline bool   IsNonSeparator(char c); // non separator, neither hard nor soft
-
-	bool   mMyAlloc; // whether or not *I* allocated the buffer and am responsible for deleting it.
-	char  *mData;  // ascii data to parse.
-	NxI32    mLen;   // length of data
-	SeparatorType  mHard[256];
-	char   mHardString[256*2];
-	char           mQuoteChar;
-};
-
-/*******************************************************************/
-/******************** InParser.cpp  ********************************/
-/*******************************************************************/
-// Loads the whole of file 'fname' into a malloc'ed, zero-terminated buffer owned
-// by this parser.  Any buffer owned from an earlier call is released first.
-// On any failure (null name, file cannot be opened, empty file, size cannot be
-// determined or does not fit NxI32, out of memory, short read) the parser is
-// left with no data: mData == 0 and mLen == 0.
-void InPlaceParser::SetFile(const char *fname)
-{
-	if ( mMyAlloc )
-	{
-		free(mData);
-	}
-	mData = 0;
-	mLen  = 0;
-	mMyAlloc = false;
-
-	FILE *fph = fname ? fopen(fname,"rb") : 0;
-	if ( !fph )
-	{
-		return;
-	}
-
-	long size = -1;
-	if ( fseek(fph,0L,SEEK_END) == 0 )
-	{
-		size = ftell(fph); // -1L on failure, which the range check below rejects
-	}
-
-	const NxI32 len = (NxI32)size;
-	// accept only a positive size that survives the narrowing to NxI32
-	if ( size > 0 && len > 0 && (long)len == size && fseek(fph,0L,SEEK_SET) == 0 )
-	{
-		char *data = (char *) malloc((size_t)len + 1);
-		if ( data )
-		{
-			if ( fread(data, (size_t)len, 1, fph) == 1 )
-			{
-				data[len] = 0; // zero byte terminate end of file marker.
-				mData = data;
-				mLen  = len;
-				mMyAlloc = true;
-			}
-			else
-			{
-				free(data);
-			}
-		}
-	}
-	fclose(fph);
-}
-
-// Releases the text buffer, but only when this parser allocated it itself.
-InPlaceParser::~InPlaceParser(void)
-{
-	if ( !mMyAlloc ) return; // caller-owned (or no) buffer: nothing to release
-	free(mData);
-}
-
-#define MAXARGS 512
-
-// True when 'c' is registered as a hard separator.  The lookup goes through
-// 'unsigned char' so bytes >= 0x80 cannot index before the start of mHard.
-bool InPlaceParser::IsHard(char c)
-{
-	return mHard[(unsigned char)c] == ST_HARD;
-}
-
-// Appends one token per leading hard separator at 'foo' to argv (while there is
-// room) and returns the first position that is not a hard separator.
-char * InPlaceParser::AddHard(NxI32 &argc,const char **argv,char *foo)
-{
-	while ( IsHard(*foo) )
-	{
-		// each byte value owns a two-byte slot in mHardString: the character plus a terminator
-		const char *hard = &mHardString[((unsigned char)*foo)*2];
-		if ( argc < MAXARGS )
-		{
-			argv[argc++] = hard;
-		}
-		foo++;
-	}
-	return foo;
-}
-
-// True when 'c' is a soft separator (whitespace).
-bool   InPlaceParser::IsWhiteSpace(char c)
-{
-	return mHard[(unsigned char)c] == ST_SOFT;
-}
-
-// Returns the first position at or after 'foo' that is not a soft separator;
-// never advances past an end-of-string character.
-char * InPlaceParser::SkipSpaces(char *foo)
-{
-	while ( !EOS(*foo) && IsWhiteSpace(*foo) ) foo++;
-	return foo;
-}
-
-// True when 'c' is neither a hard separator, a soft separator nor the NUL byte.
-bool InPlaceParser::IsNonSeparator(char c)
-{
-	return !IsHard(c) && !IsWhiteSpace(c) && c != 0;
-}
-
-
-// Tokenizes one zero-terminated line in place and forwards the tokens to the
-// callback.
-//
-//   lineno   : line number, passed through to the callback unchanged.
-//   line     : writable line buffer; separators inside it are overwritten with
-//              zero bytes so that the tokens can point straight into it.
-//   callback : receives the token list when at least one token was found.
-//
-// Returns the callback's result, or 0 when the line produced no tokens.  At most
-// MAXARGS tokens are collected; anything beyond that is ignored.
-NxI32 InPlaceParser::ProcessLine(NxI32 lineno,char *line,InPlaceParserInterface *callback)
-{
-	NxI32 ret = 0;
-
-	const char *argv[MAXARGS];
-	NxI32 argc = 0;
-
-	char *foo = line;
-
-	while ( !EOS(*foo) && argc < MAXARGS )
-	{
-
-		foo = SkipSpaces(foo); // skip any leading spaces
-
-		if ( EOS(*foo) ) break;
-
-		if ( *foo == mQuoteChar ) // if it is an open quote
-		{
-			foo++;
-			if ( argc < MAXARGS )
-			{
-				argv[argc++] = foo;
-			}
-			while ( !EOS(*foo) && *foo != mQuoteChar ) foo++;
-			if ( !EOS(*foo) )
-			{
-				*foo = 0; // replace close quote with zero byte EOS
-				foo++;
-			}
-		}
-		else
-		{
-
-			foo = AddHard(argc,argv,foo); // add any hard separators, skip any spaces
-
-			if ( IsNonSeparator(*foo) )  // add non-hard argument.
-			{
-				// a quote can still show up here when it directly follows a hard separator
-				bool quote  = false;
-				if ( *foo == mQuoteChar )
-				{
-					foo++;
-					quote = true;
-				}
-
-				if ( argc < MAXARGS )
-				{
-					argv[argc++] = foo;
-				}
-
-				if ( quote )
-				{
-					while (*foo && *foo != mQuoteChar ) foo++;
-					if ( *foo ) *foo = 32; // closing quote becomes a space, which ends the token below
-				}
-
-				// continue..until we hit an eos ..
-				while ( !EOS(*foo) ) // until we hit EOS
-				{
-					if ( IsWhiteSpace(*foo) ) // if we hit a space, stomp a zero byte, and exit
-					{
-						*foo = 0;
-						foo++;
-						break;
-					}
-					else if ( IsHard(*foo) ) // if we hit a hard separator, stomp a zero byte and store the hard separator argument
-					{
-						// index through unsigned char: a negative 'char' must not reach before mHardString
-						const char *hard = &mHardString[((unsigned char)*foo)*2];
-						*foo = 0;
-						if ( argc < MAXARGS )
-						{
-							argv[argc++] = hard;
-						}
-						foo++;
-						break;
-					}
-					foo++;
-				} // end of while loop...
-			}
-		}
-	}
-
-	if ( argc )
-	{
-		ret = callback->ParseLine(lineno, argc, argv );
-	}
-
-	return ret;
-}
-
-NxI32  InPlaceParser::Parse(InPlaceParserInterface *callback) // splits the buffer into lines and tokenizes each one; returns the last non-zero ProcessLine() result, or 0 (also 0 when there is no data). Parsing is never aborted early.
-{
-	assert( callback );
-	if ( !mData ) return 0;
-
-	NxI32 ret = 0;
-
-	NxI32 lineno = 0;
-
-	char *foo   = mData;
-	char *begin = foo;
-
-
-	while ( *foo )
-	{
-		if ( *foo == 10 || *foo == 13 ) // line feed or carriage return ends the current line
-		{
-			lineno++;
-			*foo = 0; // terminate the line in place
-
-			if ( *begin ) // if there is any data to parse at all...
-			{
-				NxI32 v = ProcessLine(lineno,begin,callback);
-				if ( v ) ret = v;
-			}
-
-			foo++;
-			if ( *foo == 10 ) foo++; // skip line feed, if it is in the carriage-return line-feed format...
-			begin = foo;
-		}
-		else
-		{
-			foo++;
-		}
-	}
-
-	lineno++; // last line.
-
-	NxI32 v = ProcessLine(lineno,begin,callback); // the text after the final line break, which may be empty
-	if ( v ) ret = v;
-	return ret;
-}
-
-
-// Installs the stock symbol set: , ( ) = [ ] { } become hard separators and '#'
-// becomes the comment symbol.
-void InPlaceParser::DefaultSymbols(void)
-{
-	static const char hardSeparators[] = ",()=[]{}";
-	for (const char *sym = hardSeparators; *sym; ++sym)
-	{
-		SetHardSeparator(*sym);
-	}
-	SetCommentSymbol('#');
-}
-
-
-// Splits 'line' into tokens in place (separators are overwritten with zero
-// bytes) and returns the token list, or 0 when no token was found.  'count'
-// receives the number of tokens, at most MAXARGS.
-//
-// The returned array is a function-local static: it is overwritten by the next
-// call on any InPlaceParser, so copy the pointers if they must outlive it.
-const char ** InPlaceParser::GetArglist(char *line,NxI32 &count) // convert source string into an arg list, this is a destructive parse.
-{
-	const char **ret = 0;
-
-	static const char *argv[MAXARGS];
-	NxI32 argc = 0;
-
-	char *foo = line;
-
-	while ( !EOS(*foo) && argc < MAXARGS )
-	{
-
-		foo = SkipSpaces(foo); // skip any leading spaces
-
-		if ( EOS(*foo) ) break;
-
-		if ( *foo == mQuoteChar ) // if it is an open quote
-		{
-			foo++;
-			if ( argc < MAXARGS )
-			{
-				argv[argc++] = foo;
-			}
-			while ( !EOS(*foo) && *foo != mQuoteChar ) foo++;
-			if ( !EOS(*foo) )
-			{
-				*foo = 0; // replace close quote with zero byte EOS
-				foo++;
-			}
-		}
-		else
-		{
-
-			foo = AddHard(argc,argv,foo); // add any hard separators, skip any spaces
-
-			if ( IsNonSeparator(*foo) )  // add non-hard argument.
-			{
-				// a quote can still show up here when it directly follows a hard separator
-				bool quote  = false;
-				if ( *foo == mQuoteChar )
-				{
-					foo++;
-					quote = true;
-				}
-
-				if ( argc < MAXARGS )
-				{
-					argv[argc++] = foo;
-				}
-
-				if ( quote )
-				{
-					while (*foo && *foo != mQuoteChar ) foo++;
-					if ( *foo ) *foo = 32; // closing quote becomes a space, which ends the token below
-				}
-
-				// continue..until we hit an eos ..
-				while ( !EOS(*foo) ) // until we hit EOS
-				{
-					if ( IsWhiteSpace(*foo) ) // if we hit a space, stomp a zero byte, and exit
-					{
-						*foo = 0;
-						foo++;
-						break;
-					}
-					else if ( IsHard(*foo) ) // if we hit a hard separator, stomp a zero byte and store the hard separator argument
-					{
-						// index through unsigned char: a negative 'char' must not reach before mHardString
-						const char *hard = &mHardString[((unsigned char)*foo)*2];
-						*foo = 0;
-						if ( argc < MAXARGS )
-						{
-							argv[argc++] = hard;
-						}
-						foo++;
-						break;
-					}
-					foo++;
-				} // end of while loop...
-			}
-		}
-	}
-
-	count = argc;
-	if ( argc )
-	{
-		ret = argv;
-	}
-
-	return ret;
-}
-
-/*******************************************************************/
-/******************** Geometry.h  ********************************/
-/*******************************************************************/
-
-class GeometryVertex
-{
-public:
-	NxF32        mPos[3]; // position x, y, z
-	NxF32        mNormal[3]; // normal x, y, z
-	NxF32        mTexel[2]; // texture coordinate, two components
-};
-
-
-class GeometryInterface
-{
-public:
-
-	virtual void NodeTriangle(const GeometryVertex *v1,const GeometryVertex *v2,const GeometryVertex *v3, bool textured) // called once per triangle emitted by the OBJ reader; the default implementation ignores it
-	{
-	}
-
-};
-
-
-/*******************************************************************/
-/******************** Obj.h  ********************************/
-/*******************************************************************/
-
-
-class OBJ : public InPlaceParserInterface
-{
-public:
-  NxI32 LoadMesh(const char *fname,GeometryInterface *callback, bool textured); // parses file 'fname' and reports every triangle to 'callback'
-  NxI32 ParseLine(NxI32 lineno,NxI32 argc,const char **argv);  // handles one tokenized OBJ line; the implementation always returns 0
-private:
-
-  void GetVertex(GeometryVertex &v,const char *face) const; // fills 'v' from one face token such as "1/2/3"
-
-  FloatVector     mVerts; // flat x,y,z triples collected from 'v' lines
-  FloatVector     mTexels; // flat pairs collected from 'vt' lines
-  FloatVector     mNormals; // flat x,y,z triples collected from 'vn' lines
-
-  bool            mTextured; // forwarded unchanged to every NodeTriangle() call
-
-  GeometryInterface *mCallback; // triangle sink; assigned by LoadMesh() and not initialized before that
-};
-
-
-/*******************************************************************/
-/******************** Obj.cpp  ********************************/
-/*******************************************************************/
-
-// Parses the OBJ file 'fname'; every triangle found is reported to 'iface'
-// through the ParseLine() callback.  'textured' is stored and handed on to the
-// triangle callback.  The return value is always 0.
-NxI32 OBJ::LoadMesh(const char *fname,GeometryInterface *iface, bool textured)
-{
-  // start from empty attribute arrays so a reused OBJ does not mix meshes
-  mNormals.clear();
-  mTexels.clear();
-  mVerts.clear();
-
-  mTextured = textured;
-  mCallback = iface;
-
-  InPlaceParser parser(fname);
-  parser.Parse(this);
-
-  return 0;
-}
-
-// Returns argv[i] when i is below argc, otherwise a null pointer.
-static const char * GetArg(const char **argv,NxI32 i,NxI32 argc)
-{
-  return ( i < argc ) ? argv[i] : 0;
-}
-
-void OBJ::GetVertex(GeometryVertex &v,const char *face) const // decodes one face token "pos[/texel[/normal]]" (1-based indices) into 'v'; out-of-range or missing indices keep the defaults set below
-{
-  v.mPos[0] = 0;
-  v.mPos[1] = 0;
-  v.mPos[2] = 0;
-
-  v.mTexel[0] = 0;
-  v.mTexel[1] = 0;
-
-  v.mNormal[0] = 0;
-  v.mNormal[1] = 1; // default normal is (0,1,0)
-  v.mNormal[2] = 0;
-
-  NxI32 index = atoi( face )-1; // convert the 1-based file index to a 0-based array index
-
-  const char *texel = strstr(face,"/");
-
-  if ( texel )
-  {
-    NxI32 tindex = atoi( texel+1) - 1; // an empty field ("1//3") makes atoi return 0, i.e. -1 here, which fails the range check
-
-    if ( tindex >=0 && tindex < (NxI32)(mTexels.size()/2) )
-    {
-    	const NxF32 *t = &mTexels[tindex*2];
-
-      v.mTexel[0] = t[0];
-      v.mTexel[1] = t[1];
-
-    }
-
-    const char *normal = strstr(texel+1,"/");
-    if ( normal )
-    {
-      NxI32 nindex = atoi( normal+1 ) - 1;
-
-      if (nindex >= 0 && nindex < (NxI32)(mNormals.size()/3) )
-      {
-      	const NxF32 *n = &mNormals[nindex*3];
-
-        v.mNormal[0] = n[0];
-        v.mNormal[1] = n[1];
-        v.mNormal[2] = n[2];
-      }
-    }
-  }
-
-  if ( index >= 0 && index < (NxI32)(mVerts.size()/3) ) // negative indices are rejected here, so such a vertex stays at the origin
-  {
-
-    const NxF32 *p = &mVerts[index*3];
-
-    v.mPos[0] = p[0];
-    v.mPos[1] = p[1];
-    v.mPos[2] = p[2];
-  }
-
-}
-
-// Handles one tokenized line of an OBJ file.
-//
-//   "v x y z"       appends a position to mVerts
-//   "vt u v [w]"    appends a texture coordinate to mTexels (third component ignored)
-//   "vn x y z"      appends a normal to mNormals
-//   "f a b c ..."   fan-triangulates the polygon and reports each triangle to mCallback
-//
-// Lines whose first token starts with '#', and anything unrecognized, are
-// ignored.  'lineno' is not used.  Always returns 0.
-NxI32 OBJ::ParseLine(NxI32 lineno,NxI32 argc,const char **argv)
-{
-  NxI32 ret = 0;
-
-  if ( argc >= 1 )
-  {
-    const char *foo = argv[0];
-    if ( *foo != '#' )
-    {
-      if ( _stricmp(argv[0],"v") == 0 && argc == 4 )
-      {
-        NxF32 vx = (NxF32) atof( argv[1] );
-        NxF32 vy = (NxF32) atof( argv[2] );
-        NxF32 vz = (NxF32) atof( argv[3] );
-        mVerts.push_back(vx);
-        mVerts.push_back(vy);
-        mVerts.push_back(vz);
-      }
-      else if ( _stricmp(argv[0],"vt") == 0 && (argc == 3 || argc == 4))
-      {
-        // ignore 3rd component if present
-        NxF32 tx = (NxF32) atof( argv[1] );
-        NxF32 ty = (NxF32) atof( argv[2] );
-        mTexels.push_back(tx);
-        mTexels.push_back(ty);
-      }
-      else if ( _stricmp(argv[0],"vn") == 0 && argc == 4 )
-      {
-        NxF32 normalx = (NxF32) atof(argv[1]);
-        NxF32 normaly = (NxF32) atof(argv[2]);
-        NxF32 normalz = (NxF32) atof(argv[3]);
-        mNormals.push_back(normalx);
-        mNormals.push_back(normaly);
-        mNormals.push_back(normalz);
-      }
-      else if ( _stricmp(argv[0],"f") == 0 && argc >= 4 )
-      {
-        // One slot per face vertex.  A fixed 32-entry array would be overrun by
-        // polygons with more than 32 vertices, so the storage is sized to fit.
-        const NxI32 vcount = argc-1; // at least 3 because argc >= 4
-        std::vector< GeometryVertex > v(vcount);
-
-        for (NxI32 i=0; i<vcount; i++)
-        {
-          GetVertex(v[i],argv[i+1] );
-        }
-
-        mCallback->NodeTriangle(&v[0],&v[1],&v[2], mTextured);
-
-        // remaining triangles of the fan: (0,2,3), (0,3,4), ...
-        for (NxI32 i=2; i<(vcount-1); i++)
-        {
-          mCallback->NodeTriangle(&v[0],&v[i],&v[i+1], mTextured);
-        }
-
-      }
-    }
-  }
-
-  return ret;
-}
-
-
-
-
-class BuildMesh : public GeometryInterface
-{
-public:
-
-	NxI32 GetIndex(const NxF32 *p, const NxF32 *texCoord) // returns the index of an existing vertex that matches exactly (position, plus texture coordinate when texCoord is non-NULL), otherwise appends a new vertex and returns its index
-	{
-
-		NxI32 vcount = (NxI32)mVertices.size()/3;
-
-		if(vcount>0)
-		{
-			//New MS STL library checks indices in debug build, so zero causes an assert if it is empty.
-			const NxF32 *v = &mVertices[0];
-			const NxF32 *t = texCoord != NULL ? &mTexCoords[0] : NULL; // NOTE(review): assumes mTexCoords already holds one pair per vertex whenever texCoord is non-NULL - confirm callers never mix textured and untextured calls
-
-			for (NxI32 i=0; i<vcount; i++) // linear scan over every vertex stored so far
-			{
-				if ( v[0] == p[0] && v[1] == p[1] && v[2] == p[2] ) // exact floating-point comparison, no tolerance
-				{
-					if (texCoord == NULL || (t[0] == texCoord[0] && t[1] == texCoord[1]))
-					{
-						return i;
-					}
-				}
-				v+=3;
-				if (t != NULL)
-					t += 2;
-			}
-		}
-
-		mVertices.push_back( p[0] );
-		mVertices.push_back( p[1] );
-		mVertices.push_back( p[2] );
-
-		if (texCoord != NULL)
-		{
-			mTexCoords.push_back( texCoord[0] );
-			mTexCoords.push_back( texCoord[1] );
-		}
-
-		return vcount; // the new vertex sits at the old vertex count
-	}
-
-	virtual void NodeTriangle(const GeometryVertex *v1,const GeometryVertex *v2,const GeometryVertex *v3, bool textured) // appends the three (de-duplicated) vertex indices of one triangle
-	{
-		mIndices.push_back( GetIndex(v1->mPos, textured ? v1->mTexel : NULL) );
-		mIndices.push_back( GetIndex(v2->mPos, textured ? v2->mTexel : NULL) );
-		mIndices.push_back( GetIndex(v3->mPos, textured ? v3->mTexel : NULL) );
-	}
-
-  const FloatVector& GetVertices(void) const { return mVertices; };
-  const FloatVector& GetTexCoords(void) const { return mTexCoords; };
-  const IntVector& GetIndices(void) const { return mIndices; };
-
-private:
-  FloatVector     mVertices; // flat x,y,z triples
-  FloatVector     mTexCoords; // flat pairs, filled only when texture coordinates are supplied
-  IntVector       mIndices; // three vertex indices per triangle
-};
-
-};
-
-using namespace WAVEFRONT;
-
-// Creates an empty object: zero counts and no vertex, index or texture arrays.
-WavefrontObj::WavefrontObj(void)
-	: mVertexCount(0)
-	, mTriCount(0)
-	, mIndices(0)
-	, mVertices(NULL)
-	, mTexCoords(NULL)
-{
-}
-
-// Releases the arrays created by loadObj().  All of them are allocated with
-// new[], so they must be released with delete[]; the texture coordinate array
-// is released as well so it is not leaked.
-WavefrontObj::~WavefrontObj(void)
-{
-	delete [] mIndices;
-	delete [] mVertices;
-	delete [] mTexCoords;
-}
-
-// Loads the Wavefront OBJ file 'fname' and stores it as flat arrays:
-// mVertices (3 floats per vertex), mIndices (3 indices per triangle) and, when
-// 'textured' is true, mTexCoords (2 floats per vertex).
-//
-// Data from a previous load is released first.  Returns the number of triangles
-// loaded, or 0 when the file yielded no geometry.  The data persists until the
-// next load or until the object is destroyed.
-NxU32 WavefrontObj::loadObj(const char *fname, bool textured)
-{
-	// every array below comes from new[], hence delete[]
-	delete [] mVertices;
-	mVertices = 0;
-	delete [] mTexCoords;
-	mTexCoords = 0;
-	delete [] mIndices;
-	mIndices = 0;
-	mVertexCount = 0;
-	mTriCount = 0;
-
-	BuildMesh bm;
-	OBJ obj;
-	obj.LoadMesh(fname,&bm, textured);
-
-	const FloatVector &vlist = bm.GetVertices();
-	const IntVector &indices = bm.GetIndices();
-	if ( vlist.empty() )
-	{
-		return 0;
-	}
-
-	mVertexCount = (NxU32)(vlist.size()/3);
-	mVertices = new NxF32[mVertexCount*3];
-	memcpy( mVertices, &vlist[0], sizeof(NxF32)*mVertexCount*3 );
-
-	const FloatVector& tList = bm.GetTexCoords();
-	// copy texture coordinates only when there really is one pair per vertex
-	if ( textured && tList.size() >= (size_t)mVertexCount*2 )
-	{
-		mTexCoords = new NxF32[mVertexCount * 2];
-		memcpy( mTexCoords, &tList[0], sizeof(NxF32) * mVertexCount * 2);
-	}
-
-	mTriCount = (NxU32)(indices.size()/3);
-	if ( mTriCount )
-	{
-		// element count, not byte count: multiplying by sizeof here would over-allocate
-		mIndices = new NxU32[mTriCount*3];
-		memcpy(mIndices, &indices[0], sizeof(NxU32)*mTriCount*3);
-	}
-
-	return mTriCount;
-}
-
-#endif

+ 0 - 77
Engine/lib/convexDecomp/wavefront.h

@@ -1,77 +0,0 @@
-#ifndef WAVEFRONT_OBJ_H
-
-#define WAVEFRONT_OBJ_H
-
-/*
-
-wavefront.h : A very small code snippet to read a Wavefront OBJ file into memory.
-
-*/
-
-/*!
-**
-** Copyright (c) 2009 by John W. Ratcliff mailto:[email protected]
-**
-** Portions of this source has been released with the PhysXViewer application, as well as
-** Rocket, CreateDynamics, ODF, and as a number of sample code snippets.
-**
-** If you find this code useful or you are feeling particularily generous I would
-** ask that you please go to http://www.amillionpixels.us and make a donation
-** to Troy DeMolay.
-**
-** DeMolay is a youth group for young men between the ages of 12 and 21.
-** It teaches strong moral principles, as well as leadership skills and
-** public speaking.  The donations page uses the 'pay for pixels' paradigm
-** where, in this case, a pixel is only a single penny.  Donations can be
-** made for as small as $4 or as high as a $100 block.  Each person who donates
-** will get a link to their own site as well as acknowledgement on the
-** donations blog located here http://www.amillionpixels.blogspot.com/
-**
-** If you wish to contact me you can use the following methods:
-**
-** Skype ID: jratcliff63367
-** Yahoo: jratcliff63367
-** AOL: jratcliff1961
-** email: [email protected]
-**
-**
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#include "NvUserMemAlloc.h"
-
-class WavefrontObj
-{
-public:
-
-  WavefrontObj(void);
-  ~WavefrontObj(void);
-
-	NxU32 loadObj(const char *fname, bool textured); // load a wavefront obj; returns the number of triangles that were loaded.  Data persists until the class is destructed.
-
-  NxU32          mVertexCount; // number of vertices in mVertices
-  NxU32          mTriCount; // number of triangles in mIndices
-  NxU32          *mIndices; // 3 indices per triangle; owned by this object
-  NxF32        *mVertices; // 3 floats per vertex; owned by this object
-  NxF32        *mTexCoords; // 2 floats per vertex, filled only by a textured load
-};
-
-#endif

+ 3 - 0
Engine/lib/convexMath/CMakeLists.txt

@@ -0,0 +1,3 @@
+# convexMath: static helper library built from every .cpp file in this directory.
+# CONFIGURE_DEPENDS (CMake >= 3.12) makes the build re-check the glob, so newly
+# added or removed sources are picked up without re-running CMake by hand.
+file(GLOB CONVEX_DECOMP_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
+add_library(convexMath STATIC ${CONVEX_DECOMP_SOURCES})
+# PUBLIC: targets that link convexMath also get this directory on their include path.
+target_include_directories(convexMath PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

+ 17 - 0
Engine/lib/convexMath/FloatMath.cpp

@@ -0,0 +1,17 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+#include "FloatMath.h"
+#include <vector>
+
+#define REAL float // first pass: FloatMath.inl is compiled with REAL = float
+
+#include "FloatMath.inl"
+
+#undef REAL
+#define REAL double // second pass: the same .inl is compiled again with REAL = double (presumably providing the float/double overload pairs declared in FloatMath.h)
+
+#include "FloatMath.inl"

+ 525 - 0
Engine/lib/convexMath/FloatMath.h

@@ -0,0 +1,525 @@
+#ifndef FLOAT_MATH_LIB_H
+
+#define FLOAT_MATH_LIB_H
+
+
+#include <float.h>
+#include <stdint.h>
+
+namespace FLOAT_MATH
+{
+
+enum FM_ClipState
+{
+  FMCS_XMIN       = (1<<0),
+  FMCS_XMAX       = (1<<1),
+  FMCS_YMIN       = (1<<2),
+  FMCS_YMAX       = (1<<3),
+  FMCS_ZMIN       = (1<<4),
+  FMCS_ZMAX       = (1<<5),
+};
+
+enum FM_Axis
+{
+  FM_XAXIS   = (1<<0),
+  FM_YAXIS   = (1<<1),
+  FM_ZAXIS   = (1<<2)
+};
+
+enum LineSegmentType
+{
+  LS_START,
+  LS_MIDDLE,
+  LS_END
+};
+
+
+const float FM_PI = 3.1415926535897932384626433832795028841971693993751f;
+const float FM_DEG_TO_RAD = ((2.0f * FM_PI) / 360.0f);
+const float FM_RAD_TO_DEG = (360.0f / (2.0f * FM_PI));
+
+//***************** Float versions
+//***
+//*** vectors are assumed to be 3 floats or 3 doubles representing X, Y, Z
+//*** quaternions are assumed to be 4 floats or 4 doubles representing X,Y,Z,W
+//*** matrices are assumed to be 16 floats or 16 doubles representing a standard D3D or OpenGL style 4x4 matrix
+//*** bounding volumes are expressed as two sets of 3 floats/double representing bmin(x,y,z) and bmax(x,y,z)
+//*** Plane equations are assumed to be 4 floats or 4 doubles representing Ax,By,Cz,D
+
+FM_Axis fm_getDominantAxis(const float normal[3]);
+FM_Axis fm_getDominantAxis(const double normal[3]);
+
+void fm_decomposeTransform(const float local_transform[16],float trans[3],float rot[4],float scale[3]);
+void fm_decomposeTransform(const double local_transform[16],double trans[3],double rot[4],double scale[3]);
+
+void  fm_multiplyTransform(const float *pA,const float *pB,float *pM);
+void  fm_multiplyTransform(const double *pA,const double *pB,double *pM);
+
+void  fm_inverseTransform(const float matrix[16],float inverse_matrix[16]);
+void  fm_inverseTransform(const double matrix[16],double inverse_matrix[16]);
+
+void  fm_identity(float matrix[16]); // set 4x4 matrix to identity.
+void  fm_identity(double matrix[16]); // set 4x4 matrix to identity.
+
+void  fm_inverseRT(const float matrix[16], const float pos[3], float t[3]); // inverse rotate translate the point.
+void  fm_inverseRT(const double matrix[16],const double pos[3],double t[3]); // inverse rotate translate the point.
+
+void  fm_transform(const float matrix[16], const float pos[3], float t[3]); // rotate and translate this point.
+void  fm_transform(const double matrix[16],const double pos[3],double t[3]); // rotate and translate this point.
+
+float  fm_getDeterminant(const float matrix[16]);
+double fm_getDeterminant(const double matrix[16]);
+
+void fm_getSubMatrix(int32_t ki,int32_t kj,float pDst[16],const float matrix[16]);
+void fm_getSubMatrix(int32_t ki,int32_t kj,double pDst[16],const double matrix[16]); // double overload reads a double source matrix, mirroring the float overload
+
+void  fm_rotate(const float matrix[16],const float pos[3],float t[3]); // only rotate the point by a 4x4 matrix, don't translate.
+void  fm_rotate(const double matrix[16],const double pos[3],double t[3]); // only rotate the point by a 4x4 matrix, don't translate.
+
+void  fm_eulerToMatrix(float ax,float ay,float az,float matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+void  fm_eulerToMatrix(double ax,double ay,double az,double matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+
+void  fm_getAABB(uint32_t vcount,const float *points,uint32_t pstride,float bmin[3],float bmax[3]);
+void  fm_getAABB(uint32_t vcount,const double *points,uint32_t pstride,double bmin[3],double bmax[3]);
+
+void  fm_getAABBCenter(const float bmin[3],const float bmax[3],float center[3]);
+void  fm_getAABBCenter(const double bmin[3],const double bmax[3],double center[3]);
+
+void fm_transformAABB(const float bmin[3],const float bmax[3],const float matrix[16],float tbmin[3],float tbmax[3]);
+void fm_transformAABB(const double bmin[3],const double bmax[3],const double matrix[16],double tbmin[3],double tbmax[3]);
+
+void  fm_eulerToQuat(float x,float y,float z,float quat[4]); // convert euler angles to quaternion.
+void  fm_eulerToQuat(double x,double y,double z,double quat[4]); // convert euler angles to quaternion.
+
+void  fm_quatToEuler(const float quat[4],float &ax,float &ay,float &az);
+void  fm_quatToEuler(const double quat[4],double &ax,double &ay,double &az);
+
+void  fm_eulerToQuat(const float euler[3],float quat[4]); // convert euler angles to quaternion. Angles must be radians not degrees!
+void  fm_eulerToQuat(const double euler[3],double quat[4]); // convert euler angles to quaternion.
+
+void  fm_scale(float x,float y,float z,float matrix[16]); // apply scale to the matrix.
+void  fm_scale(double x,double y,double z,double matrix[16]); // apply scale to the matrix.
+
+void  fm_eulerToQuatDX(float x,float y,float z,float quat[4]); // convert euler angles to quaternion using the DirectX method
+void  fm_eulerToQuatDX(double x,double y,double z,double quat[4]); // convert euler angles to quaternion using the DirectX method
+
+void  fm_eulerToMatrixDX(float x,float y,float z,float matrix[16]); // convert euler angles to a 4x4 matrix using the DirectX method.
+void  fm_eulerToMatrixDX(double x,double y,double z,double matrix[16]); // convert euler angles to a 4x4 matrix using the DirectX method.
+
+void  fm_quatToMatrix(const float quat[4],float matrix[16]); // convert quaternion rotation to matrix, translation set to zero.
+void  fm_quatToMatrix(const double quat[4],double matrix[16]); // convert quaternion rotation to matrix, translation set to zero.
+
+void  fm_quatRotate(const float quat[4],const float v[3],float r[3]); // rotate a vector directly by a quaternion.
+void  fm_quatRotate(const double quat[4],const double v[3],double r[3]); // rotate a vector directly by a quaternion.
+
+void  fm_getTranslation(const float matrix[16],float t[3]);
+void  fm_getTranslation(const double matrix[16],double t[3]);
+
+void  fm_setTranslation(const float *translation,float matrix[16]);
+void  fm_setTranslation(const double *translation,double matrix[16]);
+
+void  fm_multiplyQuat(const float *qa,const float *qb,float *quat);
+void  fm_multiplyQuat(const double *qa,const double *qb,double *quat);
+
+void  fm_matrixToQuat(const float matrix[16],float quat[4]); // convert the 3x3 portion of a 4x4 matrix into a quaternion as x,y,z,w
+void  fm_matrixToQuat(const double matrix[16],double quat[4]); // convert the 3x3 portion of a 4x4 matrix into a quaternion as x,y,z,w
+
+float fm_sphereVolume(float radius); // returns the volume of a sphere of this radius (4/3 PI * R cubed)
+double fm_sphereVolume(double radius); // returns the volume of a sphere of this radius (4/3 PI * R cubed)
+
+float fm_cylinderVolume(float radius,float h);
+double fm_cylinderVolume(double radius,double h);
+
+float fm_capsuleVolume(float radius,float h);
+double fm_capsuleVolume(double radius,double h);
+
+float fm_distance(const float p1[3],const float p2[3]);
+double fm_distance(const double p1[3],const double p2[3]);
+
+float fm_distanceSquared(const float p1[3],const float p2[3]);
+double fm_distanceSquared(const double p1[3],const double p2[3]);
+
+float fm_distanceSquaredXZ(const float p1[3],const float p2[3]);
+double fm_distanceSquaredXZ(const double p1[3],const double p2[3]);
+
+float fm_computePlane(const float p1[3],const float p2[3],const float p3[3],float *n); // return D
+double fm_computePlane(const double p1[3],const double p2[3],const double p3[3],double *n); // return D
+
+float fm_distToPlane(const float plane[4],const float pos[3]); // computes the distance of this point from the plane.
+double fm_distToPlane(const double plane[4],const double pos[3]); // computes the distance of this point from the plane.
+
+float fm_dot(const float p1[3],const float p2[3]);
+double fm_dot(const double p1[3],const double p2[3]);
+
+void  fm_cross(float cross[3],const float a[3],const float b[3]);
+void  fm_cross(double cross[3],const double a[3],const double b[3]);
+
+float  fm_computeNormalVector(float n[3],const float p1[3],const float p2[3]); // as P2-P1 normalized.
+double  fm_computeNormalVector(double n[3],const double p1[3],const double p2[3]); // as P2-P1 normalized.
+
+bool  fm_computeWindingOrder(const float p1[3],const float p2[3],const float p3[3]); // returns true if the triangle is clockwise.
+bool  fm_computeWindingOrder(const double p1[3],const double p2[3],const double p3[3]); // returns true if the triangle is clockwise.
+
+float  fm_normalize(float n[3]); // normalize this vector and return the distance
+double  fm_normalize(double n[3]); // normalize this vector and return the distance
+
+float  fm_normalizeQuat(float n[4]); // normalize this quat
+double  fm_normalizeQuat(double n[4]); // normalize this quat
+
+void  fm_matrixMultiply(const float A[16],const float B[16],float dest[16]);
+void  fm_matrixMultiply(const double A[16],const double B[16],double dest[16]);
+
+void  fm_composeTransform(const float position[3],const float quat[4],const float scale[3],float matrix[16]);
+void  fm_composeTransform(const double position[3],const double quat[4],const double scale[3],double matrix[16]);
+
+float fm_computeArea(const float p1[3],const float p2[3],const float p3[3]);
+double fm_computeArea(const double p1[3],const double p2[3],const double p3[3]);
+
+void  fm_lerp(const float p1[3],const float p2[3],float dest[3],float lerpValue);
+void  fm_lerp(const double p1[3],const double p2[3],double dest[3],double lerpValue);
+
+bool  fm_insideTriangleXZ(const float test[3],const float p1[3],const float p2[3],const float p3[3]);
+bool  fm_insideTriangleXZ(const double test[3],const double p1[3],const double p2[3],const double p3[3]);
+
+bool  fm_insideAABB(const float pos[3],const float bmin[3],const float bmax[3]);
+bool  fm_insideAABB(const double pos[3],const double bmin[3],const double bmax[3]);
+
+bool  fm_insideAABB(const float obmin[3],const float obmax[3],const float tbmin[3],const float tbmax[3]); // test if bounding box tbmin/tbmax is fully inside obmin/obmax
+bool  fm_insideAABB(const double obmin[3],const double obmax[3],const double tbmin[3],const double tbmax[3]); // test if bounding box tbmin/tbmax is fully inside obmin/obmax
+
+uint32_t fm_clipTestPoint(const float bmin[3],const float bmax[3],const float pos[3]);
+uint32_t fm_clipTestPoint(const double bmin[3],const double bmax[3],const double pos[3]);
+
+uint32_t fm_clipTestPointXZ(const float bmin[3],const float bmax[3],const float pos[3]); // only tests X and Z, not Y
+uint32_t fm_clipTestPointXZ(const double bmin[3],const double bmax[3],const double pos[3]); // only tests X and Z, not Y
+
+
+uint32_t fm_clipTestAABB(const float bmin[3],const float bmax[3],const float p1[3],const float p2[3],const float p3[3],uint32_t &andCode);
+uint32_t fm_clipTestAABB(const double bmin[3],const double bmax[3],const double p1[3],const double p2[3],const double p3[3],uint32_t &andCode);
+
+
+bool     fm_lineTestAABBXZ(const float p1[3],const float p2[3],const float bmin[3],const float bmax[3],float &time);
+bool     fm_lineTestAABBXZ(const double p1[3],const double p2[3],const double bmin[3],const double bmax[3],double &time);
+
+bool     fm_lineTestAABB(const float p1[3],const float p2[3],const float bmin[3],const float bmax[3],float &time);
+bool     fm_lineTestAABB(const double p1[3],const double p2[3],const double bmin[3],const double bmax[3],double &time);
+
+
+void  fm_initMinMax(const float p[3],float bmin[3],float bmax[3]);
+void  fm_initMinMax(const double p[3],double bmin[3],double bmax[3]);
+
+void  fm_initMinMax(float bmin[3],float bmax[3]);
+void  fm_initMinMax(double bmin[3],double bmax[3]);
+
+void  fm_minmax(const float p[3],float bmin[3],float bmax[3]); // accumulate to a min-max value
+void  fm_minmax(const double p[3],double bmin[3],double bmax[3]); // accumulate to a min-max value
+
+// Computes the diagonal length of the bounding box and then inflates the bounding box on all sides
+// by the ratio provided.
+void fm_inflateMinMax(float bmin[3], float bmax[3], float ratio);
+void fm_inflateMinMax(double bmin[3], double bmax[3], double ratio);
+
+float fm_solveX(const float plane[4],float y,float z); // solve for X given this plane equation and the other two components.
+double fm_solveX(const double plane[4],double y,double z); // solve for X given this plane equation and the other two components.
+
+float fm_solveY(const float plane[4],float x,float z); // solve for Y given this plane equation and the other two components.
+double fm_solveY(const double plane[4],double x,double z); // solve for Y given this plane equation and the other two components.
+
+float fm_solveZ(const float plane[4],float x,float y); // solve for Z given this plane equation and the other two components.
+double fm_solveZ(const double plane[4],double x,double y); // solve for Z given this plane equation and the other two components.
+
+bool  fm_computeBestFitPlane(uint32_t vcount,     // number of input data points
+	const float *points,     // starting address of points array.
+	uint32_t vstride,    // stride between input points.
+	const float *weights,    // *optional point weighting values.
+	uint32_t wstride,    // weight stride for each vertex.
+	float plane[4],		// Best fit plane equation
+	float center[3]);  // Best fit weighted center of input points
+
+bool  fm_computeBestFitPlane(uint32_t vcount,     // number of input data points
+	const double *points,     // starting address of points array.
+	uint32_t vstride,    // stride between input points.
+	const double *weights,    // *optional point weighting values.
+	uint32_t wstride,    // weight stride for each vertex.
+	double plane[4],
+	double center[3]); 
+
+// Computes the average center of a set of data points
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+						 const float *points,     // starting address of points array.
+						 float *center);
+
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+						 const double *points,     // starting address of points array.
+						 double *center);
+
+// Compute centroid of a triangle mesh; takes area of each triangle into account
+// weighted average
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+						const float *points,     // starting address of points array.
+						uint32_t triangleCount,
+						const uint32_t *indices,
+						float *center);
+
+// Compute centroid of a triangle mesh; takes area of each triangle into account
+// weighted average
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+	const double *points,     // starting address of points array.
+	uint32_t triangleCount,
+	const uint32_t *indices,
+	double *center);
+
+
+float  fm_computeBestFitAABB(uint32_t vcount,const float *points,uint32_t pstride,float bmin[3],float bmax[3]); // returns the diagonal distance
+double fm_computeBestFitAABB(uint32_t vcount,const double *points,uint32_t pstride,double bmin[3],double bmax[3]); // returns the diagonal distance
+
+float  fm_computeBestFitSphere(uint32_t vcount,const float *points,uint32_t pstride,float center[3]);
+double  fm_computeBestFitSphere(uint32_t vcount,const double *points,uint32_t pstride,double center[3]);
+
+bool fm_lineSphereIntersect(const float center[3],float radius,const float p1[3],const float p2[3],float intersect[3]);
+bool fm_lineSphereIntersect(const double center[3],double radius,const double p1[3],const double p2[3],double intersect[3]);
+
+bool fm_intersectRayAABB(const float bmin[3],const float bmax[3],const float pos[3],const float dir[3],float intersect[3]);
+bool fm_intersectLineSegmentAABB(const float bmin[3],const float bmax[3],const float p1[3],const float p2[3],float intersect[3]);
+
+bool fm_lineIntersectsTriangle(const float rayStart[3],const float rayEnd[3],const float p1[3],const float p2[3],const float p3[3],float sect[3]);
+bool fm_lineIntersectsTriangle(const double rayStart[3],const double rayEnd[3],const double p1[3],const double p2[3],const double p3[3],double sect[3]);
+
+bool fm_rayIntersectsTriangle(const float origin[3],const float dir[3],const float v0[3],const float v1[3],const float v2[3],float &t);
+bool fm_rayIntersectsTriangle(const double origin[3],const double dir[3],const double v0[3],const double v1[3],const double v2[3],double &t);
+
+bool fm_raySphereIntersect(const float center[3],float radius,const float pos[3],const float dir[3],float distance,float intersect[3]);
+bool fm_raySphereIntersect(const double center[3],double radius,const double pos[3],const double dir[3],double distance,double intersect[3]);
+
+void fm_catmullRom(float out_vector[3],const float p1[3],const float p2[3],const float p3[3],const float *p4, const float s);
+void fm_catmullRom(double out_vector[3],const double p1[3],const double p2[3],const double p3[3],const double *p4, const double s);
+
+bool fm_intersectAABB(const float bmin1[3],const float bmax1[3],const float bmin2[3],const float bmax2[3]);
+bool fm_intersectAABB(const double bmin1[3],const double bmax1[3],const double bmin2[3],const double bmax2[3]);
+
+
+// computes the rotation quaternion to go from unit-vector v0 to unit-vector v1
+void fm_rotationArc(const float v0[3],const float v1[3],float quat[4]);
+void fm_rotationArc(const double v0[3],const double v1[3],double quat[4]);
+
+float  fm_distancePointLineSegment(const float Point[3],const float LineStart[3],const float LineEnd[3],float intersection[3],LineSegmentType &type,float epsilon);
+double fm_distancePointLineSegment(const double Point[3],const double LineStart[3],const double LineEnd[3],double intersection[3],LineSegmentType &type,double epsilon);
+
+
+bool fm_colinear(const double p1[3],const double p2[3],const double p3[3],double epsilon=0.999);               // true if these three points in a row are co-linear
+bool fm_colinear(const float  p1[3],const float  p2[3],const float p3[3],float epsilon=0.999f);
+
+bool fm_colinear(const float a1[3],const float a2[3],const float b1[3],const float b2[3],float epsilon=0.999f);  // true if these two line segments are co-linear.
+bool fm_colinear(const double a1[3],const double a2[3],const double b1[3],const double b2[3],double epsilon=0.999);  // true if these two line segments are co-linear.
+
+enum IntersectResult
+{
+  IR_DONT_INTERSECT,
+  IR_DO_INTERSECT,
+  IR_COINCIDENT,
+  IR_PARALLEL,
+};
+
+IntersectResult fm_intersectLineSegments2d(const float a1[3], const float a2[3], const float b1[3], const float b2[3], float intersectionPoint[3]);
+IntersectResult fm_intersectLineSegments2d(const double a1[3],const double a2[3],const double b1[3],const double b2[3],double intersectionPoint[3]);
+
+IntersectResult fm_intersectLineSegments2dTime(const float a1[3], const float a2[3], const float b1[3], const float b2[3],float &t1,float &t2);
+IntersectResult fm_intersectLineSegments2dTime(const double a1[3],const double a2[3],const double b1[3],const double b2[3],double &t1,double &t2);
+
+// Plane-Triangle splitting
+
+enum PlaneTriResult
+{
+  PTR_ON_PLANE,
+  PTR_FRONT,
+  PTR_BACK,
+  PTR_SPLIT,
+};
+
+PlaneTriResult fm_planeTriIntersection(const float plane[4],    // the plane equation in Ax+By+Cz+D format
+									const float *triangle, // the source triangle.
+									uint32_t tstride,  // stride in bytes of the input and output *vertices*
+									float        epsilon,  // the co-planar epsilon value.
+									float       *front,    // the triangle in front of the plane
+									uint32_t &fcount,  // number of vertices in the 'front' triangle
+									float       *back,     // the triangle in back of the plane
+									uint32_t &bcount); // the number of vertices in the 'back' triangle.
+
+
+PlaneTriResult fm_planeTriIntersection(const double plane[4],    // the plane equation in Ax+By+Cz+D format
+									const double *triangle, // the source triangle.
+									uint32_t tstride,  // stride in bytes of the input and output *vertices*
+									double        epsilon,  // the co-planar epsilon value.
+									double       *front,    // the triangle in front of the plane
+									uint32_t &fcount,  // number of vertices in the 'front' triangle
+									double       *back,     // the triangle in back of the plane
+									uint32_t &bcount); // the number of vertices in the 'back' triangle.
+
+
+bool fm_intersectPointPlane(const float p1[3],const float p2[3],float *split,const float plane[4]);
+bool fm_intersectPointPlane(const double p1[3],const double p2[3],double *split,const double plane[4]);
+
+PlaneTriResult fm_getSidePlane(const float p[3],const float plane[4],float epsilon);
+PlaneTriResult fm_getSidePlane(const double p[3],const double plane[4],double epsilon);
+
+
+void fm_computeBestFitOBB(uint32_t vcount,const float *points,uint32_t pstride,float *sides,float matrix[16],bool bruteForce=true);
+void fm_computeBestFitOBB(uint32_t vcount,const double *points,uint32_t pstride,double *sides,double matrix[16],bool bruteForce=true);
+
+void fm_computeBestFitOBB(uint32_t vcount,const float *points,uint32_t pstride,float *sides,float pos[3],float quat[4],bool bruteForce=true);
+void fm_computeBestFitOBB(uint32_t vcount,const double *points,uint32_t pstride,double *sides,double pos[3],double quat[4],bool bruteForce=true);
+
+void fm_computeBestFitABB(uint32_t vcount,const float *points,uint32_t pstride,float *sides,float pos[3]);
+void fm_computeBestFitABB(uint32_t vcount,const double *points,uint32_t pstride,double *sides,double pos[3]);
+
+
+//** Note, if the returned capsule height is less than zero, then you must represent it as a sphere of size radius.
+void fm_computeBestFitCapsule(uint32_t vcount,const float *points,uint32_t pstride,float &radius,float &height,float matrix[16],bool bruteForce=true);
+void fm_computeBestFitCapsule(uint32_t vcount,const double *points,uint32_t pstride,float &radius,float &height,double matrix[16],bool bruteForce=true); // NOTE(review): radius/height are float& even in the double overload - confirm this matches the definition
+
+
+void fm_planeToMatrix(const float plane[4],float matrix[16]); // convert a plane equation to a 4x4 rotation matrix.  Reference vector is 0,1,0
+void fm_planeToQuat(const float plane[4],float quat[4],float pos[3]); // convert a plane equation to a quaternion and translation
+
+void fm_planeToMatrix(const double plane[4],double matrix[16]); // convert a plane equation to a 4x4 rotation matrix
+void fm_planeToQuat(const double plane[4],double quat[4],double pos[3]); // convert a plane equation to a quaternion and translation
+
+inline void fm_doubleToFloat3(const double p[3],float t[3]) { for (int i = 0; i < 3; ++i) { t[i] = static_cast<float>(p[i]); } } // narrow each of the 3 components to float
+inline void fm_floatToDouble3(const float p[3],double t[3]) { for (int i = 0; i < 3; ++i) { t[i] = static_cast<double>(p[i]); } } // widen each of the 3 components to double
+
+
+void  fm_eulerMatrix(float ax,float ay,float az,float matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+void  fm_eulerMatrix(double ax,double ay,double az,double matrix[16]); // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+
+
+float  fm_computeMeshVolume(const float *vertices,uint32_t tcount,const uint32_t *indices);
+double fm_computeMeshVolume(const double *vertices,uint32_t tcount,const uint32_t *indices);
+
+
+#define FM_DEFAULT_GRANULARITY 0.001f  // 1 millimeter is the default granularity
+
+class fm_VertexIndex
+{
+public:
+  virtual uint32_t          getIndex(const float pos[3],bool &newPos) = 0;  // get welded index for this float vector[3]
+  virtual uint32_t          getIndex(const double pos[3],bool &newPos) = 0;  // get welded index for this double vector[3]
+  virtual const float *   getVerticesFloat(void) const = 0;
+  virtual const double *  getVerticesDouble(void) const = 0;
+  virtual const float *   getVertexFloat(uint32_t index) const = 0;
+  virtual const double *  getVertexDouble(uint32_t index) const = 0;
+  virtual uint32_t          getVcount(void) const = 0;
+  virtual bool            isDouble(void) const = 0;
+  virtual bool            saveAsObj(const char *fname,uint32_t tcount,uint32_t *indices) = 0;
+};
+
+fm_VertexIndex * fm_createVertexIndex(double granularity,bool snapToGrid); // create an indexed vertex system for doubles
+fm_VertexIndex * fm_createVertexIndex(float granularity,bool snapToGrid);  // create an indexed vertex system for floats
+void             fm_releaseVertexIndex(fm_VertexIndex *vindex);
+
+
+class fm_Triangulate
+{
+public:
+  virtual const double *       triangulate3d(uint32_t pcount,
+											 const double *points,
+											 uint32_t vstride,
+											 uint32_t &tcount,
+											 bool consolidate,
+											 double epsilon) = 0;
+
+  virtual const float  *       triangulate3d(uint32_t pcount,
+											 const float  *points,
+											 uint32_t vstride,
+											 uint32_t &tcount,
+											 bool consolidate,
+											 float epsilon) = 0;
+};
+
+fm_Triangulate * fm_createTriangulate(void);
+void             fm_releaseTriangulate(fm_Triangulate *t);
+
+
+const float * fm_getPoint(const float *points,uint32_t pstride,uint32_t index);
+const double * fm_getPoint(const double *points,uint32_t pstride,uint32_t index);
+
+bool   fm_insideTriangle(float Ax, float Ay,float Bx, float By,float Cx, float Cy,float Px, float Py);
+bool   fm_insideTriangle(double Ax, double Ay,double Bx, double By,double Cx, double Cy,double Px, double Py);
+float  fm_areaPolygon2d(uint32_t pcount,const float *points,uint32_t pstride);
+double fm_areaPolygon2d(uint32_t pcount,const double *points,uint32_t pstride);
+
+bool  fm_pointInsidePolygon2d(uint32_t pcount,const float *points,uint32_t pstride,const float *point,uint32_t xindex=0,uint32_t yindex=1);
+bool  fm_pointInsidePolygon2d(uint32_t pcount,const double *points,uint32_t pstride,const double *point,uint32_t xindex=0,uint32_t yindex=1);
+
+uint32_t fm_consolidatePolygon(uint32_t pcount,const float *points,uint32_t pstride,float *dest,float epsilon=0.999999f); // collapses co-linear edges.
+uint32_t fm_consolidatePolygon(uint32_t pcount,const double *points,uint32_t pstride,double *dest,double epsilon=0.999999); // collapses co-linear edges.
+
+
+bool fm_computeSplitPlane(uint32_t vcount,const double *vertices,uint32_t tcount,const uint32_t *indices,double *plane);
+bool fm_computeSplitPlane(uint32_t vcount,const float *vertices,uint32_t tcount,const uint32_t *indices,float *plane);
+
+void fm_nearestPointInTriangle(const float *pos,const float *p1,const float *p2,const float *p3,float *nearest);
+void fm_nearestPointInTriangle(const double *pos,const double *p1,const double *p2,const double *p3,double *nearest);
+
+float  fm_areaTriangle(const float *p1,const float *p2,const float *p3);
+double fm_areaTriangle(const double *p1,const double *p2,const double *p3);
+
+void fm_subtract(const float *A,const float *B,float *diff); // compute A-B and store the result in 'diff'
+void fm_subtract(const double *A,const double *B,double *diff); // compute A-B and store the result in 'diff'
+
+void fm_multiply(float *A,float scalar);
+void fm_multiply(double *A,double scalar);
+
+void fm_add(const float *A,const float *B,float *sum);
+void fm_add(const double *A,const double *B,double *sum);
+
+void fm_copy3(const float *source,float *dest);
+void fm_copy3(const double *source,double *dest);
+
+// Re-indexes an indexed triangle mesh and drops unused vertices.  The output_indices can be the same pointer as the input indices.
+// The output_vertices can point to the input vertices if you desire.  The output_vertices buffer should be at least the same size
+// as the input buffer.  The routine returns the new vertex count after re-indexing.
+uint32_t  fm_copyUniqueVertices(uint32_t vcount,const float *input_vertices,float *output_vertices,uint32_t tcount,const uint32_t *input_indices,uint32_t *output_indices);
+uint32_t  fm_copyUniqueVertices(uint32_t vcount,const double *input_vertices,double *output_vertices,uint32_t tcount,const uint32_t *input_indices,uint32_t *output_indices);
+
+bool    fm_isMeshCoplanar(uint32_t tcount,const uint32_t *indices,const float *vertices,bool doubleSided); // returns true if this collection of indexed triangles are co-planar!
+bool    fm_isMeshCoplanar(uint32_t tcount,const uint32_t *indices,const double *vertices,bool doubleSided); // returns true if this collection of indexed triangles are co-planar!
+
+bool    fm_samePlane(const float p1[4],const float p2[4],float normalEpsilon=0.01f,float dEpsilon=0.001f,bool doubleSided=false); // returns true if these two plane equations are identical within an epsilon
+bool    fm_samePlane(const double p1[4],const double p2[4],double normalEpsilon=0.01,double dEpsilon=0.001,bool doubleSided=false);
+
+void    fm_OBBtoAABB(const float obmin[3],const float obmax[3],const float matrix[16],float abmin[3],float abmax[3]);
+
+// a utility class that will tessellate a mesh.
+class fm_Tesselate
+{
+public:
+  virtual const uint32_t * tesselate(fm_VertexIndex *vindex,uint32_t tcount,const uint32_t *indices,float longEdge,uint32_t maxDepth,uint32_t &outcount) = 0;
+};
+
+fm_Tesselate * fm_createTesselate(void);
+void           fm_releaseTesselate(fm_Tesselate *t);
+
+void fm_computeMeanNormals(uint32_t vcount,       // the number of vertices
+						   const float *vertices,     // the base address of the vertex position data.
+						   uint32_t vstride,      // the stride between position data.
+						   float *normals,            // the base address  of the destination for mean vector normals
+						   uint32_t nstride,      // the stride between normals
+						   uint32_t tcount,       // the number of triangles
+						   const uint32_t *indices);     // the triangle indices
+
+void fm_computeMeanNormals(uint32_t vcount,       // the number of vertices
+						   const double *vertices,     // the base address of the vertex position data.
+						   uint32_t vstride,      // the stride between position data.
+						   double *normals,            // the base address  of the destination for mean vector normals
+						   uint32_t nstride,      // the stride between normals
+						   uint32_t tcount,       // the number of triangles
+						   const uint32_t *indices);     // the triangle indices
+
+
+bool fm_isValidTriangle(const float *p1,const float *p2,const float *p3,float epsilon=0.00001f);
+bool fm_isValidTriangle(const double *p1,const double *p2,const double *p3,double epsilon=0.00001); // double literal: an 'f' suffix would round the default through float first
+
+
+}; // end of namespace
+
+#endif

+ 5280 - 0
Engine/lib/convexMath/FloatMath.inl

@@ -0,0 +1,5280 @@
+// a set of routines that let you do common 3d math
+// operations without any vector, matrix, or quaternion
+// classes or templates.
+//
+// a vector (or point) is a 'float *' to 3 floating point numbers.
+// a matrix is a 'float *' to an array of 16 floating point numbers representing a 4x4 transformation matrix compatible with D3D or OGL
+// a quaternion is a 'float *' to 4 floats representing a quaternion x,y,z,w
+//
+
+#ifdef _MSC_VER
+#pragma warning(disable:4996)
+#endif
+
+namespace FLOAT_MATH
+{
+
+void fm_inverseRT(const REAL matrix[16],const REAL pos[3],REAL t[3]) // inverse rotate translate the point.
+{
+	// Step 1: remove the translation stored in elements 12..14.
+	const REAL dx = pos[0] - matrix[12];
+	const REAL dy = pos[1] - matrix[13];
+	const REAL dz = pos[2] - matrix[14];
+
+	// Step 2: dot the offset with each of the first three rows, i.e. apply the
+	// transpose of the 3x3 block that fm_rotate applies.
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+		t[axis] = (matrix[axis*4+0] * dx) + (matrix[axis*4+1] * dy) + (matrix[axis*4+2] * dz);
+	}
+}
+
+// Returns the determinant of the upper-left 3x3 block of a 4x4 matrix,
+// evaluated as the scalar triple product row0 . (row1 x row2).
+REAL fm_getDeterminant(const REAL matrix[16])
+{
+	// Gather the first three elements of rows 0..2 into standalone vectors.
+	REAL rows[3][3];
+	for (int32_t r = 0; r < 3; r++)
+	{
+		for (int32_t c = 0; c < 3; c++)
+		{
+			rows[r][c] = matrix[r*4+c];
+		}
+	}
+
+	REAL rowCross[3];
+	fm_cross(rowCross,rows[1],rows[2]);
+
+	return fm_dot(rows[0],rowCross);
+}
+
+// Returns x multiplied by itself.
+REAL fm_squared(REAL x)
+{
+	return x*x;
+}
+
+// Splits a 4x4 transform into translation, rotation quaternion (x,y,z,w) and
+// per-row scale. No guard is applied for a zero-length row.
+void fm_decomposeTransform(const REAL local_transform[16],REAL trans[3],REAL rot[4],REAL scale[3])
+{
+	// Translation is stored in elements 12..14.
+	for (int32_t i = 0; i < 3; i++)
+	{
+		trans[i] = local_transform[12+i];
+	}
+
+	// Scale of each axis is the length of the first three elements of its row.
+	for (int32_t r = 0; r < 3; r++)
+	{
+		const REAL *row = &local_transform[r*4];
+		scale[r] = (REAL)sqrt(fm_squared(row[0]) + fm_squared(row[1]) + fm_squared(row[2]));
+	}
+
+	// Divide the scale out of a working copy so only the rotation remains.
+	REAL unscaled[16];
+	memcpy(unscaled,local_transform,sizeof(REAL)*16);
+	for (int32_t r = 0; r < 3; r++)
+	{
+		const REAL inv = 1.0f / scale[r];
+		unscaled[r*4+0]*=inv;
+		unscaled[r*4+1]*=inv;
+		unscaled[r*4+2]*=inv;
+	}
+
+	fm_matrixToQuat(unscaled,rot);
+}
+
+// Copies 'matrix' into 'pDst' while skipping the index 'ki' in the inner
+// dimension and 'kj' in the outer dimension; the surviving 3x3 values are
+// packed into the top-left of pDst using the same 4-wide stride.
+void fm_getSubMatrix(int32_t ki,int32_t kj,REAL pDst[16],const REAL matrix[16])
+{
+	int32_t outCol = 0;
+	for (int32_t srcCol = 0; srcCol < 4; srcCol++)
+	{
+		if ( srcCol != kj )
+		{
+			int32_t outRow = 0;
+			for (int32_t srcRow = 0; srcRow < 4; srcRow++)
+			{
+				if ( srcRow != ki )
+				{
+					pDst[outCol*4+outRow] = matrix[srcCol*4+srcRow];
+					outRow++;
+				}
+			}
+			outCol++;
+		}
+	}
+}
+
+// Builds 'inverse_matrix' element by element from signed 3x3 minors divided by
+// fm_getDeterminant(matrix). No check is made for a zero determinant.
+void  fm_inverseTransform(const REAL matrix[16],REAL inverse_matrix[16])
+{
+	const REAL invDeterminant = 1.0f / fm_getDeterminant(matrix);
+
+	for (int32_t idx = 0; idx < 16; idx++)
+	{
+		const int32_t i = idx / 4;
+		const int32_t j = idx % 4;
+
+		// Checkerboard sign: +1 when (i+j) is even, -1 when odd.
+		const int32_t sign = ( ( i + j ) % 2 ) ? -1 : 1;
+
+		REAL minor[16];
+		fm_identity(minor);
+		fm_getSubMatrix( i, j, minor, matrix );
+
+		const REAL minorDeterminant = fm_getDeterminant(minor);
+		inverse_matrix[idx] = ( minorDeterminant * sign ) * invDeterminant;
+	}
+}
+
+void fm_identity(REAL matrix[16]) // set 4x4 matrix to identity.
+{
+	// Diagonal elements sit at indices 0, 5, 10 and 15 (multiples of 5).
+	for (int32_t i = 0; i < 16; i++)
+	{
+		matrix[i] = ( (i % 5) == 0 ) ? (REAL)1 : (REAL)0;
+	}
+}
+
+// Converts a quaternion stored as x,y,z,w into three Euler angles (radians).
+//
+//   quat       : source quaternion (x,y,z,w).
+//   ax, ay, az : receive the three output angles.
+//
+// 'sint' is the sine of the middle angle (ay) and 'cost' its cosine. When the
+// cosine is too close to zero the outer angles are not separable, so the
+// fallback branch puts the whole remaining rotation into 'ax' and sets 'az' to 0.
+void  fm_quatToEuler(const REAL quat[4],REAL &ax,REAL &ay,REAL &az)
+{
+	REAL x = quat[0];
+	REAL y = quat[1];
+	REAL z = quat[2];
+	REAL w = quat[3];
+
+	REAL sint	     = (2.0f * w * y) - (2.0f * x * z);
+	REAL cost_temp = 1.0f - (sint * sint);
+	REAL cost	   	 = 0;
+
+	// Only take the root of a safely positive radicand; a slightly
+	// non-normalized quaternion can push it below zero, which would yield NaN.
+	if ( cost_temp > 0.001f )
+	{
+		cost = (REAL)sqrt( cost_temp );
+	}
+
+	REAL sinv, cosv, sinf, cosf;
+	if ( (REAL)fabs(cost) > 0.001f )
+	{
+		// Keep the reciprocal in its own variable: 'cost' itself is still
+		// needed below as the cosine argument of atan2 for 'ay'.
+		REAL recip = 1.0f / cost;
+		sinv = ((2.0f * y * z) + (2.0f * w * x)) * recip;
+		cosv = (1.0f - (2.0f * x * x) - (2.0f * y * y)) * recip;
+		sinf = ((2.0f * x * y) + (2.0f * w * z)) * recip;
+		cosf = (1.0f - (2.0f * y * y) - (2.0f * z * z)) * recip;
+	}
+	else
+	{
+		sinv = (2.0f * w * x) - (2.0f * y * z);
+		cosv = 1.0f - (2.0f * x * x) - (2.0f * z * z);
+		sinf = 0;
+		cosf = 1.0f;
+	}
+
+	// compute output rotations
+	ax	= (REAL)atan2( sinv, cosv );
+	ay	= (REAL)atan2( sint, cost );
+	az	= (REAL)atan2( sinf, cosf );
+
+}
+
+void fm_eulerToMatrix(REAL ax,REAL ay,REAL az,REAL *matrix) // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+{
+	// Go through a quaternion: euler -> quat -> matrix.
+	REAL rotation[4];
+	fm_eulerToQuat(ax,ay,az,rotation);
+	fm_quatToMatrix(rotation,matrix);
+}
+
+// Computes the axis-aligned bounds of 'vcount' points laid out 'pstride' bytes
+// apart. The first point is always read, so 'points' must hold at least one.
+void fm_getAABB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *bmin,REAL *bmax)
+{
+	// Seed both corners with the first point.
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+		bmin[axis] = points[axis];
+	}
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+		bmax[axis] = points[axis];
+	}
+
+	// Walk the remaining points using a byte cursor so any stride is honoured.
+	const uint8_t *cursor = (const uint8_t *) points;
+	for (uint32_t remaining = vcount; remaining > 1; remaining--)
+	{
+		cursor+=pstride;
+		const REAL *pt = (const REAL *) cursor;
+
+		for (int32_t axis = 0; axis < 3; axis++)
+		{
+			if ( pt[axis] < bmin[axis] ) bmin[axis] = pt[axis];
+		}
+		for (int32_t axis = 0; axis < 3; axis++)
+		{
+			if ( pt[axis] > bmax[axis] ) bmax[axis] = pt[axis];
+		}
+	}
+}
+
+void  fm_eulerToQuat(const REAL *euler,REAL *quat) // convert euler angles to quaternion.
+{
+	// Unpack the three angles and forward to the scalar overload.
+	const REAL first  = euler[0];
+	const REAL second = euler[1];
+	const REAL third  = euler[2];
+	fm_eulerToQuat(first,second,third,quat);
+}
+
+void fm_eulerToQuat(REAL roll,REAL pitch,REAL yaw,REAL *quat) // convert euler angles to quaternion.
+{
+	// The quaternion is built from the sines and cosines of the half angles.
+	const REAL halfRoll  = roll  * 0.5f;
+	const REAL halfPitch = pitch * 0.5f;
+	const REAL halfYaw   = yaw   * 0.5f;
+
+	const REAL cosRoll  = (REAL)cos(halfRoll);
+	const REAL cosPitch = (REAL)cos(halfPitch);
+	const REAL cosYaw   = (REAL)cos(halfYaw);
+
+	const REAL sinRoll  = (REAL)sin(halfRoll);
+	const REAL sinPitch = (REAL)sin(halfPitch);
+	const REAL sinYaw   = (REAL)sin(halfYaw);
+
+	// Shared pitch/yaw products.
+	const REAL cc = cosPitch * cosYaw;
+	const REAL ss = sinPitch * sinYaw;
+	const REAL sc = sinPitch * cosYaw;
+	const REAL cs = cosPitch * sinYaw;
+
+	quat[0]   = ( sinRoll * cc - cosRoll * ss);
+	quat[1]   = ( cosRoll * sc + sinRoll * cs);
+	quat[2]   = ( cosRoll * cs - sinRoll * sc);
+	quat[3]   = cosRoll * cc + sinRoll * ss;
+}
+
+void fm_quatToMatrix(const REAL *quat,REAL *matrix) // convert quaternion rotation to matrix, zeros out the translation component.
+{
+	const REAL x = quat[0];
+	const REAL y = quat[1];
+	const REAL z = quat[2];
+	const REAL w = quat[3];
+
+	// Pairwise products used by the rotation terms.
+	const REAL xx = x*x;
+	const REAL yy = y*y;
+	const REAL zz = z*z;
+	const REAL xy = x*y;
+	const REAL xz = x*z;
+	const REAL yz = y*z;
+	const REAL wx = w*x;
+	const REAL wy = w*y;
+	const REAL wz = w*z;
+
+	// Elements 0..3
+	matrix[0]  = 1 - 2 * ( yy + zz );
+	matrix[1]  =     2 * ( xy + wz );
+	matrix[2]  =     2 * ( xz - wy );
+	matrix[3]  = (REAL)0;
+
+	// Elements 4..7
+	matrix[4]  =     2 * ( xy - wz );
+	matrix[5]  = 1 - 2 * ( xx + zz );
+	matrix[6]  =     2 * ( yz + wx );
+	matrix[7]  = (REAL)0;
+
+	// Elements 8..11
+	matrix[8]  =     2 * ( xz + wy );
+	matrix[9]  =     2 * ( yz - wx );
+	matrix[10] = 1 - 2 * ( xx + yy );
+	matrix[11] = (REAL)0;
+
+	// Elements 12..15: zero translation, unit w.
+	matrix[12] = (REAL)0;
+	matrix[13] = (REAL)0;
+	matrix[14] = (REAL)0;
+	matrix[15] = (REAL)1;
+}
+
+
+void fm_quatRotate(const REAL *quat,const REAL *v,REAL *r) // rotate a vector directly by a quaternion.
+{
+	// First product: quat * (v as a quaternion with w = 0).
+	const REAL tx =   quat[3]*v[0] + quat[1]*v[2] - v[1]*quat[2];
+	const REAL ty =   quat[3]*v[1] + quat[2]*v[0] - v[2]*quat[0];
+	const REAL tz =   quat[3]*v[2] + quat[0]*v[1] - v[0]*quat[1];
+	const REAL tw = - quat[0]*v[0] - quat[1]*v[1] - quat[2]*v[2];
+
+	// Second product: multiply by the conjugate (-x,-y,-z,w) and keep xyz.
+	r[0] = (tw*-quat[0]) + (quat[3]*tx) + (ty*-quat[2]) - (-quat[1]*tz);
+	r[1] = (tw*-quat[1]) + (quat[3]*ty) + (tz*-quat[0]) - (-quat[2]*tx);
+	r[2] = (tw*-quat[2]) + (quat[3]*tz) + (tx*-quat[1]) - (-quat[0]*ty);
+}
+
+
+// Copies the translation (elements 12..14 of the 4x4 matrix) into t.
+void fm_getTranslation(const REAL *matrix,REAL *t)
+{
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+		t[axis] = matrix[12+axis];
+	}
+}
+
+void fm_matrixToQuat(const REAL *matrix,REAL *quat) // convert the 3x3 portion of a 4x4 matrix into a quaternion as x,y,z,w
+{
+	const REAL trace = matrix[0*4+0] + matrix[1*4+1] + matrix[2*4+2];
+
+	// Positive trace: w can be recovered directly from the trace.
+	if ( trace > 0.0f )
+	{
+		REAL root = (REAL) sqrt ( (double) (trace + 1.0f) );
+		quat[3] = root * 0.5f;
+		root = 0.5f / root;
+		quat[0] = (matrix[1*4+2] - matrix[2*4+1]) * root;
+		quat[1] = (matrix[2*4+0] - matrix[0*4+2]) * root;
+		quat[2] = (matrix[0*4+1] - matrix[1*4+0]) * root;
+		return;
+	}
+
+	// Otherwise pivot on the largest diagonal element; j and k are the two
+	// remaining axes in cyclic order.
+	int32_t i = ( matrix[1*4+1] > matrix[0*4+0] ) ? 1 : 0;
+	if ( matrix[2*4+2] > matrix[i*4+i] )
+	{
+		i = 2;
+	}
+	const int32_t j = (i + 1) % 3;
+	const int32_t k = (j + 1) % 3;
+
+	REAL root = (REAL)sqrt ( ((matrix[i*4+i] - (matrix[j*4+j] + matrix[k*4+k])) + 1.0f) );
+
+	REAL result[4];
+	result[i] = root * 0.5f;
+
+	// Avoid dividing by zero when the radicand collapsed to zero.
+	if ( root != 0.0f )
+	{
+		root = 0.5f / root;
+	}
+
+	result[3] = (matrix[j*4+k] - matrix[k*4+j]) * root;
+	result[j] = (matrix[i*4+j] + matrix[j*4+i]) * root;
+	result[k] = (matrix[i*4+k] + matrix[k*4+i]) * root;
+
+	for (int32_t c = 0; c < 4; c++)
+	{
+		quat[c] = result[c];
+	}
+}
+
+
+// Returns the volume of a sphere of the given radius: (4/3) * PI * r^3.
+REAL fm_sphereVolume(REAL radius)
+{
+	// Form 4/3 in REAL precision; the float literals 4.0f/3.0f would round the
+	// ratio to single precision even when REAL is double.
+	return ((REAL)4 / (REAL)3) * FM_PI * radius * radius * radius;
+}
+
+
+// Returns the volume of a cylinder: PI * radius^2 * h.
+REAL fm_cylinderVolume(REAL radius,REAL h)
+{
+	return FM_PI * radius * radius * h;
+}
+
+// Returns the volume of a capsule whose total height is h: one full sphere for
+// the two end caps plus a cylinder for whatever length remains between them.
+REAL fm_capsuleVolume(REAL radius,REAL h)
+{
+	const REAL cylinderLength = h-radius*2;
+
+	REAL total = fm_sphereVolume(radius);
+	if ( cylinderLength > 0 )
+	{
+		total+=fm_cylinderVolume(radius,cylinderLength);
+	}
+	return total;
+}
+
+void  fm_transform(const REAL matrix[16],const REAL v[3],REAL t[3]) // rotate and translate this point
+{
+	// A null matrix is treated as identity: the point is copied unchanged.
+	if ( !matrix )
+	{
+		t[0] = v[0];
+		t[1] = v[1];
+		t[2] = v[2];
+		return;
+	}
+
+	// Compute all three outputs before storing so that t may alias v.
+	const REAL outX = (matrix[0*4+0] * v[0]) +  (matrix[1*4+0] * v[1]) + (matrix[2*4+0] * v[2]) + matrix[3*4+0];
+	const REAL outY = (matrix[0*4+1] * v[0]) +  (matrix[1*4+1] * v[1]) + (matrix[2*4+1] * v[2]) + matrix[3*4+1];
+	const REAL outZ = (matrix[0*4+2] * v[0]) +  (matrix[1*4+2] * v[1]) + (matrix[2*4+2] * v[2]) + matrix[3*4+2];
+
+	t[0] = outX;
+	t[1] = outY;
+	t[2] = outZ;
+}
+
+void  fm_rotate(const REAL matrix[16],const REAL v[3],REAL t[3]) // rotate this vector by the 3x3 part of the matrix (no translation is applied)
+{
+	// A null matrix is treated as identity: the vector is copied unchanged.
+	if ( !matrix )
+	{
+		t[0] = v[0];
+		t[1] = v[1];
+		t[2] = v[2];
+		return;
+	}
+
+	// Compute all three outputs before storing so that t may alias v.
+	const REAL outX = (matrix[0*4+0] * v[0]) +  (matrix[1*4+0] * v[1]) + (matrix[2*4+0] * v[2]);
+	const REAL outY = (matrix[0*4+1] * v[0]) +  (matrix[1*4+1] * v[1]) + (matrix[2*4+1] * v[2]);
+	const REAL outZ = (matrix[0*4+2] * v[0]) +  (matrix[1*4+2] * v[1]) + (matrix[2*4+2] * v[2]);
+
+	t[0] = outX;
+	t[1] = outY;
+	t[2] = outZ;
+}
+
+
+// Returns the Euclidean distance between two 3d points.
+REAL fm_distance(const REAL *p1,const REAL *p2)
+{
+	const REAL deltaX = p1[0] - p2[0];
+	const REAL deltaY = p1[1] - p2[1];
+	const REAL deltaZ = p1[2] - p2[2];
+
+	return (REAL)sqrt( deltaX*deltaX + deltaY*deltaY + deltaZ*deltaZ );
+}
+
+// Returns the squared Euclidean distance between two 3d points (no sqrt).
+REAL fm_distanceSquared(const REAL *p1,const REAL *p2)
+{
+	const REAL deltaX = p1[0] - p2[0];
+	const REAL deltaY = p1[1] - p2[1];
+	const REAL deltaZ = p1[2] - p2[2];
+
+	return deltaX*deltaX + deltaY*deltaY + deltaZ*deltaZ;
+}
+
+
+// Returns the squared distance between two points measured in the XZ plane
+// only; the Y components are ignored.
+REAL fm_distanceSquaredXZ(const REAL *p1,const REAL *p2)
+{
+	const REAL deltaX = p1[0] - p2[0];
+	const REAL deltaZ = p1[2] - p2[2];
+
+	return deltaX*deltaX + deltaZ*deltaZ;
+}
+
+
+REAL fm_computePlane(const REAL *A,const REAL *B,const REAL *C,REAL *n) // returns D
+{
+	// Two edge vectors of the triangle: (B - C) and (A - B).
+	const REAL e1x = (B[0] - C[0]);
+	const REAL e1y = (B[1] - C[1]);
+	const REAL e1z = (B[2] - C[2]);
+
+	const REAL e2x = (A[0] - B[0]);
+	const REAL e2y = (A[1] - B[1]);
+	const REAL e2z = (A[2] - B[2]);
+
+	// Their cross product is the (unnormalized) plane normal.
+	const REAL crossX = e1y * e2z - e1z * e2y;
+	const REAL crossY = e1z * e2x - e1x * e2z;
+	const REAL crossZ = e1x * e2y - e1y * e2x;
+
+	const REAL length = (REAL)sqrt((crossX * crossX) + (crossY * crossY) + (crossZ * crossZ));
+
+	// A degenerate triangle gets a zero normal instead of a division by ~0.
+	const REAL invLength = ( length < 0.000001f ) ? (REAL)0 : (REAL)(1.0f/length);
+
+	const REAL nx = crossX * invLength;
+	const REAL ny = crossY * invLength;
+	const REAL nz = crossZ * invLength;
+
+	// D is chosen so that point A satisfies n.A + D = 0.
+	const REAL planeD = 0.0f - ((nx*A[0])+(ny*A[1])+(nz*A[2]));
+
+	n[0] = nx;
+	n[1] = ny;
+	n[2] = nz;
+
+	return planeD;
+}
+
+REAL fm_distToPlane(const REAL *plane,const REAL *p) // computes the distance of this point from the plane.
+{
+	// plane[0..2] is the normal and plane[3] the D term: result = n.p + D.
+	return p[0]*plane[0]
+	     + p[1]*plane[1]
+	     + p[2]*plane[2]
+	     + plane[3];
+}
+
+// Returns the dot product of two 3d vectors.
+REAL fm_dot(const REAL *p1,const REAL *p2)
+{
+	return p1[0]*p2[0]
+	     + p1[1]*p2[1]
+	     + p1[2]*p2[2];
+}
+
+// Writes the cross product a x b into 'cross'.
+//
+// All three components are computed before anything is stored, so the output
+// may safely alias either input (e.g. fm_cross(a,a,b)).
+void fm_cross(REAL *cross,const REAL *a,const REAL *b)
+{
+	const REAL cx = a[1]*b[2] - a[2]*b[1];
+	const REAL cy = a[2]*b[0] - a[0]*b[2];
+	const REAL cz = a[0]*b[1] - a[1]*b[0];
+
+	cross[0] = cx;
+	cross[1] = cy;
+	cross[2] = cz;
+}
+
+// Stores the normalized direction from p1 to p2 in n and returns the value
+// reported by fm_normalize (the length before normalization).
+REAL fm_computeNormalVector(REAL *n,const REAL *p1,const REAL *p2)
+{
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+		n[axis] = p2[axis] - p1[axis];
+	}
+	return fm_normalize(n);
+}
+
+bool  fm_computeWindingOrder(const REAL *p1,const REAL *p2,const REAL *p3) // returns true if the triangle is clockwise.
+{
+	// Normalized edge directions leaving p1.
+	REAL edge12[3];
+	REAL edge13[3];
+	fm_computeNormalVector(edge12,p1,p2);
+	fm_computeNormalVector(edge13,p1,p3);
+
+	REAL faceNormal[3];
+	fm_cross(faceNormal, edge12, edge13 );
+
+	// The face normal is compared against the fixed +X reference axis only.
+	REAL xAxis[3] = { 1, 0, 0 };
+	const REAL facing = fm_dot( faceNormal, xAxis );
+
+	return !( facing <= 0 );
+}
+
+REAL fm_normalize(REAL *n) // normalize this vector
+{
+	const REAL length = (REAL)sqrt(n[0]*n[0] + n[1]*n[1] + n[2]*n[2]);
+
+	if ( length > 0.0000001f )
+	{
+		const REAL invLength = 1.0f / length;
+		for (int32_t axis = 0; axis < 3; axis++)
+		{
+			n[axis]*=invLength;
+		}
+	}
+	else
+	{
+		// Too short to normalize: fall back to the +X unit vector.
+		n[0] = 1;
+		n[1] = 0;
+		n[2] = 0;
+	}
+
+	// The original (pre-normalization) length is returned.
+	return length;
+}
+
+
+// Computes the 4x4 product pM = pA * pB, where element (r,c) lives at index
+// r*4+c. The result is built in a scratch array first, so pM may alias pA or pB.
+void  fm_matrixMultiply(const REAL *pA,const REAL *pB,REAL *pM)
+{
+	REAL product[16];
+
+	for (int32_t r = 0; r < 4; r++)
+	{
+		const REAL *rowA = &pA[r*4];
+		for (int32_t c = 0; c < 4; c++)
+		{
+			product[r*4+c] = rowA[0] * pB[0*4+c] + rowA[1] * pB[1*4+c] + rowA[2] * pB[2*4+c] + rowA[3] * pB[3*4+c];
+		}
+	}
+
+	for (int32_t i = 0; i < 16; i++)
+	{
+		pM[i] = product[i];
+	}
+}
+
+
+// Converts euler angles to a quaternion (x,y,z,w) using the DirectX rotation
+// convention: the angles are first expanded into a matrix by
+// fm_eulerToMatrixDX and that matrix is then converted to a quaternion.
+void  fm_eulerToQuatDX(REAL x,REAL y,REAL z,REAL *quat)
+{
+  REAL matrix[16];
+  // Use the DX matrix builder; the generic fm_eulerToMatrix would make this
+  // function indistinguishable from fm_eulerToQuat.
+  fm_eulerToMatrixDX(x,y,z,matrix);
+  fm_matrixToQuat(matrix,quat);
+}
+
+// implementation copied from: http://blogs.msdn.com/mikepelton/archive/2004/10/29/249501.aspx
+void  fm_eulerToMatrixDX(REAL x,REAL y,REAL z,REAL *matrix) // convert euler angles (radians) to a 4x4 rotation matrix using the DirectX convention.
+{
+  fm_identity(matrix); // start from identity so elements outside the 3x3 block (translation, w) are well defined
+  matrix[0*4+0] = (REAL)(cos(z)*cos(y) + sin(z)*sin(x)*sin(y));
+  matrix[0*4+1] = (REAL)(sin(z)*cos(x));
+  matrix[0*4+2] = (REAL)(cos(z)*-sin(y) + sin(z)*sin(x)*cos(y));
+
+  matrix[1*4+0] = (REAL)(-sin(z)*cos(y)+cos(z)*sin(x)*sin(y));
+  matrix[1*4+1] = (REAL)(cos(z)*cos(x));
+  matrix[1*4+2] = (REAL)(sin(z)*sin(y) +cos(z)*sin(x)*cos(y));
+
+  matrix[2*4+0] = (REAL)(cos(x)*sin(y));
+  matrix[2*4+1] = (REAL)(-sin(x));
+  matrix[2*4+2] = (REAL)(cos(x)*cos(y));
+}
+
+
+void  fm_scale(REAL x,REAL y,REAL z,REAL *fscale) // store the scale factors on the matrix diagonal; no other element is touched.
+{
+  // Diagonal slots of the 3x3 block are at indices 0, 5 and 10.
+  fscale[0]  = x;
+  fscale[5]  = y;
+  fscale[10] = z;
+}
+
+
+// Builds a 4x4 transform from a position, a rotation quaternion (x,y,z,w) and
+// an optional scale. 'scale' may be null, in which case no scaling is applied.
+void  fm_composeTransform(const REAL *position,const REAL *quat,const REAL *scale,REAL *matrix)
+{
+  fm_identity(matrix);
+  fm_quatToMatrix(quat,matrix);
+
+  // Skip the extra multiply when there is no scale or it is exactly (1,1,1).
+  const bool unitScale = !scale || ( scale[0] == 1 && scale[1] == 1 && scale[2] == 1 );
+  if ( !unitScale )
+  {
+	REAL rotation[16];
+	memcpy(rotation,matrix,sizeof(REAL)*16);
+
+	REAL scaling[16];
+	fm_identity(scaling);
+	fm_scale(scale[0],scale[1],scale[2],scaling);
+
+	fm_matrixMultiply(rotation,scaling,matrix);
+  }
+
+  // Translation goes into elements 12..14.
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	matrix[12+axis] = position[axis];
+  }
+}
+
+
+// Overwrites the translation (elements 12..14) of a 4x4 matrix.
+void  fm_setTranslation(const REAL *translation,REAL *matrix)
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	matrix[12+axis] = translation[axis];
+  }
+}
+
+// ENORM0_3D computes the Euclidean norm of (P1-P0) in 3D.
+//
+// Author: John Burkardt (modified 18 April 1999).
+//
+// x0,y0,z0 / x1,y1,z1 : coordinates of the points P0 and P1.
+// Returns the Euclidean norm of (P1-P0).
+static REAL enorm0_3d ( REAL x0, REAL y0, REAL z0, REAL x1, REAL y1, REAL z1 )
+{
+  const REAL dx = x1 - x0;
+  const REAL dy = y1 - y0;
+  const REAL dz = z1 - z0;
+
+  return (REAL)sqrt ( dx * dx + dy * dy + dz * dz );
+}
+
+
+// TRIANGLE_AREA_3D computes the area of a triangle in 3D.
+//
+// Author: John Burkardt (modified 22 April 1999).
+//
+// x1..z3 : the (X,Y,Z) coordinates of the three corners of the triangle.
+// Returns the area of the triangle, computed as 0.5 * base * height where the
+// base is the edge P1->P2.
+static REAL triangle_area_3d ( REAL x1, REAL y1, REAL z1, REAL x2,REAL y2, REAL z2, REAL x3, REAL y3, REAL z3 )
+{
+  // Projection of (P3-P1) onto (P2-P1), before dividing by |P2-P1|^2.
+  const REAL projection =
+	( x2 - x1 ) * ( x3 - x1 ) +
+	( y2 - y1 ) * ( y3 - y1 ) +
+	( z2 - z1 ) * ( z3 - z1 );
+
+  const REAL baseLength = enorm0_3d ( x1, y1, z1, x2, y2, z2 );
+
+  // The height is the length of (P3-P1) once its component along the base
+  // has been removed. A zero-length base leaves the height at zero.
+  REAL heightLength = 0.0;
+  if ( baseLength != 0.0 )
+  {
+	const REAL ratio = projection / ( baseLength * baseLength );
+
+	const REAL rx = x3 - x1 - ratio * ( x2 - x1 );
+	const REAL ry = y3 - y1 - ratio * ( y2 - y1 );
+	const REAL rz = z3 - z1 - ratio * ( z2 - z1 );
+
+	heightLength = (REAL)sqrt ( rx * rx + ry * ry + rz * rz );
+  }
+
+  return 0.5f * baseLength * heightLength;
+}
+
+
+// Returns the area of the triangle p1,p2,p3 by forwarding the nine
+// coordinates to triangle_area_3d.
+REAL fm_computeArea(const REAL *p1,const REAL *p2,const REAL *p3)
+{
+  return triangle_area_3d(p1[0],p1[1],p1[2],
+						  p2[0],p2[1],p2[2],
+						  p3[0],p3[1],p3[2]);
+}
+
+
+// Linear interpolation between two points: dest = p1 + (p2 - p1) * lerpValue.
+void  fm_lerp(const REAL *p1,const REAL *p2,REAL *dest,REAL lerpValue)
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	dest[axis] = ((p2[axis] - p1[axis])*lerpValue) + p1[axis];
+  }
+}
+
+// Edge-crossing test in the XZ plane: returns true when the edge i->j spans
+// p's Z coordinate (half-open interval) and p lies on the smaller-X side of
+// the edge at that Z.
+bool fm_pointTestXZ(const REAL *p,const REAL *i,const REAL *j)
+{
+  const bool spansUp   = ( i[2] <= p[2] ) && ( p[2]  < j[2] );
+  const bool spansDown = ( j[2] <= p[2] ) && ( p[2]  < i[2] );
+
+  if ( !(spansUp || spansDown) )
+  {
+	return false;
+  }
+
+  // Safe to divide: a spanning edge always has j[2] != i[2].
+  return p[0] < (j[0] - i[0]) * (p[2] - i[2]) / (j[2] - i[2]) + i[0];
+}
+
+
+// Point-in-triangle test in the XZ plane using edge-crossing parity: each
+// edge that passes fm_pointTestXZ flips the result.
+bool  fm_insideTriangleXZ(const REAL *p,const REAL *p1,const REAL *p2,const REAL *p3)
+{
+  bool inside = false;
+
+  if ( fm_pointTestXZ(p,p1,p2) ) inside = !inside;
+  if ( fm_pointTestXZ(p,p2,p3) ) inside = !inside;
+  if ( fm_pointTestXZ(p,p3,p1) ) inside = !inside;
+
+  return inside;
+}
+
+// Returns true when the point lies inside the box, boundaries included.
+bool  fm_insideAABB(const REAL *pos,const REAL *bmin,const REAL *bmax)
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	const bool within = pos[axis] >= bmin[axis] && pos[axis] <= bmax[axis];
+	if ( !within )
+	{
+	  return false;
+	}
+  }
+  return true;
+}
+
+
+// Returns a bit mask of FMCS_* flags describing on which sides of the box the
+// point lies; zero means the point is inside on every axis.
+uint32_t fm_clipTestPoint(const REAL *bmin,const REAL *bmax,const REAL *pos)
+{
+  uint32_t code = 0;
+
+  // X axis
+  if ( pos[0] < bmin[0] )      { code|=FMCS_XMIN; }
+  else if ( pos[0] > bmax[0] ) { code|=FMCS_XMAX; }
+
+  // Y axis
+  if ( pos[1] < bmin[1] )      { code|=FMCS_YMIN; }
+  else if ( pos[1] > bmax[1] ) { code|=FMCS_YMAX; }
+
+  // Z axis
+  if ( pos[2] < bmin[2] )      { code|=FMCS_ZMIN; }
+  else if ( pos[2] > bmax[2] ) { code|=FMCS_ZMAX; }
+
+  return code;
+}
+
+uint32_t fm_clipTestPointXZ(const REAL *bmin,const REAL *bmax,const REAL *pos) // only tests X and Z, not Y
+{
+  uint32_t code = 0;
+
+  // X axis
+  if ( pos[0] < bmin[0] )      { code|=FMCS_XMIN; }
+  else if ( pos[0] > bmax[0] ) { code|=FMCS_XMAX; }
+
+  // Z axis
+  if ( pos[2] < bmin[2] )      { code|=FMCS_ZMIN; }
+  else if ( pos[2] > bmax[2] ) { code|=FMCS_ZMAX; }
+
+  return code;
+}
+
+// Classifies the three triangle corners against the box. Returns the OR of
+// their clip codes; 'andCode' receives the AND of the codes, i.e. the sides
+// that every corner is outside of.
+uint32_t fm_clipTestAABB(const REAL *bmin,const REAL *bmax,const REAL *p1,const REAL *p2,const REAL *p3,uint32_t &andCode)
+{
+  const REAL *corners[3] = { p1, p2, p3 };
+
+  uint32_t anyOutside = 0;
+  andCode = FMCS_XMIN | FMCS_XMAX | FMCS_YMIN | FMCS_YMAX | FMCS_ZMIN | FMCS_ZMAX;
+
+  for (int32_t i = 0; i < 3; i++)
+  {
+	const uint32_t code = fm_clipTestPoint(bmin,bmax,corners[i]);
+	anyOutside|=code;
+	andCode&=code;
+  }
+
+  return anyOutside;
+}
+
+// Slab test of the segment si->ei against the box bmin/bmax. On a hit, *time
+// receives the entry parameter in [0,1] along the segment; on a miss *time is
+// left untouched.
+bool intersect(const REAL *si,const REAL *ei,const REAL *bmin,const REAL *bmax,REAL *time)
+{
+  REAL enter = 0;   // latest entry parameter over all axes
+  REAL leave = 1;   // earliest exit parameter over all axes
+
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	const REAL s  = si[axis];
+	const REAL e  = ei[axis];
+	const REAL lo = bmin[axis];
+	const REAL hi = bmax[axis];
+
+	REAL st,et;
+	if (s < e)
+	{
+	  // Travelling in the positive direction on this axis.
+	  if (s > hi || e < lo)
+		return false;
+	  const REAL di = e - s;
+	  st = (s < lo)? (lo - s) / di: 0;
+	  et = (e > hi)? (hi - s) / di: 1;
+	}
+	else
+	{
+	  // Travelling in the negative direction (or not moving) on this axis.
+	  if (e > hi || s < lo)
+		return false;
+	  const REAL di = e - s;
+	  st = (s > hi)? (hi - s) / di: 0;
+	  et = (e < lo)? (lo - s) / di: 1;
+	}
+
+	if (st > enter) enter = st;
+	if (et < leave) leave = et;
+	if (leave < enter)
+	  return false;
+  }
+
+  *time = enter;
+  return true;
+}
+
+
+
+// Tests the segment p1->p2 against the box; on a hit 'time' receives the
+// entry parameter computed by intersect().
+bool fm_lineTestAABB(const REAL *p1,const REAL *p2,const REAL *bmin,const REAL *bmax,REAL &time)
+{
+  return intersect(p1,p2,bmin,bmax,&time);
+}
+
+
+// Same as fm_lineTestAABB but effectively ignores Y: the box is widened to
+// [-1e9, 1e9] on the Y axis before testing.
+bool fm_lineTestAABBXZ(const REAL *p1,const REAL *p2,const REAL *bmin,const REAL *bmax,REAL &time)
+{
+  REAL wideMin[3];
+  REAL wideMax[3];
+
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	wideMin[axis] = bmin[axis];
+	wideMax[axis] = bmax[axis];
+  }
+  wideMin[1] = -1e9;
+  wideMax[1] = 1e9;
+
+  return intersect(p1,p2,wideMin,wideMax,&time);
+}
+
+void  fm_minmax(const REAL *p,REAL *bmin,REAL *bmax) // accumulate to a min-max value
+{
+  // Lower corner first ...
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	if ( p[axis] < bmin[axis] ) bmin[axis] = p[axis];
+  }
+
+  // ... then the upper corner.
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	if ( p[axis] > bmax[axis] ) bmax[axis] = p[axis];
+  }
+}
+
+REAL fm_solveX(const REAL *plane,REAL y,REAL z) // solve for X given this plane equation and the other two components.
+{
+  // Rearranged from plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0.
+  return (y*plane[1]+z*plane[2]+plane[3]) / -plane[0];
+}
+
+REAL fm_solveY(const REAL *plane,REAL x,REAL z) // solve for Y given this plane equation and the other two components.
+{
+  // Rearranged from plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0.
+  return (x*plane[0]+z*plane[2]+plane[3]) / -plane[1];
+}
+
+
+REAL fm_solveZ(const REAL *plane,REAL x,REAL y) // solve for Z given this plane equation and the other two components.
+{
+  // Rearranged from plane[0]*x + plane[1]*y + plane[2]*z + plane[3] = 0.
+  return (x*plane[0]+y*plane[1]+plane[3]) / -plane[2];
+}
+
+
+// Writes the midpoint of the box bmin/bmax into 'center'.
+void  fm_getAABBCenter(const REAL *bmin,const REAL *bmax,REAL *center)
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	center[axis] = (bmax[axis]-bmin[axis])*0.5f+bmin[axis];
+  }
+}
+
+// Returns the axis along which the normal has the strictly largest absolute
+// component; ties fall back to FM_XAXIS.
+FM_Axis fm_getDominantAxis(const REAL normal[3])
+{
+  const REAL absX = (REAL)fabs(normal[0]);
+  const REAL absY = (REAL)fabs(normal[1]);
+  const REAL absZ = (REAL)fabs(normal[2]);
+
+  if ( absY > absX && absY > absZ )
+  {
+	return FM_YAXIS;
+  }
+  if ( absZ > absX && absZ > absY )
+  {
+	return FM_ZAXIS;
+  }
+  return FM_XAXIS;
+}
+
+
+// Tests the segment p1->p2 against a sphere. A non-degenerate segment is
+// normalized and handed to fm_raySphereIntersect; a zero-length segment is
+// treated as a point-in-sphere test. 'intersect' may be null in the
+// zero-length case.
+bool fm_lineSphereIntersect(const REAL *center,REAL radius,const REAL *p1,const REAL *p2,REAL *intersect)
+{
+  REAL dir[3];
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	dir[axis] = p2[axis]-p1[axis];
+  }
+
+  const REAL distance = (REAL)sqrt( dir[0]*dir[0]+dir[1]*dir[1]+dir[2]*dir[2]);
+
+  if ( distance > 0 )
+  {
+	// Normalize the direction and cast a ray limited to the segment length.
+	const REAL recip = 1.0f / distance;
+	for (int32_t axis = 0; axis < 3; axis++)
+	{
+	  dir[axis]*=recip;
+	}
+	return fm_raySphereIntersect(center,radius,p1,dir,distance,intersect);
+  }
+
+  // Zero-length segment: hit only if p1 is strictly inside the sphere.
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	dir[axis] = center[axis]-p1[axis];
+  }
+  const REAL d2 = dir[0]*dir[0]+dir[1]*dir[1]+dir[2]*dir[2];
+  const REAL r2 = radius*radius;
+  if ( !(d2 < r2) )
+  {
+	return false;
+  }
+
+  if ( intersect )
+  {
+	intersect[0] = p1[0];
+	intersect[1] = p1[1];
+	intersect[2] = p1[2];
+  }
+  return true;
+}
+
+#define DOT(p1,p2) (p1[0]*p2[0]+p1[1]*p2[1]+p1[2]*p2[2])
+
+// Tests a ray (origin 'pos', unit direction 'dir', maximum length 'distance')
+// against a sphere.
+//
+//   center, radius : the sphere.
+//   intersect      : optional; when non-null it receives the hit location.
+//
+// Returns true when the ray hits the sphere within 'distance'. The result no
+// longer depends on whether 'intersect' was supplied.
+bool fm_raySphereIntersect(const REAL *center,REAL radius,const REAL *pos,const REAL *dir,REAL distance,REAL *intersect)
+{
+  bool ret = false;
+
+  // Vector from the ray origin to the sphere center.
+  REAL E0[3];
+
+  E0[0] = center[0] - pos[0];
+  E0[1] = center[1] - pos[1];
+  E0[2] = center[2] - pos[2];
+
+  REAL V[3];
+
+  V[0]  = dir[0];
+  V[1]  = dir[1];
+  V[2]  = dir[2];
+
+
+  REAL dist2   = E0[0]*E0[0] + E0[1]*E0[1] + E0[2] * E0[2];
+  REAL radius2 = radius*radius; // radius squared..
+
+  // Bug Fix For Gem, if origin is *inside* the sphere, invert the
+  // direction vector so that we get a valid intersection location.
+  if ( dist2 < radius2 )
+  {
+	V[0]*=-1;
+	V[1]*=-1;
+	V[2]*=-1;
+  }
+
+  REAL v = DOT(E0,V);
+
+  REAL disc = radius2 - (dist2 - v*v);
+
+  if (disc > 0.0f)
+  {
+	REAL d = (REAL)sqrt(disc);
+	REAL diff = v-d;
+	if ( diff < distance )
+	{
+	  ret = true;
+	  // The hit location is optional output; the boolean result is not.
+	  if ( intersect )
+	  {
+		intersect[0] = pos[0]+V[0]*diff;
+		intersect[1] = pos[1]+V[1]*diff;
+		intersect[2] = pos[2]+V[2]*diff;
+	  }
+	}
+  }
+
+  return ret;
+}
+
+
+// Evaluates a Catmull-Rom spline through p1..p4 at parameter s and writes the
+// resulting 3d point to out_vector.
+void fm_catmullRom(REAL *out_vector,const REAL *p1,const REAL *p2,const REAL *p3,const REAL *p4, const REAL s)
+{
+  const REAL s2 = s * s;
+  const REAL s3 = s2 * s;
+
+  // Basis weights for the four control points (the shared 0.5 is applied below).
+  const REAL w1 = -s3 + 2*s2 - s;
+  const REAL w2 = 3 * s3 - 5 * s2 + 2;
+  const REAL w3 = -3 * s3 +4 * s2 + s;
+  const REAL w4 = s3 - s2;
+
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	out_vector[axis] = (w1 * p1[axis] + w2 * p2[axis] + w3 * p3[axis] + w4 * p4[axis])*0.5f;
+  }
+}
+
+// Returns true when the two boxes overlap; boxes that merely touch count as
+// overlapping.
+bool fm_intersectAABB(const REAL *bmin1,const REAL *bmax1,const REAL *bmin2,const REAL *bmax2)
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	// A gap on any single axis separates the boxes.
+	if ((bmin1[axis] > bmax2[axis]) || (bmin2[axis] > bmax1[axis]))
+	{
+	  return false;
+	}
+  }
+  return true;
+}
+
+bool  fm_insideAABB(const REAL *obmin,const REAL *obmax,const REAL *tbmin,const REAL *tbmax) // test if bounding box tbmin/tbmax is fully inside obmin/obmax
+{
+  for (int32_t axis = 0; axis < 3; axis++)
+  {
+	const bool contained = tbmax[axis] <= obmax[axis] && tbmin[axis] >= obmin[axis];
+	if ( !contained )
+	{
+	  return false;
+	}
+  }
+  return true;
+}
+
+
+// Reference, from Stan Melax in Game Gems I
+//  Quaternion q;
+//  vector3 c = CrossProduct(v0,v1);
+//  REAL   d = DotProduct(v0,v1);
+//  REAL   s = (REAL)sqrt((1+d)*2);
+//  q.x = c.x / s;
+//  q.y = c.y / s;
+//  q.z = c.z / s;
+//  q.w = s /2.0f;
+//  return q;
+// Builds the quaternion (x,y,z,w) that rotates direction v0 onto direction v1.
+// NOTE(review): the formula presumably expects both inputs to be unit length - confirm with callers.
+void fm_rotationArc(const REAL *v0,const REAL *v1,REAL *quat)
+{
+  const REAL cosAngle = fm_dot(v0,v1);
+
+  // General case: axis from the cross product, as in the reference above.
+  if ( !(cosAngle <= -0.99999f) )
+  {
+	  REAL axis[3];
+	  fm_cross(axis,v0,v1);
+
+	  const REAL s = (REAL)sqrt((1+cosAngle)*2);
+	  const REAL recip = 1.0f / s;
+
+	  quat[0] = axis[0] * recip;
+	  quat[1] = axis[1] * recip;
+	  quat[2] = axis[2] * recip;
+	  quat[3] = s * 0.5f;
+	  return;
+  }
+
+  // Vectors are (nearly) opposite: a half turn about any axis perpendicular
+  // to v0. Pick the perpendicular based on how large v0's X component is.
+  if ( fabsf((float)v0[0]) < 0.1f )
+  {
+	  quat[0] = 0;
+	  quat[1] = v0[2];
+	  quat[2] = -v0[1];
+	  quat[3] = 0;
+  }
+  else
+  {
+	  quat[0] = v0[1];
+	  quat[1] = -v0[0];
+	  quat[2] = 0;
+	  quat[3] = 0;
+  }
+
+  // Normalize the resulting quaternion.
+  const REAL lengthSquared = quat[0]*quat[0] + quat[1]*quat[1] + quat[2]*quat[2] + quat[3]*quat[3];
+  const REAL length = sqrtf((float)lengthSquared);
+  const REAL invLength = 1.0f / length;
+  for (int32_t c = 0; c < 4; c++)
+  {
+	  quat[c]*=invLength;
+  }
+}
+
+
+// Returns the distance from 'Point' to the segment LineStart->LineEnd.
+//
+//   intersection : receives the closest point on the segment.
+//   type         : LS_START / LS_END when the closest point is (within
+//                  2*epsilon of) an endpoint, LS_MIDDLE otherwise.
+//
+// For a zero-length segment the segment length itself is returned and the
+// closest point is reported as LineEnd.
+REAL fm_distancePointLineSegment(const REAL *Point,const REAL *LineStart,const REAL *LineEnd,REAL *intersection,LineSegmentType &type,REAL epsilon)
+{
+  const REAL segLength = fm_distance( LineEnd, LineStart );
+
+  // Degenerate segment.
+  if ( !(segLength > 0) )
+  {
+	intersection[0] = LineEnd[0];
+	intersection[1] = LineEnd[1];
+	intersection[2] = LineEnd[2];
+	type = LS_END;
+	return segLength;
+  }
+
+  // Parametric position of the projection of Point onto the infinite line.
+  const REAL U = ( ( ( Point[0] - LineStart[0] ) * ( LineEnd[0] - LineStart[0] ) ) + ( ( Point[1] - LineStart[1] ) * ( LineEnd[1] - LineStart[1] ) ) + ( ( Point[2] - LineStart[2] ) * ( LineEnd[2] - LineStart[2] ) ) ) / ( segLength * segLength );
+
+  // Projection falls outside the segment: snap to the nearer endpoint.
+  if( U < 0.0f || U > 1.0f )
+  {
+	const REAL toStartSq = fm_distanceSquared(Point,LineStart);
+	const REAL toEndSq   = fm_distanceSquared(Point,LineEnd);
+	const bool startIsNearer = ( toStartSq <= toEndSq );
+
+	const REAL result = (REAL)sqrt( startIsNearer ? toStartSq : toEndSq );
+	const REAL *nearest = startIsNearer ? LineStart : LineEnd;
+	intersection[0] = nearest[0];
+	intersection[1] = nearest[1];
+	intersection[2] = nearest[2];
+	type = startIsNearer ? LS_START : LS_END;
+	return result;
+  }
+
+  // Projection lies on the segment.
+  intersection[0] = LineStart[0] + U * ( LineEnd[0] - LineStart[0] );
+  intersection[1] = LineStart[1] + U * ( LineEnd[1] - LineStart[1] );
+  intersection[2] = LineStart[2] + U * ( LineEnd[2] - LineStart[2] );
+
+  const REAL result = fm_distance(Point,intersection);
+
+  // Classify the hit: within 2*epsilon of an endpoint counts as that endpoint.
+  const REAL fromStartSq = fm_distanceSquared(intersection,LineStart);
+  const REAL fromEndSq   = fm_distanceSquared(intersection,LineEnd);
+  const REAL thresholdSq = (epsilon*2)*(epsilon*2);
+
+  if ( fromStartSq < thresholdSq )
+  {
+	type = LS_START;
+  }
+  else if ( fromEndSq < thresholdSq )
+  {
+	type = LS_END;
+  }
+  else
+  {
+	type = LS_MIDDLE;
+  }
+
+  return result;
+}
+
+
+#ifndef BEST_FIT_PLANE_H
+
+#define BEST_FIT_PLANE_H
+
+// Eigen-solver for a symmetric 3x3 matrix.
+//
+// Usage as seen in this file: fill mElement with the symmetric matrix, call
+// DecrSortEigenStuff(), then read the eigenvalues from m_afDiag (sorted in
+// decreasing order) and the matching eigenvectors from the *columns* of
+// mElement.  None of the members are initialised by a constructor; the caller
+// must fill mElement before calling anything.
+template <class Type> class Eigen
+{
+public:
+
+
+  // Runs the whole pipeline: tridiagonal reduction, QL iteration, sort by
+  // decreasing eigenvalue, then sign fix-up of the first eigenvector column.
+  // NOTE: the bool returned by QLAlgorithm() (false when the iteration cap is
+  // hit) is discarded here, so callers cannot detect non-convergence.
+  void DecrSortEigenStuff(void)
+  {
+	Tridiagonal(); //diagonalize the matrix.
+	QLAlgorithm(); //
+	DecreasingSort();
+	GuaranteeRotation();
+  }
+
+  // Reduces the matrix to tridiagonal form.  Only the upper triangle of
+  // mElement is read.  On exit m_afDiag holds the diagonal, m_afSubd the
+  // sub-diagonal (m_afSubd[2] is set to 0) and mElement is overwritten with
+  // the transform that was applied.  m_bIsRotation is cleared when the
+  // fM02 != 0 branch is taken and set when the identity is used.
+  void Tridiagonal(void)
+  {
+	Type fM00 = mElement[0][0];
+	Type fM01 = mElement[0][1];
+	Type fM02 = mElement[0][2];
+	Type fM11 = mElement[1][1];
+	Type fM12 = mElement[1][2];
+	Type fM22 = mElement[2][2];
+
+	m_afDiag[0] = fM00;
+	m_afSubd[2] = 0;
+	if (fM02 != (Type)0.0)
+	{
+	  // fM02 != 0 guarantees fLength > 0, so the reciprocal below is safe.
+	  Type fLength = (REAL)sqrt(fM01*fM01+fM02*fM02);
+	  Type fInvLength = ((Type)1.0)/fLength;
+	  fM01 *= fInvLength;
+	  fM02 *= fInvLength;
+	  Type fQ = ((Type)2.0)*fM01*fM12+fM02*(fM22-fM11);
+	  m_afDiag[1] = fM11+fM02*fQ;
+	  m_afDiag[2] = fM22-fM02*fQ;
+	  m_afSubd[0] = fLength;
+	  m_afSubd[1] = fM12-fM01*fQ;
+	  mElement[0][0] = (Type)1.0;
+	  mElement[0][1] = (Type)0.0;
+	  mElement[0][2] = (Type)0.0;
+	  mElement[1][0] = (Type)0.0;
+	  mElement[1][1] = fM01;
+	  mElement[1][2] = fM02;
+	  mElement[2][0] = (Type)0.0;
+	  mElement[2][1] = fM02;
+	  mElement[2][2] = -fM01;
+	  m_bIsRotation = false;
+	}
+	else
+	{
+	  // Matrix is already tridiagonal: copy it through and use the identity.
+	  m_afDiag[1] = fM11;
+	  m_afDiag[2] = fM22;
+	  m_afSubd[0] = fM01;
+	  m_afSubd[1] = fM12;
+	  mElement[0][0] = (Type)1.0;
+	  mElement[0][1] = (Type)0.0;
+	  mElement[0][2] = (Type)0.0;
+	  mElement[1][0] = (Type)0.0;
+	  mElement[1][1] = (Type)1.0;
+	  mElement[1][2] = (Type)0.0;
+	  mElement[2][0] = (Type)0.0;
+	  mElement[2][1] = (Type)0.0;
+	  mElement[2][2] = (Type)1.0;
+	  m_bIsRotation = true;
+	}
+  }
+
+  // QL iteration on the tridiagonal form produced by Tridiagonal().  Updates
+  // m_afDiag / m_afSubd in place and applies the same rotations to the
+  // columns of mElement.  Each of the three passes is limited to iMaxIter
+  // iterations; returns false if a pass exhausts that limit, true otherwise.
+  bool QLAlgorithm(void)
+  {
+	const int32_t iMaxIter = 32;
+
+	for (int32_t i0 = 0; i0 <3; i0++)
+	{
+	  int32_t i1;
+	  for (i1 = 0; i1 < iMaxIter; i1++)
+	  {
+		// Find the first sub-diagonal entry that is negligible relative to
+		// its two neighbouring diagonal entries (adding it changes nothing).
+		int32_t i2;
+		for (i2 = i0; i2 <= (3-2); i2++)
+		{
+		  Type fTmp = Type(fabs(m_afDiag[i2]) + fabs(m_afDiag[i2+1]));
+		  if ( fabs(m_afSubd[i2]) + fTmp == fTmp )
+			break;
+		}
+		if (i2 == i0)
+		{
+		  // Nothing left to eliminate for this pass.
+		  break;
+		}
+
+		Type fG = (m_afDiag[i0+1] - m_afDiag[i0])/(((Type)2.0) * m_afSubd[i0]);
+		Type fR = (REAL)sqrt(fG*fG+(Type)1.0);
+		if (fG < (Type)0.0)
+		{
+		  fG = m_afDiag[i2]-m_afDiag[i0]+m_afSubd[i0]/(fG-fR);
+		}
+		else
+		{
+		  fG = m_afDiag[i2]-m_afDiag[i0]+m_afSubd[i0]/(fG+fR);
+		}
+		Type fSin = (Type)1.0, fCos = (Type)1.0, fP = (Type)0.0;
+		for (int32_t i3 = i2-1; i3 >= i0; i3--)
+		{
+		  Type fF = fSin*m_afSubd[i3];
+		  Type fB = fCos*m_afSubd[i3];
+		  // Divide by whichever of fF / fG has the larger magnitude.
+		  if (fabs(fF) >= fabs(fG))
+		  {
+			fCos = fG/fF;
+			fR = (REAL)sqrt(fCos*fCos+(Type)1.0);
+			m_afSubd[i3+1] = fF*fR;
+			fSin = ((Type)1.0)/fR;
+			fCos *= fSin;
+		  }
+		  else
+		  {
+			fSin = fF/fG;
+			fR = (REAL)sqrt(fSin*fSin+(Type)1.0);
+			m_afSubd[i3+1] = fG*fR;
+			fCos = ((Type)1.0)/fR;
+			fSin *= fCos;
+		  }
+		  fG = m_afDiag[i3+1]-fP;
+		  fR = (m_afDiag[i3]-fG)*fSin+((Type)2.0)*fB*fCos;
+		  fP = fSin*fR;
+		  m_afDiag[i3+1] = fG+fP;
+		  fG = fCos*fR-fB;
+		  // Apply the same sin/cos pair to columns i3 and i3+1 of mElement.
+		  for (int32_t i4 = 0; i4 < 3; i4++)
+		  {
+			fF = mElement[i4][i3+1];
+			mElement[i4][i3+1] = fSin*mElement[i4][i3]+fCos*fF;
+			mElement[i4][i3] = fCos*mElement[i4][i3]-fSin*fF;
+		  }
+		}
+		m_afDiag[i0] -= fP;
+		m_afSubd[i0] = fG;
+		m_afSubd[i2] = (Type)0.0;
+	  }
+	  if (i1 == iMaxIter)
+	  {
+		return false;
+	  }
+	}
+	return true;
+  }
+
+  // Selection sort of m_afDiag into decreasing order, swapping the matching
+  // columns of mElement alongside.
+  void DecreasingSort(void)
+  {
+	//sort eigenvalues in decreasing order, e[0] >= ... >= e[iSize-1]
+	for (int32_t i0 = 0, i1; i0 <= 3-2; i0++)
+	{
+	  // locate maximum eigenvalue
+	  i1 = i0;
+	  Type fMax = m_afDiag[i1];
+	  int32_t i2;
+	  for (i2 = i0+1; i2 < 3; i2++)
+	  {
+		if (m_afDiag[i2] > fMax)
+		{
+		  i1 = i2;
+		  fMax = m_afDiag[i1];
+		}
+	  }
+
+	  if (i1 != i0)
+	  {
+		// swap eigenvalues
+		m_afDiag[i1] = m_afDiag[i0];
+		m_afDiag[i0] = fMax;
+		// swap eigenvectors
+		for (i2 = 0; i2 < 3; i2++)
+		{
+		  Type fTmp = mElement[i2][i0];
+		  mElement[i2][i0] = mElement[i2][i1];
+		  mElement[i2][i1] = fTmp;
+		  // NOTE: toggled on each of the 3 row iterations, i.e. an odd number
+		  // of times, so the net effect is a single flip per column swap.
+		  m_bIsRotation = !m_bIsRotation;
+		}
+	  }
+	}
+  }
+
+
+  // If the bookkeeping flag says the column set is not a rotation, negate the
+  // first column of mElement.
+  void GuaranteeRotation(void)
+  {
+	if (!m_bIsRotation)
+	{
+	  // change sign on the first column
+	  for (int32_t iRow = 0; iRow <3; iRow++)
+	  {
+		mElement[iRow][0] = -mElement[iRow][0];
+	  }
+	}
+  }
+
+  Type mElement[3][3];   // in: symmetric matrix; out: eigenvectors stored as columns
+  Type m_afDiag[3];      // diagonal of the tridiagonal form; eigenvalues after QLAlgorithm()
+  Type m_afSubd[3];      // sub-diagonal of the tridiagonal form (working storage)
+  bool m_bIsRotation;    // flag maintained by Tridiagonal()/DecreasingSort(), consumed by GuaranteeRotation()
+};
+
+#endif
+
+// Fits a plane through a point cloud by eigen-decomposition of its covariance.
+//
+//   vcount   number of input points
+//   points   first point; each point is three consecutive REALs
+//   vstride  distance in BYTES between consecutive points
+//   weights  optional per-point weight (may be null => every weight is 1)
+//   wstride  distance in BYTES between consecutive weights
+//   plane    out: 4 REALs, normal in [0..2] and D in [3] such that
+//            dot(normal,p) + D == 0 for points on the plane
+//   center   out: 3 REALs, the weighted centroid of the input
+//
+// Returns true on success.  Returns false when there is nothing to fit
+// (vcount == 0, or the weights sum to exactly zero); the original code divided
+// by zero in that case and reported success with NaN/inf outputs.  On failure
+// the outputs are set to a defined fallback: center = (0,0,0) and
+// plane = (0,1,0,0), the same +Y reference axis fm_planeToMatrix() uses.
+//
+// Weights multiply the difference vector *before* it is squared, and the sums
+// are then divided by the plain weight total.
+bool fm_computeBestFitPlane(uint32_t vcount,
+					 const REAL *points,
+					 uint32_t vstride,
+					 const REAL *weights,
+					 uint32_t wstride,
+					 REAL *plane,
+					REAL *center)
+{
+  // ---- pass 1: weighted centroid ------------------------------------------
+  REAL kOrigin[3] = { 0, 0, 0 };
+  REAL wtotal = 0;
+
+  {
+    const char *source  = (const char *) points;
+    const char *wsource = (const char *) weights;
+
+    for (uint32_t i=0; i<vcount; i++)
+    {
+      const REAL *p = (const REAL *) source;
+
+      REAL w = 1;
+      if ( wsource )
+      {
+        w = *(const REAL *) wsource;
+        wsource+=wstride;
+      }
+
+      kOrigin[0]+=p[0]*w;
+      kOrigin[1]+=p[1]*w;
+      kOrigin[2]+=p[2]*w;
+      wtotal+=w;
+
+      source+=vstride;
+    }
+  }
+
+  // Degenerate input: no points, or weights that cancel out.  Bail out before
+  // the division below can produce inf/NaN.
+  if ( vcount == 0 || wtotal == 0 )
+  {
+    center[0] = 0;
+    center[1] = 0;
+    center[2] = 0;
+
+    plane[0] = 0;
+    plane[1] = 1;
+    plane[2] = 0;
+    plane[3] = 0;
+    return false;
+  }
+
+  REAL recip = 1.0f / wtotal; // reciprocal of total weighting
+
+  kOrigin[0]*=recip;
+  kOrigin[1]*=recip;
+  kOrigin[2]*=recip;
+
+  center[0] = kOrigin[0];
+  center[1] = kOrigin[1];
+  center[2] = kOrigin[2];
+
+  // ---- pass 2: covariance about the centroid (upper triangle only) ---------
+  REAL fSumXX=0;
+  REAL fSumXY=0;
+  REAL fSumXZ=0;
+  REAL fSumYY=0;
+  REAL fSumYZ=0;
+  REAL fSumZZ=0;
+
+  {
+    const char *source  = (const char *) points;
+    const char *wsource = (const char *) weights;
+
+    for (uint32_t i=0; i<vcount; i++)
+    {
+      const REAL *p = (const REAL *) source;
+
+      REAL w = 1;
+      if ( wsource )
+      {
+        w = *(const REAL *) wsource;
+        wsource+=wstride;
+      }
+
+      REAL kDiff[3];
+      kDiff[0] = w*(p[0] - kOrigin[0]); // apply vertex weighting!
+      kDiff[1] = w*(p[1] - kOrigin[1]);
+      kDiff[2] = w*(p[2] - kOrigin[2]);
+
+      fSumXX+= kDiff[0] * kDiff[0];
+      fSumXY+= kDiff[0] * kDiff[1];
+      fSumXZ+= kDiff[0] * kDiff[2];
+      fSumYY+= kDiff[1] * kDiff[1];
+      fSumYZ+= kDiff[1] * kDiff[2];
+      fSumZZ+= kDiff[2] * kDiff[2];
+
+      source+=vstride;
+    }
+  }
+
+  fSumXX *= recip;
+  fSumXY *= recip;
+  fSumXZ *= recip;
+  fSumYY *= recip;
+  fSumYZ *= recip;
+  fSumZZ *= recip;
+
+  // ---- eigen-decomposition ---------------------------------------------------
+  Eigen<REAL> kES;
+
+  kES.mElement[0][0] = fSumXX;
+  kES.mElement[0][1] = fSumXY;
+  kES.mElement[0][2] = fSumXZ;
+
+  kES.mElement[1][0] = fSumXY;
+  kES.mElement[1][1] = fSumYY;
+  kES.mElement[1][2] = fSumYZ;
+
+  kES.mElement[2][0] = fSumXZ;
+  kES.mElement[2][1] = fSumYZ;
+  kES.mElement[2][2] = fSumZZ;
+
+  // compute eigenstuff, smallest eigenvalue is in last position
+  kES.DecrSortEigenStuff();
+
+  // The eigenvector of the smallest eigenvalue (last column) is the direction
+  // of least variance, i.e. the plane normal.
+  REAL kNormal[3];
+  kNormal[0] = kES.mElement[0][2];
+  kNormal[1] = kES.mElement[1][2];
+  kNormal[2] = kES.mElement[2][2];
+
+  plane[0] = kNormal[0];
+  plane[1] = kNormal[1];
+  plane[2] = kNormal[2];
+
+  // D is chosen so that the centroid lies exactly on the plane.
+  plane[3] = 0 - fm_dot(kNormal,kOrigin);
+
+  return true;
+}
+
+
+// Tests whether segment a1->a2 and segment b1->b2 point the same way.
+//
+// Both directions are normalised and their dot product is compared with
+// 'epsilon', so 'epsilon' is a cosine threshold (a value close to 1 means
+// "nearly parallel").  Only the directions are compared: the test does not
+// check that the two segments lie on the same line, and anti-parallel
+// segments (dot near -1) are reported as not co-linear.
+//
+// The direction of B is computed directly as b2-b1.  The previous form,
+// (b2-a1)-(b1-a1) with a2 substituted in the z term, is algebraically the same
+// value but added needless rounding error and an inconsistent reference point.
+bool fm_colinear(const REAL a1[3],const REAL a2[3],const REAL b1[3],const REAL b2[3],REAL epsilon)  // true if these two line segments are co-linear.
+{
+  REAL dir1[3];
+  REAL dir2[3];
+
+  dir1[0] = a2[0] - a1[0];
+  dir1[1] = a2[1] - a1[1];
+  dir1[2] = a2[2] - a1[2];
+
+  dir2[0] = b2[0] - b1[0];
+  dir2[1] = b2[1] - b1[1];
+  dir2[2] = b2[2] - b1[2];
+
+  fm_normalize(dir1);
+  fm_normalize(dir2);
+
+  const REAL dot = fm_dot(dir1,dir2);
+
+  return dot >= epsilon;
+}
+
+// Three-point variant: true when the direction p1->p2 and the direction
+// p2->p3, both normalised, have a dot product of at least 'epsilon'
+// ('epsilon' therefore acts as a cosine threshold).
+bool fm_colinear(const REAL *p1,const REAL *p2,const REAL *p3,REAL epsilon)
+{
+  REAL first[3];
+  REAL second[3];
+
+  for (int32_t k=0; k<3; k++)
+  {
+    first[k]  = p2[k] - p1[k];
+    second[k] = p3[k] - p2[k];
+  }
+
+  fm_normalize(first);
+  fm_normalize(second);
+
+  const REAL cosine = fm_dot(first,second);
+
+  return ( cosine >= epsilon ) ? true : false;
+}
+
+// Seeds a bounding box with a single point: both corners are set to p.
+void  fm_initMinMax(const REAL *p,REAL *bmin,REAL *bmax)
+{
+  for (int32_t axis=0; axis<3; axis++)
+  {
+    bmin[axis] = p[axis];
+    bmax[axis] = bmin[axis];
+  }
+}
+
+// Intersects the 2D segments a1->a2 and b1->b2 (only components [0] and [1]
+// are read).
+//
+// Returns IR_COINCIDENT or IR_PARALLEL when the direction determinant is
+// exactly zero, IR_DO_INTERSECT when both segment parameters fall in [0,1]
+// (and then writes the hit point to intersection[0..1]), and
+// IR_DONT_INTERSECT otherwise.  'intersection' is left untouched unless the
+// result is IR_DO_INTERSECT.
+IntersectResult fm_intersectLineSegments2d(const REAL *a1,const REAL *a2,const REAL *b1,const REAL *b2,REAL *intersection)
+{
+  const REAL denom  = ((b2[1] - b1[1])*(a2[0] - a1[0])) - ((b2[0] - b1[0])*(a2[1] - a1[1]));
+  const REAL nume_a = ((b2[0] - b1[0])*(a1[1] - b1[1])) - ((b2[1] - b1[1])*(a1[0] - b1[0]));
+  const REAL nume_b = ((a2[0] - a1[0])*(a1[1] - b1[1])) - ((a2[1] - a1[1])*(a1[0] - b1[0]));
+
+  if ( denom == 0 )
+  {
+    // Parallel directions: same carrier line only if both numerators vanish too.
+    const bool sameLine = ( nume_a == 0 && nume_b == 0 );
+    return sameLine ? IR_COINCIDENT : IR_PARALLEL;
+  }
+
+  const REAL inv = 1 / denom;
+  const REAL ua  = nume_a * inv;   // parameter along a1->a2
+  const REAL ub  = nume_b * inv;   // parameter along b1->b2
+
+  const bool insideA = ( ua >= 0 && ua <= 1 );
+  const bool insideB = ( ub >= 0 && ub <= 1 );
+  if ( !(insideA && insideB) )
+  {
+    return IR_DONT_INTERSECT;
+  }
+
+  // Get the intersection point.
+  intersection[0] = a1[0] + ua*(a2[0] - a1[0]);
+  intersection[1] = a1[1] + ua*(a2[1] - a1[1]);
+  return IR_DO_INTERSECT;
+}
+
+// Same test as fm_intersectLineSegments2d(), but instead of the hit point it
+// reports the segment parameters: t1 along a1->a2 and t2 along b1->b2.
+// t1/t2 are written only when the result is IR_DO_INTERSECT.
+IntersectResult fm_intersectLineSegments2dTime(const REAL *a1,const REAL *a2,const REAL *b1,const REAL *b2,REAL &t1,REAL &t2)
+{
+  const REAL denom  = ((b2[1] - b1[1])*(a2[0] - a1[0])) - ((b2[0] - b1[0])*(a2[1] - a1[1]));
+  const REAL nume_a = ((b2[0] - b1[0])*(a1[1] - b1[1])) - ((b2[1] - b1[1])*(a1[0] - b1[0]));
+  const REAL nume_b = ((a2[0] - a1[0])*(a1[1] - b1[1])) - ((a2[1] - a1[1])*(a1[0] - b1[0]));
+
+  if ( denom == 0 )
+  {
+    const bool sameLine = ( nume_a == 0 && nume_b == 0 );
+    return sameLine ? IR_COINCIDENT : IR_PARALLEL;
+  }
+
+  const REAL inv = 1 / denom;
+  const REAL ua  = nume_a * inv;
+  const REAL ub  = nume_b * inv;
+
+  const bool insideA = ( ua >= 0 && ua <= 1 );
+  const bool insideB = ( ub >= 0 && ub <= 1 );
+  if ( !(insideA && insideB) )
+  {
+    return IR_DONT_INTERSECT;
+  }
+
+  t1 = ua;
+  t2 = ub;
+  return IR_DO_INTERSECT;
+}
+
+//**** Plane Triangle Intersection
+
+
+
+
+
+// Finds where the segment p1->p2 crosses 'plane' and writes that point to
+// 'split'.  The end points must lie strictly on opposite sides of the plane:
+// if both signed distances are <= 0, or both are >= 0, the function returns
+// false and leaves 'split' untouched.  Returns true when 'split' was written.
+bool fm_intersectPointPlane(const REAL *p1,const REAL *p2,REAL *split,const REAL *plane)
+{
+  const REAL dp1 = fm_distToPlane(plane,p1);
+  const REAL dp2 = fm_distToPlane(plane, p2);
+
+  const bool neitherInFront = ( dp1 <= 0 && dp2 <= 0 );
+  const bool neitherBehind  = ( dp1 >= 0 && dp2 >= 0 );
+  if ( neitherInFront || neitherBehind )
+  {
+    return false;
+  }
+
+  REAL dir[3];
+  for (int32_t k=0; k<3; k++)
+  {
+    dir[k] = p2[k] - p1[k];
+  }
+
+  // Rate at which the signed distance changes along the segment.
+  const REAL dot1 = dir[0]*plane[0] + dir[1]*plane[1] + dir[2]*plane[2];
+  const REAL dot2 = dp1 - plane[3];
+
+  // Segment parameter at which the signed distance reaches zero.
+  const REAL t = -(plane[3] + dot2 ) / dot1;
+
+  for (int32_t k=0; k<3; k++)
+  {
+    split[k] = (dir[k]*t)+p1[k];
+  }
+
+  return true;
+}
+
+// Classifies point p against 'plane' with a tolerance band of +/- epsilon:
+// PTR_ON_PLANE inside the band, otherwise PTR_FRONT for a positive signed
+// distance and PTR_BACK for a non-positive one.
+PlaneTriResult fm_getSidePlane(const REAL *p,const REAL *plane,REAL epsilon)
+{
+  const REAL d = fm_distToPlane(plane,p);
+
+  const bool outsideBand = ( d < -epsilon || d > epsilon );
+  if ( !outsideBand )
+  {
+    return PTR_ON_PLANE;
+  }
+
+  return ( d > 0 ) ? PTR_FRONT : PTR_BACK;
+}
+
+
+
+#ifndef PLANE_TRIANGLE_INTERSECTION_H
+
+#define PLANE_TRIANGLE_INTERSECTION_H
+
+#define MAXPTS 256
+
+// Minimal xyz triple.  The members are laid out x, y, z so that code in this
+// file can pass &pt.x where a "Type[3]" is expected.
+template <class Type> class point
+{
+public:
+
+  // Copies three consecutive values starting at p into x, y and z.
+  void set(const Type *p)
+  {
+    const Type *src = p;
+    x = *src++;
+    y = *src++;
+    z = *src;
+  }
+
+  Type x;
+  Type y;
+  Type z;
+};
+
+// Plane stored as a normal plus the constant term D of the plane equation
+// normal.x*X + normal.y*Y + normal.z*Z + D.
+template <class Type> class plane
+{
+public:
+  // Builds the plane from four consecutive values: normal xyz followed by D.
+  plane(const Type *p)
+  {
+    normal.set(p);
+    D = p[3];
+  }
+
+  // Evaluates the plane equation at p: positive on the side the normal points
+  // to, negative on the other side, zero on the plane.
+  Type Classify_Point(const point<Type> &p)
+  {
+    const Type alongNormal = p.x*normal.x + p.y*normal.y + p.z*normal.z;
+    return alongNormal + D;
+  }
+
+  point<Type> normal;
+  Type  D;
+};
+
+// Fixed-capacity polygon (at most MAXPTS vertices) used while splitting a
+// triangle against a plane.  No bounds checking is performed anywhere.
+template <class Type> class polygon
+{
+public:
+  // Empty polygon.
+  polygon(void) : mVcount(0)
+  {
+  }
+
+  // Triangle built from three xyz triples.
+  polygon(const Type *p1,const Type *p2,const Type *p3) : mVcount(3)
+  {
+    mVertices[0].set(p1);
+    mVertices[1].set(p2);
+    mVertices[2].set(p3);
+  }
+
+
+  int32_t NumVertices(void) const { return mVcount; };
+
+  // Returns vertex 'index'; a negative index has the vertex count added to it
+  // first (so -1 addresses the last vertex).
+  const point<Type>& Vertex(int32_t index)
+  {
+    const int32_t slot = ( index < 0 ) ? index + mVcount : index;
+    return mVertices[slot];
+  };
+
+
+  // Replaces the contents with the first 'count' entries of 'pts'.
+  void set(const point<Type> *pts,int32_t count)
+  {
+    int32_t i = 0;
+    while ( i < count )
+    {
+      mVertices[i] = pts[i];
+      ++i;
+    }
+    mVcount = count;
+  }
+
+
+  // Splits 'poly' by the plane 'part'.  Vertices with a positive
+  // classification go to 'front', negative ones to 'back', and vertices that
+  // classify as exactly zero go to both.  Wherever an edge changes sign the
+  // crossing point is inserted into both outputs.  Note that the polygon being
+  // split is the 'poly' argument, not *this.
+  void Split_Polygon(polygon<Type> *poly,plane<Type> *part, polygon<Type> &front, polygon<Type> &back)
+  {
+    const int32_t total = poly->NumVertices ();
+    int32_t   frontCount = 0;
+    int32_t   backCount  = 0;
+    point<Type> frontPts[MAXPTS];
+    point<Type> backPts[MAXPTS];
+
+    // Start with the closing edge: last vertex -> first vertex.
+    point<Type> prev = poly->Vertex (total - 1);
+    Type prevSide = part->Classify_Point (prev);
+
+    for (int32_t i = 0; i < total; i++)
+    {
+      point<Type> cur = poly->Vertex(i);
+      const Type curSide = part->Classify_Point(cur);
+
+      const bool crossesToFront = ( curSide > 0 && prevSide < 0 );
+      const bool crossesToBack  = ( curSide < 0 && prevSide > 0 );
+      if ( crossesToFront || crossesToBack )
+      {
+        point<Type> v;
+        fm_intersectPointPlane(&cur.x, &prev.x, &v.x, &part->normal.x );
+        backPts[backCount++] = v;
+        frontPts[frontCount++] = v;
+      }
+
+      if ( curSide > 0 )
+      {
+        frontPts[frontCount++] = cur;
+      }
+      else if ( curSide < 0 )
+      {
+        backPts[backCount++] = cur;
+      }
+      else
+      {
+        // Exactly on the plane: shared by both halves.
+        backPts[backCount++] = cur;
+        frontPts[frontCount++] = cur;
+      }
+
+      prev = cur;
+      prevSide = curSide;
+    }
+
+    front.set(&frontPts[0], frontCount);
+    back.set(&backPts[0], backCount);
+  }
+
+  int32_t           mVcount;
+  point<Type>   mVertices[MAXPTS];
+};
+
+
+
+#endif
+
+// Appends the xyz triple p to the strided vertex array 'dest' at slot
+// 'pcount' ('tstride' is the slot size in bytes) and then increments pcount.
+// Asserts that no more than four vertices end up in the output.
+static inline void add(const REAL *p,REAL *dest,uint32_t tstride,uint32_t &pcount)
+{
+  REAL *slot = (REAL *)( (char *) dest + pcount*tstride );
+  for (int32_t k=0; k<3; k++)
+  {
+    slot[k] = p[k];
+  }
+  ++pcount;
+  assert( pcount <= 4 );
+}
+
+
+// Splits one triangle against a plane.
+//
+// Vertices within +/- epsilon of the plane are treated as lying on it.  A
+// fully co-planar triangle is reported as PTR_FRONT.  If only some vertices
+// are on the plane they are assigned to the side of the first vertex that is
+// not.  When all vertices end up on one side the triangle is copied to that
+// output unchanged; otherwise it is cut and each output receives its part.
+// An output that ends up with fewer than 3 vertices has its count reset to 0.
+//
+// Returns PTR_FRONT, PTR_BACK or PTR_SPLIT.
+PlaneTriResult fm_planeTriIntersection(const REAL *_plane,    // the plane equation in Ax+By+Cz+D format
+									const REAL *triangle, // the source triangle.
+									uint32_t tstride,  // stride in bytes of the input and output *vertices*
+									REAL        epsilon,  // the co-planar epsilon value.
+									REAL       *front,    // the polygon in front of the plane
+									uint32_t &fcount,  // number of vertices in the 'front' polygon
+									REAL       *back,     // the polygon in back of the plane
+									uint32_t &bcount) // the number of vertices in the 'back' polygon.
+{
+  fcount = 0;
+  bcount = 0;
+
+  // Locate the three vertices of the strided input triangle.
+  const char *base = (const char *) triangle;
+  const REAL *verts[3];
+  verts[0] = (const REAL *) (base);
+  verts[1] = (const REAL *) (base+tstride);
+  verts[2] = (const REAL *) (base+tstride*2);
+
+  // Classify every vertex against the plane.
+  PlaneTriResult side[3];
+  for (int32_t k=0; k<3; k++)
+  {
+    side[k] = fm_getSidePlane(verts[k],_plane,epsilon);
+  }
+
+  uint32_t onPlane = 0;
+  for (int32_t k=0; k<3; k++)
+  {
+    if ( side[k] == PTR_ON_PLANE ) onPlane++;
+  }
+
+  if ( onPlane == 3 )
+  {
+    // Completely co-planar: by convention it belongs to the front.
+    for (int32_t k=0; k<3; k++)
+    {
+      add(verts[k],front,tstride,fcount);
+    }
+    return PTR_FRONT;
+  }
+
+  if ( onPlane != 0 )
+  {
+    // Give the on-plane vertices the side of the first vertex that is off it.
+    PlaneTriResult chosen = PTR_ON_PLANE;
+    for (int32_t k=0; k<3; k++)
+    {
+      if ( side[k] != PTR_ON_PLANE )
+      {
+        chosen = side[k];
+        break;
+      }
+    }
+    for (int32_t k=0; k<3; k++)
+    {
+      if ( side[k] == PTR_ON_PLANE ) side[k] = chosen;
+    }
+  }
+
+  if ( side[0] == side[1] && side[0] == side[2] )
+  {
+    // Whole triangle on one side: copy it through untouched.
+    if ( side[0] == PTR_FRONT )
+    {
+      for (int32_t k=0; k<3; k++)
+      {
+        add(verts[k],front,tstride,fcount);
+      }
+    }
+    else
+    {
+      for (int32_t k=0; k<3; k++)
+      {
+        add(verts[k],back,tstride,bcount);
+      }
+    }
+    return side[0];
+  }
+
+  // The triangle straddles the plane: cut it.
+  polygon<REAL> source(verts[0],verts[1],verts[2]);
+  polygon<REAL> frontPart;
+  polygon<REAL> backPart;
+  plane<REAL>   cutter(_plane);
+
+  source.Split_Polygon(&source,&cutter,frontPart,backPart);
+
+  for (int32_t i=0; i<frontPart.mVcount; i++)
+  {
+    add( &frontPart.mVertices[i].x, front, tstride, fcount );
+  }
+
+  for (int32_t i=0; i<backPart.mVcount; i++)
+  {
+    add( &backPart.mVertices[i].x, back, tstride, bcount );
+  }
+
+  // Anything with fewer than three vertices is not a polygon; drop it.
+  if ( fcount < 3 ) fcount = 0;
+  if ( bcount < 3 ) bcount = 0;
+
+  PlaneTriResult result = PTR_SPLIT;
+  if ( fcount == 0 && bcount )
+  {
+    result = PTR_BACK;
+  }
+  if ( bcount == 0 && fcount )
+  {
+    result = PTR_FRONT;
+  }
+
+  return result;
+}
+
+// computes the OBB for this set of points relative to this transform matrix.
+// Measures the box that encloses 'points' in the local frame of 'matrix'.
+// Every point is taken into that frame with fm_inverseRT(), the min/max
+// extents are tracked there, 'sides' receives the extents, and the centre of
+// the box (rotated back with fm_rotate()) is ADDED to the translation slots
+// matrix[12..14].  'pstride' is the distance between points in bytes.
+void computeOBB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *sides,REAL *matrix)
+{
+  REAL lo[3] = { 1e9, 1e9, 1e9 };
+  REAL hi[3] = { -1e9, -1e9, -1e9 };
+
+  const char *cursor = (const char *) points;
+  for (uint32_t i=0; i<vcount; i++, cursor+=pstride)
+  {
+    REAL local[3];
+    fm_inverseRT(matrix, (const REAL *) cursor, local ); // inverse rotate translate
+
+    for (int32_t k=0; k<3; k++)
+    {
+      if ( local[k] < lo[k] ) lo[k] = local[k];
+      if ( local[k] > hi[k] ) hi[k] = local[k];
+    }
+  }
+
+  REAL localCenter[3];
+  for (int32_t k=0; k<3; k++)
+  {
+    sides[k] = hi[k]-lo[k];
+    localCenter[k] = sides[k]*0.5f+lo[k];
+  }
+
+  REAL worldOffset[3];
+  fm_rotate(matrix,localCenter,worldOffset);
+
+  matrix[12]+=worldOffset[0];
+  matrix[13]+=worldOffset[1];
+  matrix[14]+=worldOffset[2];
+}
+
+// Computes an oriented bounding box for a point cloud.
+//
+// The initial orientation comes from the best-fit plane of the points.  With
+// 'bruteForce' set, that frame is additionally combined with rotations of
+// 10..170 degrees (10 degree steps, middle euler angle) and the candidate with
+// the smallest volume is kept.  'sides' receives the box extents and 'matrix'
+// the 4x4 box transform.
+void fm_computeBestFitOBB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *sides,REAL *matrix,bool bruteForce)
+{
+  REAL fitPlane[4];
+  REAL fitCenter[3];
+  fm_computeBestFitPlane(vcount,points,pstride,0,0,fitPlane,fitCenter);
+  fm_planeToMatrix(fitPlane,matrix);
+  computeOBB( vcount, points, pstride, sides, matrix );
+
+  if ( !bruteForce )
+  {
+    return;
+  }
+
+  // Keep the plane-derived frame as the base that every candidate rotates.
+  REAL baseMatrix[16];
+  memcpy(baseMatrix,matrix,16*sizeof(REAL));
+
+  REAL bestVolume = sides[0]*sides[1]*sides[2];
+
+  for (REAL angle=10; angle<180; angle+=10)
+  {
+    REAL spin[4];
+    fm_eulerToQuat(0,angle*FM_DEG_TO_RAD,0,spin);
+
+    REAL spinMatrix[16];
+    fm_quatToMatrix(spin,spinMatrix);
+
+    REAL candidate[16];
+    fm_matrixMultiply(spinMatrix,baseMatrix,candidate);
+
+    REAL candidateSides[3];
+    computeOBB( vcount, points, pstride, candidateSides, candidate );
+
+    const REAL candidateVolume = candidateSides[0]*candidateSides[1]*candidateSides[2];
+    if ( candidateVolume < bestVolume )
+    {
+      bestVolume = candidateVolume;
+      memcpy(matrix,candidate,sizeof(REAL)*16);
+      sides[0] = candidateSides[0];
+      sides[1] = candidateSides[1];
+      sides[2] = candidateSides[2];
+    }
+  }
+}
+
+// Convenience overload: runs the matrix version of fm_computeBestFitOBB() and
+// splits the resulting transform into a translation ('pos') and a rotation
+// quaternion ('quat').
+void fm_computeBestFitOBB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *sides,REAL *pos,REAL *quat,bool bruteForce)
+{
+  REAL boxTransform[16];
+
+  fm_computeBestFitOBB(vcount,points,pstride,sides,boxTransform,bruteForce);
+
+  fm_getTranslation(boxTransform,pos);
+  fm_matrixToQuat(boxTransform,quat);
+}
+
+// Axis-aligned bounding box of a strided point cloud.  'sides' receives the
+// extents and 'pos' the box centre.  'pstride' is in bytes.  The first point
+// is read unconditionally to seed the bounds, so at least one point must be
+// supplied.
+void fm_computeBestFitABB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *sides,REAL *pos)
+{
+  REAL lo[3];
+  REAL hi[3];
+
+  for (int32_t k=0; k<3; k++)
+  {
+    lo[k] = points[k];
+    hi[k] = points[k];
+  }
+
+  const char *cursor = (const char *) points;
+  for (uint32_t i=0; i<vcount; i++, cursor+=pstride)
+  {
+    const REAL *p = (const REAL *) cursor;
+    for (int32_t k=0; k<3; k++)
+    {
+      if ( p[k] < lo[k] ) lo[k] = p[k];
+      if ( p[k] > hi[k] ) hi[k] = p[k];
+    }
+  }
+
+  for (int32_t k=0; k<3; k++)
+  {
+    sides[k] = hi[k] - lo[k];
+  }
+
+  for (int32_t k=0; k<3; k++)
+  {
+    pos[k] = lo[k]+sides[k]*0.5f;
+  }
+}
+
+
+// Builds a 4x4 transform for a plane: the rotation is the arc that takes the
+// +Y axis onto the plane normal, and the translation is the point
+// (0,-plane[3],0) carried through that rotation.
+void fm_planeToMatrix(const REAL *plane,REAL *matrix) // convert a plane equation to a 4x4 rotation matrix
+{
+  REAL up[3] = { 0, 1, 0 };
+  REAL arc[4];
+  fm_rotationArc(up,plane,arc);
+  fm_quatToMatrix(arc,matrix);
+
+  REAL onAxis[3] = { 0, -plane[3], 0 };
+  REAL translation[3];
+  fm_transform(matrix,onAxis,translation);
+  fm_setTranslation(translation,matrix);
+}
+
+// Converts a plane equation to a rotation quaternion plus a position.
+//
+//   plane  in:  4 REALs, normal in [0..2] and D in [3], with the convention
+//               dot(normal,p) + D == 0 used throughout this file
+//   quat   out: the arc rotation taking the +Y axis onto the plane normal
+//   pos    out: the point (0,-D,0) carried through that rotation, i.e. the
+//               point -D*normal, which satisfies the plane equation for a
+//               unit normal
+//
+// The sign of D was previously not negated here, unlike in the sibling
+// fm_planeToMatrix(); that produced +D*normal, which lies on the wrong side of
+// the origin whenever D != 0.  Both functions now agree.
+void fm_planeToQuat(const REAL *plane,REAL *quat,REAL *pos) // convert a plane equation to a quaternion and translation
+{
+  REAL ref[3] = { 0, 1, 0 };
+  REAL matrix[16];
+  fm_rotationArc(ref,plane,quat);
+  fm_quatToMatrix(quat,matrix);
+  REAL origin[3] = { 0, -plane[3], 0 };   // same offset fm_planeToMatrix() uses
+  fm_transform(matrix,origin,pos);
+}
+
+// Euler angles (radians) -> 4x4 matrix, by way of a quaternion.
+void fm_eulerMatrix(REAL ax,REAL ay,REAL az,REAL *matrix) // convert euler (in radians) to a dest 4x4 matrix (translation set to zero)
+{
+  REAL rotation[4];
+
+  fm_eulerToQuat(ax,ay,az,rotation);
+  fm_quatToMatrix(rotation,matrix);
+}
+
+
+//**********************************************************
+//**********************************************************
+//**** Vertex Welding
+//**********************************************************
+//**********************************************************
+
+#ifndef VERTEX_INDEX_H
+
+#define VERTEX_INDEX_H
+
+namespace VERTEX_INDEX
+{
+
+class KdTreeNode;
+
+typedef std::vector< KdTreeNode * > KdTreeNodeVector;
+
+// Splitting axis used at a given level of the kd-tree.  The values match the
+// position component the tree compares for that axis (x=0, y=1, z=2).
+enum Axes
+{
+  X_AXIS = 0,
+  Y_AXIS = 1,
+  Z_AXIS = 2
+};
+
+// One search hit: the matching node and the value the search stored for it
+// (KdTreeNode::search stores the *squared* distance to the query point).
+class KdTreeFindNode
+{
+public:
+  KdTreeFindNode(void) : mNode(0), mDistance(0)
+  {
+  }
+
+  KdTreeNode  *mNode;
+  double        mDistance;
+};
+
+// Callback interface through which kd-tree nodes look up vertex positions.
+// Nodes store only a vertex index; the implementer owns the coordinates and
+// returns a pointer to the three components (x,y,z) of the requested vertex.
+class KdTreeInterface
+{
+public:
+  // Position of vertex 'index' as doubles (used by the double-precision paths).
+  virtual const double * getPositionDouble(uint32_t index) const = 0;
+  // Position of vertex 'index' as floats (used by the single-precision paths).
+  virtual const float  * getPositionFloat(uint32_t index) const = 0;
+};
+
+// One node of the kd-tree.  A node stores only the index of its vertex and its
+// two children; coordinates are fetched on demand through KdTreeInterface.
+// The splitting axis is not stored: it cycles X -> Y -> Z -> X with depth and
+// is passed down by the caller.  At every node, vertices whose coordinate on
+// the current axis is <= the node's go to the left child, the rest go right.
+//
+// Changes relative to the previous revision (public interface unchanged):
+//  * the float and double code paths share one templated implementation
+//    instead of two hand-copied ones;
+//  * insertion walks the tree in a loop, so a degenerate (chain-shaped) tree
+//    can no longer overflow the call stack while being built;
+//  * search() never writes to 'found' when maxObjects is 0.
+class KdTreeNode
+{
+public:
+  KdTreeNode(void) : mIndex(0), mLeft(0), mRight(0)
+  {
+  }
+
+  KdTreeNode(uint32_t index) : mIndex(index), mLeft(0), mRight(0)
+  {
+  }
+
+  ~KdTreeNode(void)
+  {
+  }
+
+  // Inserts 'node' into the subtree rooted at this node, reading positions
+  // through iface->getPositionDouble().  'dim' is the splitting axis at this
+  // node's depth.
+  void addDouble(KdTreeNode *node,Axes dim,const KdTreeInterface *iface)
+  {
+    insert<double>(node,dim,iface);
+  }
+
+  // Single-precision counterpart of addDouble(); reads positions through
+  // iface->getPositionFloat().
+  void addFloat(KdTreeNode *node,Axes dim,const KdTreeInterface *iface)
+  {
+    insert<float>(node,dim,iface);
+  }
+
+  uint32_t getIndex(void) const { return mIndex; };
+
+  // Collects the nodes of this subtree that lie strictly within 'radius' of
+  // 'pos'.
+  //
+  //   axis        splitting axis at this node's depth
+  //   count       in/out: number of valid entries in 'found' (start at 0)
+  //   maxObjects  capacity of 'found'
+  //   found       out: hits ordered by increasing squared distance; when more
+  //               than maxObjects candidates exist the nearest ones are kept
+  //
+  // mDistance of each hit holds the squared distance.
+  void search(Axes axis,const double *pos,double radius,uint32_t &count,uint32_t maxObjects,KdTreeFindNode *found,const KdTreeInterface *iface)
+  {
+    searchImpl(axis,pos,radius,count,maxObjects,found,iface);
+  }
+
+  // Single-precision counterpart of the double search(); distances are
+  // accumulated in float before being stored.
+  void search(Axes axis,const float *pos,float radius,uint32_t &count,uint32_t maxObjects,KdTreeFindNode *found,const KdTreeInterface *iface)
+  {
+    searchImpl(axis,pos,radius,count,maxObjects,found,iface);
+  }
+
+private:
+
+  // Overloads that pick the right accessor for the scalar type; the last
+  // parameter is only a type tag and is never dereferenced.
+  static const double * fetchPosition(const KdTreeInterface *iface,uint32_t index,const double *)
+  {
+    return iface->getPositionDouble(index);
+  }
+
+  static const float * fetchPosition(const KdTreeInterface *iface,uint32_t index,const float *)
+  {
+    return iface->getPositionFloat(index);
+  }
+
+  // Maps an axis to the coordinate component it compares and to the axis used
+  // one level deeper.  Returns false for a value outside the enum, in which
+  // case the callers do not descend (matching the old switch statements,
+  // which had no default case).
+  static bool splitAxis(Axes axis,uint32_t &component,Axes &next)
+  {
+    switch ( axis )
+    {
+      case X_AXIS: component = 0; next = Y_AXIS; return true;
+      case Y_AXIS: component = 1; next = Z_AXIS; return true;
+      case Z_AXIS: component = 2; next = X_AXIS; return true;
+    }
+    return false;
+  }
+
+  // Shared implementation of addDouble()/addFloat(): walks down from this node
+  // until it finds the empty child slot where 'node' belongs.
+  template <class T> void insert(KdTreeNode *node,Axes dim,const KdTreeInterface *iface)
+  {
+    const T *nodePosition = fetchPosition(iface,node->mIndex,(const T *)0);
+    KdTreeNode *current = this;
+
+    for (;;)
+    {
+      const T *position = fetchPosition(iface,current->mIndex,(const T *)0);
+
+      uint32_t component = 0;
+      Axes next = dim;
+      if ( !splitAxis(dim,component,next) )
+      {
+        return;
+      }
+
+      // Ties go to the left, exactly as the search expects.
+      KdTreeNode *&child = ( nodePosition[component] <= position[component] ) ? current->mLeft : current->mRight;
+      if ( child == 0 )
+      {
+        child = node;
+        return;
+      }
+
+      current = child;
+      dim = next;
+    }
+  }
+
+  // Shared implementation of both search() overloads.
+  template <class T> void searchImpl(Axes axis,const T *pos,T radius,uint32_t &count,uint32_t maxObjects,KdTreeFindNode *found,const KdTreeInterface *iface)
+  {
+    const T *position = fetchPosition(iface,mIndex,(const T *)0);
+
+    T delta[3];
+    delta[0] = pos[0] - position[0];
+    delta[1] = pos[1] - position[1];
+    delta[2] = pos[2] - position[2];
+
+    // Decide which child contains the query point (always visited) and
+    // whether the other child is close enough to the splitting plane to
+    // possibly hold hits as well.
+    KdTreeNode *nearSide = 0;
+    KdTreeNode *farSide = 0;
+    uint32_t component = 0;
+    Axes next = axis;
+    if ( splitAxis(axis,component,next) )
+    {
+      const T d = delta[component];
+      if ( d <= 0 )
+      {
+        nearSide = mLeft;
+        if ( -d < radius )
+          farSide = mRight;
+      }
+      else
+      {
+        nearSide = mRight;
+        if ( d < radius )
+          farSide = mLeft;
+      }
+    }
+
+    const T r2 = radius*radius;
+    const T m  = delta[0]*delta[0]+delta[1]*delta[1]+delta[2]*delta[2];
+
+    if ( m < r2 )
+    {
+      if ( maxObjects > 0 )
+      {
+        // Insertion sort into the (already ordered) result list.  When the
+        // list is full the current worst entry falls off the end.
+        bool inserted = false;
+
+        for (uint32_t i=0; i<count; i++)
+        {
+          if ( m < found[i].mDistance ) // if this one is closer than a pre-existing one...
+          {
+            uint32_t scan = count;
+            if ( scan >= maxObjects ) scan=maxObjects-1;
+            for (uint32_t j=scan; j>i; j--)
+            {
+              found[j] = found[j-1];
+            }
+            found[i].mNode = this;
+            found[i].mDistance = m;
+            inserted = true;
+            break;
+          }
+        }
+
+        // Farther than everything found so far: append if there is room.
+        if ( !inserted && count < maxObjects )
+        {
+          found[count].mNode = this;
+          found[count].mDistance = m;
+        }
+      }
+
+      count++;
+      if ( count > maxObjects )
+      {
+        count = maxObjects;
+      }
+    }
+
+    if ( nearSide )
+      nearSide->searchImpl( next, pos,radius, count, maxObjects, found, iface);
+
+    if ( farSide )
+      farSide->searchImpl( next, pos,radius, count, maxObjects, found, iface);
+  }
+
+  void setLeft(KdTreeNode *left) { mLeft = left; };
+  void setRight(KdTreeNode *right) { mRight = right; };
+
+  KdTreeNode *getLeft(void)         { return mLeft; }
+  KdTreeNode *getRight(void)        { return mRight; }
+
+  uint32_t          mIndex;   // index of this node's vertex in the owner's vertex array
+  KdTreeNode     *mLeft;    // subtree with coordinate <= this node's on the split axis
+  KdTreeNode     *mRight;   // subtree with coordinate >  this node's on the split axis
+};
+
+
+#define MAX_BUNDLE_SIZE 1024  // 1024 nodes at a time, to minimize memory allocation and guarantee that pointers are persistent.
+
+// Fixed-size slab of MAX_BUNDLE_SIZE nodes.  Nodes are handed out one at a
+// time and never move; slabs are chained through mNext.
+class KdTreeNodeBundle
+{
+public:
+
+  KdTreeNodeBundle(void) : mNext(0), mIndex(0)
+  {
+  }
+
+  // True once every node of the slab has been handed out.
+  bool isFull(void) const
+  {
+    return mIndex == MAX_BUNDLE_SIZE;
+  }
+
+  // Hands out the next unused node.  Must not be called on a full slab.
+  KdTreeNode * getNextNode(void)
+  {
+    assert(mIndex<MAX_BUNDLE_SIZE);
+    return &mNodes[mIndex++];
+  }
+
+  KdTreeNodeBundle  *mNext;                     // next slab in the chain
+  uint32_t             mIndex;                    // number of nodes already handed out
+  KdTreeNode         mNodes[MAX_BUNDLE_SIZE];
+};
+
+
+typedef std::vector< double > DoubleVector;
+typedef std::vector< float >  FloatVector;
+
+class KdTree : public KdTreeInterface
+{
+public:
+  // Creates an empty tree in single-precision mode.
+  KdTree(void) : mUseDouble(false), mRoot(0), mBundle(0), mVcount(0)
+  {
+  }
+
+  // Releases the vertices and node storage via reset().
+  virtual ~KdTree(void) { reset(); }
+
+  // KdTreeInterface: pointer to the xyz triple of vertex 'index'.  Only valid
+  // in double-precision mode.
+  const double * getPositionDouble(uint32_t index) const
+  {
+    assert( mUseDouble );
+    assert ( index < mVcount );
+    const uint32_t offset = index*3;
+    return &mVerticesDouble[offset];
+  }
+
+  // KdTreeInterface: pointer to the xyz triple of vertex 'index'.  Only valid
+  // in single-precision mode.
+  const float * getPositionFloat(uint32_t index) const
+  {
+    assert( !mUseDouble );
+    assert ( index < mVcount );
+    const uint32_t offset = index*3;
+    return &mVerticesFloat[offset];
+  }
+
+  // Finds up to 'maxObjects' vertices within 'radius' of 'pos' and stores them
+  // in 'found'.  Returns the number of entries written (0 for an empty tree).
+  // Double-precision mode only.
+  uint32_t search(const double *pos,double radius,uint32_t maxObjects,KdTreeFindNode *found) const
+  {
+    assert( mUseDouble );
+    uint32_t hits = 0;
+    if ( mRoot )
+    {
+      mRoot->search(X_AXIS,pos,radius,hits,maxObjects,found,this);
+    }
+    return hits;
+  }
+
+  // Finds up to 'maxObjects' vertices within 'radius' of 'pos' and stores them
+  // in 'found'.  Returns the number of entries written (0 for an empty tree).
+  // Single-precision mode only.
+  uint32_t search(const float *pos,float radius,uint32_t maxObjects,KdTreeFindNode *found) const
+  {
+    assert( !mUseDouble );
+    uint32_t hits = 0;
+    if ( mRoot )
+    {
+      mRoot->search(X_AXIS,pos,radius,hits,maxObjects,found,this);
+    }
+    return hits;
+  }
+
+  // Empties the tree: drops the root, clears both vertex arrays, deletes every
+  // bundle reachable from mBundle through mNext, and zeroes the vertex count.
+  // The precision mode (mUseDouble) is left as it is.
+  void reset(void)
+  {
+    mRoot = 0;
+    mVerticesDouble.clear();
+    mVerticesFloat.clear();
+
+    while ( mBundle )
+    {
+      KdTreeNodeBundle *following = mBundle->mNext;
+      delete mBundle;
+      mBundle = following;
+    }
+
+    mVcount = 0;
+  }
+
+  // Appends a vertex, links a new node for it into the tree and returns the
+  // vertex's index.  Double-precision mode only.
+  uint32_t add(double x,double y,double z)
+  {
+    assert(mUseDouble);
+    const uint32_t index = mVcount;
+
+    mVerticesDouble.push_back(x);
+    mVerticesDouble.push_back(y);
+    mVerticesDouble.push_back(z);
+    mVcount++;
+
+    KdTreeNode *fresh = getNewNode(index);
+    if ( !mRoot )
+    {
+      mRoot = fresh;        // first vertex becomes the root
+    }
+    else
+    {
+      mRoot->addDouble(fresh,X_AXIS,this);
+    }
+    return index;
+  }
+
+  // Appends a vertex, links a new node for it into the tree and returns the
+  // vertex's index.  Single-precision mode only.
+  uint32_t add(float x,float y,float z)
+  {
+    assert(!mUseDouble);
+    const uint32_t index = mVcount;
+
+    mVerticesFloat.push_back(x);
+    mVerticesFloat.push_back(y);
+    mVerticesFloat.push_back(z);
+    mVcount++;
+
+    KdTreeNode *fresh = getNewNode(index);
+    if ( !mRoot )
+    {
+      mRoot = fresh;        // first vertex becomes the root
+    }
+    else
+    {
+      mRoot->addFloat(fresh,X_AXIS,this);
+    }
+    return index;
+  }
+
+  // Hands out storage for one more tree node and constructs it in place.
+  //
+  // Nodes live inside KdTreeNodeBundle pools. mBundle is the bundle currently
+  // being filled. When it is full, a fresh bundle is pushed on the FRONT of
+  // the chain (fresh->mNext = old) so that reset(), which walks mNext starting
+  // at mBundle, reaches and frees every bundle. The previous code linked the
+  // fresh bundle *behind* the old one and then moved mBundle to it, so reset()
+  // only ever saw the newest bundle and all earlier ones were leaked.
+  //
+  //   index : vertex index to store in the new node.
+  //
+  // Returns the node; it is owned by its bundle and must not be deleted
+  // individually.
+  KdTreeNode * getNewNode(uint32_t index)
+  {
+	if ( mBundle == 0 )
+	{
+	  mBundle = new KdTreeNodeBundle;
+	  mBundle->mNext = 0; // first bundle terminates the chain
+	}
+	if ( mBundle->isFull() )
+	{
+	  KdTreeNodeBundle *bundle = new KdTreeNodeBundle;
+	  bundle->mNext = mBundle; // keep older bundles reachable from the head
+	  mBundle = bundle;
+	}
+	KdTreeNode *node = mBundle->getNextNode();
+	new ( node ) KdTreeNode(index);
+	return node;
+  }
+
+  // Double-precision lookup: asks search() for at most one stored vertex
+  // within 'radius' of 'pos'.
+  //   _found : set to true when search() reported a hit, false otherwise.
+  // Returns the hit node's index, or 0 when nothing was found (check _found
+  // to tell the two apart).
+  uint32_t getNearest(const double *pos,double radius,bool &_found) const
+  {
+    assert( mUseDouble );
+
+    KdTreeFindNode hit[1];
+    const uint32_t hits = search(pos,radius,1,hit);
+
+    _found = ( hits != 0 );
+    if ( !_found )
+    {
+      return 0;
+    }
+    return hit[0].mNode->getIndex();
+  }
+
+  // Single-precision lookup: asks search() for at most one stored vertex
+  // within 'radius' of 'pos'.
+  //   _found : set to true when search() reported a hit, false otherwise.
+  // Returns the hit node's index, or 0 when nothing was found (check _found
+  // to tell the two apart).
+  uint32_t getNearest(const float *pos,float radius,bool &_found) const
+  {
+    assert( !mUseDouble );
+
+    KdTreeFindNode hit[1];
+    const uint32_t hits = search(pos,radius,1,hit);
+
+    _found = ( hits != 0 );
+    if ( !_found )
+    {
+      return 0;
+    }
+    return hit[0].mNode->getIndex();
+  }
+
+  const double * getVerticesDouble(void) const
+  {
+	assert( mUseDouble );
+	const double *ret = 0;
+	if ( !mVerticesDouble.empty() )
+	{
+	  ret = &mVerticesDouble[0];
+	}
+	return ret;
+  }
+
+  const float * getVerticesFloat(void) const
+  {
+	assert( !mUseDouble );
+	const float * ret = 0;
+	if ( !mVerticesFloat.empty() )
+	{
+	  ret = &mVerticesFloat[0];
+	}
+	return ret;
+  }
+
+  uint32_t getVcount(void) const { return mVcount; };
+
+  void setUseDouble(bool useDouble)
+  {
+	mUseDouble = useDouble;
+  }
+
+private:
+  bool                    mUseDouble;
+  KdTreeNode             *mRoot;
+  KdTreeNodeBundle       *mBundle;
+  uint32_t                  mVcount;
+  DoubleVector            mVerticesDouble;
+  FloatVector             mVerticesFloat;
+};
+
+}; // end of namespace VERTEX_INDEX
+
+class MyVertexIndex : public fm_VertexIndex
+{
+public:
+  MyVertexIndex(double granularity,bool snapToGrid)
+  {
+	mDoubleGranularity = granularity;
+	mFloatGranularity  = (float)granularity;
+	mSnapToGrid        = snapToGrid;
+	mUseDouble         = true;
+	mKdTree.setUseDouble(true);
+  }
+
+  MyVertexIndex(float granularity,bool snapToGrid)
+  {
+	mDoubleGranularity = granularity;
+	mFloatGranularity  = (float)granularity;
+	mSnapToGrid        = snapToGrid;
+	mUseDouble         = false;
+	mKdTree.setUseDouble(false);
+  }
+
+  virtual ~MyVertexIndex(void)
+  {
+
+  }
+
+
+  // Removes the fmod() remainder of p with respect to mDoubleGranularity,
+  // i.e. moves p toward zero onto a multiple of the granularity.
+  double snapToGrid(double p)
+  {
+    const double remainder = fmod(p,mDoubleGranularity);
+    return p - remainder;
+  }
+
+  // Removes the fmodf() remainder of p with respect to mFloatGranularity,
+  // i.e. moves p toward zero onto a multiple of the granularity.
+  float snapToGrid(float p)
+  {
+    const float remainder = fmodf(p,mFloatGranularity);
+    return p - remainder;
+  }
+
+  uint32_t    getIndex(const float *_p,bool &newPos)  // get index for a vector float
+  {
+	uint32_t ret;
+
+	if ( mUseDouble )
+	{
+	  double p[3];
+	  p[0] = _p[0];
+	  p[1] = _p[1];
+	  p[2] = _p[2];
+	  return getIndex(p,newPos);
+	}
+
+	newPos = false;
+
+	float p[3];
+
+	if ( mSnapToGrid )
+	{
+	  p[0] = snapToGrid(_p[0]);
+	  p[1] = snapToGrid(_p[1]);
+	  p[2] = snapToGrid(_p[2]);
+	}
+	else
+	{
+	  p[0] = _p[0];
+	  p[1] = _p[1];
+	  p[2] = _p[2];
+	}
+
+	bool found;
+	ret = mKdTree.getNearest(p,mFloatGranularity,found);
+	if ( !found )
+	{
+	  newPos = true;
+	  ret = mKdTree.add(p[0],p[1],p[2]);
+	}
+
+
+	return ret;
+  }
+
+  uint32_t    getIndex(const double *_p,bool &newPos)  // get index for a vector double
+  {
+	uint32_t ret;
+
+	if ( !mUseDouble )
+	{
+	  float p[3];
+	  p[0] = (float)_p[0];
+	  p[1] = (float)_p[1];
+	  p[2] = (float)_p[2];
+	  return getIndex(p,newPos);
+	}
+
+	newPos = false;
+
+	double p[3];
+
+	if ( mSnapToGrid )
+	{
+	  p[0] = snapToGrid(_p[0]);
+	  p[1] = snapToGrid(_p[1]);
+	  p[2] = snapToGrid(_p[2]);
+	}
+	else
+	{
+	  p[0] = _p[0];
+	  p[1] = _p[1];
+	  p[2] = _p[2];
+	}
+
+	bool found;
+	ret = mKdTree.getNearest(p,mDoubleGranularity,found);
+	if ( !found )
+	{
+	  newPos = true;
+	  ret = mKdTree.add(p[0],p[1],p[2]);
+	}
+
+
+	return ret;
+  }
+
+  const float *   getVerticesFloat(void) const
+  {
+	const float * ret = 0;
+
+	assert( !mUseDouble );
+
+	ret = mKdTree.getVerticesFloat();
+
+	return ret;
+  }
+
+  const double *  getVerticesDouble(void) const
+  {
+	const double * ret = 0;
+
+	assert( mUseDouble );
+
+	ret = mKdTree.getVerticesDouble();
+
+	return ret;
+  }
+
+  const float *   getVertexFloat(uint32_t index) const
+  {
+	const float * ret  = 0;
+	assert( !mUseDouble );
+#ifdef _DEBUG
+	uint32_t vcount = mKdTree.getVcount();
+	assert( index < vcount );
+#endif
+	ret =  mKdTree.getVerticesFloat();
+	ret = &ret[index*3];
+	return ret;
+  }
+
+  const double *   getVertexDouble(uint32_t index) const
+  {
+	const double * ret = 0;
+	assert( mUseDouble );
+#ifdef _DEBUG
+	uint32_t vcount = mKdTree.getVcount();
+	assert( index < vcount );
+#endif
+	ret =  mKdTree.getVerticesDouble();
+	ret = &ret[index*3];
+
+	return ret;
+  }
+
+  uint32_t    getVcount(void) const
+  {
+	return mKdTree.getVcount();
+  }
+
+  bool isDouble(void) const
+  {
+	return mUseDouble;
+  }
+
+
+  // Writes the welded vertices plus a triangle list as Wavefront OBJ text.
+  //
+  //   fname   : path handed to fopen() in "wb" mode.
+  //   tcount  : number of triangles.
+  //   indices : tcount*3 zero-based vertex indices.
+  //
+  // One "v x y z" line is written per stored vertex, followed by one
+  // "f a b c" line per triangle; lines end in CR LF.
+  // Returns false only when the file could not be opened; write errors are
+  // not detected.
+  bool            saveAsObj(const char *fname,uint32_t tcount,uint32_t *indices)
+  {
+    FILE *fph = fopen(fname,"wb");
+    if ( !fph )
+    {
+      return false;
+    }
+
+    const uint32_t vcount = getVcount();
+    if ( mUseDouble )
+    {
+      const double *v = getVerticesDouble();
+      for (uint32_t i=0; i<vcount; i++, v+=3)
+      {
+        // Coordinates are deliberately narrowed to float first so the text
+        // output stays the same as it has always been.
+        fprintf(fph,"v %0.9f %0.9f %0.9f\r\n", (float)v[0], (float)v[1], (float)v[2] );
+      }
+    }
+    else
+    {
+      const float *v = getVerticesFloat();
+      for (uint32_t i=0; i<vcount; i++, v+=3)
+      {
+        fprintf(fph,"v %0.9f %0.9f %0.9f\r\n", v[0], v[1], v[2] );
+      }
+    }
+
+    for (uint32_t i=0; i<tcount; i++, indices+=3)
+    {
+      // OBJ face indices are 1-based. The operands are unsigned, so they are
+      // printed with %u (the old %d mismatched the argument type and showed
+      // large indices as negative numbers).
+      fprintf(fph,"f %u %u %u\r\n",
+              (unsigned)(indices[0]+1),
+              (unsigned)(indices[1]+1),
+              (unsigned)(indices[2]+1) );
+    }
+
+    fclose(fph);
+    return true;
+  }
+
+private:
+  bool    mUseDouble:1;
+  bool    mSnapToGrid:1;
+  double  mDoubleGranularity;
+  float   mFloatGranularity;
+  VERTEX_INDEX::KdTree  mKdTree;
+};
+
+// Creates an indexed vertex system that stores positions as doubles.
+// The returned object is heap allocated; hand it back to fm_releaseVertexIndex().
+fm_VertexIndex * fm_createVertexIndex(double granularity,bool snapToGrid)
+{
+  fm_VertexIndex *created = new MyVertexIndex(granularity,snapToGrid);
+  return created;
+}
+
+// Creates an indexed vertex system that stores positions as floats.
+// The returned object is heap allocated; hand it back to fm_releaseVertexIndex().
+fm_VertexIndex * fm_createVertexIndex(float granularity,bool snapToGrid)
+{
+  fm_VertexIndex *created = new MyVertexIndex(granularity,snapToGrid);
+  return created;
+}
+
+// Destroys an index obtained from fm_createVertexIndex(). The pointer is
+// downcast to MyVertexIndex before deletion, so it must have come from there.
+void          fm_releaseVertexIndex(fm_VertexIndex *vindex)
+{
+  delete static_cast< MyVertexIndex *>(vindex);
+}
+
+#endif   // END OF VERTEX WELDING CODE
+
+
+// Computes the axis-aligned bounding box of a strided point set.
+//
+//   vcount  : number of points.
+//   points  : first point; each point is 3 consecutive REALs.
+//   pstride : distance in BYTES from one point to the next.
+//   bmin    : receives the minimum corner (3 REALs).
+//   bmax    : receives the maximum corner (3 REALs).
+//
+// Returns the length of the box diagonal. The first point is read
+// unconditionally, so 'points' must reference at least one point.
+REAL fm_computeBestFitAABB(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *bmin,REAL *bmax)
+{
+  // Seed both corners with the first point.
+  for (uint32_t axis=0; axis<3; axis++)
+  {
+    bmin[axis] = points[axis];
+  }
+  for (uint32_t axis=0; axis<3; axis++)
+  {
+    bmax[axis] = points[axis];
+  }
+
+  // Grow the box over the remaining points.
+  const uint8_t *cursor = (const uint8_t *) points;
+  for (uint32_t i=1; i<vcount; i++)
+  {
+    cursor += pstride;
+    const REAL *pt = (const REAL *) cursor;
+
+    for (uint32_t axis=0; axis<3; axis++)
+    {
+      if ( pt[axis] < bmin[axis] ) bmin[axis] = pt[axis];
+    }
+    for (uint32_t axis=0; axis<3; axis++)
+    {
+      if ( pt[axis] > bmax[axis] ) bmax[axis] = pt[axis];
+    }
+  }
+
+  const REAL dx = bmax[0] - bmin[0];
+  const REAL dy = bmax[1] - bmin[1];
+  const REAL dz = bmax[2] - bmin[2];
+
+  return (REAL) sqrt( dx*dx + dy*dy + dz*dz );
+}
+
+
+
+/* a = b - c */
+#define vector(a,b,c) \
+	(a)[0] = (b)[0] - (c)[0];	\
+	(a)[1] = (b)[1] - (c)[1];	\
+	(a)[2] = (b)[2] - (c)[2];
+
+
+
+#define innerProduct(v,q) \
+		((v)[0] * (q)[0] + \
+		(v)[1] * (q)[1] + \
+		(v)[2] * (q)[2])
+
+#define crossProduct(a,b,c) \
+	(a)[0] = (b)[1] * (c)[2] - (c)[1] * (b)[2]; \
+	(a)[1] = (b)[2] * (c)[0] - (c)[2] * (b)[0]; \
+	(a)[2] = (b)[0] * (c)[1] - (c)[0] * (b)[1];
+
+
+// Tests the line SEGMENT rayStart..rayEnd against triangle (p1,p2,p3).
+//
+//   rayStart, rayEnd : segment end points (3 REALs each).
+//   p1, p2, p3       : triangle corners (3 REALs each).
+//   sect             : receives the hit point (3 REALs); written only when
+//                      the function returns true.
+//
+// The segment direction is normalised and passed to
+// fm_rayIntersectsTriangle(), whose 't' is therefore a distance from
+// rayStart. A hit counts only when that distance does not exceed the
+// segment length. (The previous test was inverted: it accepted hits lying
+// beyond rayEnd and rejected every hit that was actually on the segment.)
+// A zero-length segment has no direction and reports no intersection
+// instead of dividing by zero.
+bool fm_lineIntersectsTriangle(const REAL *rayStart,const REAL *rayEnd,const REAL *p1,const REAL *p2,const REAL *p3,REAL *sect)
+{
+  REAL dir[3];
+
+  dir[0] = rayEnd[0] - rayStart[0];
+  dir[1] = rayEnd[1] - rayStart[1];
+  dir[2] = rayEnd[2] - rayStart[2];
+
+  const REAL d = (REAL)sqrt(dir[0]*dir[0] + dir[1]*dir[1] + dir[2]*dir[2]);
+  if ( !(d > 0) ) // degenerate (or NaN) segment
+  {
+    return false;
+  }
+
+  const REAL r = 1.0f / d;
+  dir[0]*=r;
+  dir[1]*=r;
+  dir[2]*=r;
+
+  REAL t;
+  if ( !fm_rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t ) )
+  {
+    return false;
+  }
+
+  if ( t > d ) // the ray hits the triangle, but past the end of the segment
+  {
+    return false;
+  }
+
+  sect[0] = rayStart[0] + dir[0]*t;
+  sect[1] = rayStart[1] + dir[1]*t;
+  sect[2] = rayStart[2] + dir[2]*t;
+  return true;
+}
+
+
+
+// Ray / triangle intersection.
+//
+//   p          : ray origin (3 REALs).
+//   d          : ray direction (3 REALs).
+//   v0, v1, v2 : triangle corners (3 REALs each).
+//   t          : receives the ray parameter of the hit. It is written once the
+//                barycentric tests pass, even if the function then returns
+//                false because t is not positive.
+//
+// Returns true only for a hit in front of the origin (t > 0). Rays for which
+// the determinant is within 0.00001 of zero are rejected.
+bool fm_rayIntersectsTriangle(const REAL *p,const REAL *d,const REAL *v0,const REAL *v1,const REAL *v2,REAL &t)
+{
+	REAL edgeA[3],edgeB[3],pvec[3],tvec[3],qvec[3];
+
+	vector(edgeA,v1,v0);
+	vector(edgeB,v2,v0);
+	crossProduct(pvec,d,edgeB);
+
+	const REAL denom = innerProduct(edgeA,pvec);
+	if (denom > -0.00001 && denom < 0.00001)
+	{
+		return false;
+	}
+	const REAL scale = 1/denom;
+
+	// First barycentric coordinate.
+	vector(tvec,p,v0);
+	const REAL baryU = scale * (innerProduct(tvec,pvec));
+	if (baryU < 0.0 || baryU > 1.0)
+	{
+		return false;
+	}
+
+	// Second barycentric coordinate.
+	crossProduct(qvec,tvec,edgeA);
+	const REAL baryV = scale * innerProduct(d,qvec);
+	if (baryV < 0.0 || baryU + baryV > 1.0)
+	{
+		return false;
+	}
+
+	// The supporting line crosses the triangle; it is a ray hit only when the
+	// crossing lies in front of the origin.
+	t = scale * innerProduct(edgeB,qvec);
+	return t > 0;
+}
+
+
+// 3x3 determinant of the matrix whose rows are p1, p2 and p3.
+// Terms are accumulated in the same order as the closed-form expression.
+inline REAL det(const REAL *p1,const REAL *p2,const REAL *p3)
+{
+  REAL sum = p1[0]*p2[1]*p3[2];
+  sum += p2[0]*p3[1]*p1[2];
+  sum += p3[0]*p1[1]*p2[2];
+  sum -= p1[0]*p3[1]*p2[2];
+  sum -= p2[0]*p1[1]*p3[2];
+  sum -= p3[0]*p2[1]*p1[2];
+  return sum;
+}
+
+
+// Volume of an indexed triangle mesh.
+//
+//   vertices : tightly packed positions, 3 REALs per vertex.
+//   tcount   : number of triangles.
+//   indices  : tcount*3 vertex indices.
+//
+// Sums det() over every triangle (the signed volume of the tetrahedron the
+// triangle forms with the origin, times six), divides by six and returns the
+// magnitude.
+REAL  fm_computeMeshVolume(const REAL *vertices,uint32_t tcount,const uint32_t *indices)
+{
+	REAL signedSum = 0;
+
+	const uint32_t *tri = indices;
+	for (uint32_t i=0; i<tcount; i++)
+	{
+		const REAL *a = &vertices[ tri[0]*3 ];
+		const REAL *b = &vertices[ tri[1]*3 ];
+		const REAL *c = &vertices[ tri[2]*3 ];
+		signedSum += det(a,b,c);
+		tri += 3;
+	}
+
+	signedSum *= (1.0f/6.0f);
+	if ( signedSum < 0 )
+	{
+		signedSum *= -1;
+	}
+	return signedSum;
+}
+
+
+// Address of point number 'index' in a strided array; pstride is in bytes.
+const REAL * fm_getPoint(const REAL *points,uint32_t pstride,uint32_t index)
+{
+  const uint8_t *base = (const uint8_t *)points;
+  return (REAL *)( base + (index*pstride) );
+}
+
+
+// 2D point-in-triangle test for triangle A,B,C and point P.
+// Returns true when P is on the non-negative side of all three directed
+// edges B->C, C->A and A->B (points exactly on an edge count as inside).
+bool fm_insideTriangle(REAL Ax, REAL Ay,
+					  REAL Bx, REAL By,
+					  REAL Cx, REAL Cy,
+					  REAL Px, REAL Py)
+
+{
+  // Edge B->C against the vector from B to P.
+  const REAL edgeBCx = Cx - Bx;
+  const REAL edgeBCy = Cy - By;
+  const REAL sideBC  = edgeBCx*(Py - By) - edgeBCy*(Px - Bx);
+  if ( !(sideBC >= 0.0f) )
+  {
+    return false;
+  }
+
+  // Edge C->A against the vector from C to P.
+  const REAL edgeCAx = Ax - Cx;
+  const REAL edgeCAy = Ay - Cy;
+  const REAL sideCA  = edgeCAx*(Py - Cy) - edgeCAy*(Px - Cx);
+  if ( !(sideCA >= 0.0f) )
+  {
+    return false;
+  }
+
+  // Edge A->B against the vector from A to P.
+  const REAL edgeABx = Bx - Ax;
+  const REAL edgeABy = By - Ay;
+  const REAL sideAB  = edgeABx*(Py - Ay) - edgeABy*(Px - Ax);
+  return sideAB >= 0.0f;
+}
+
+
+// Signed area of a polygon, using the first two components of every point.
+//
+//   pcount  : number of polygon points.
+//   points  : first point.
+//   pstride : distance in bytes between points.
+//
+// Accumulates the cross products of consecutive points (last point paired
+// with the first) and halves the total; the sign depends on the winding.
+REAL fm_areaPolygon2d(uint32_t pcount,const REAL *points,uint32_t pstride)
+{
+  const int32_t count = (int32_t)pcount;
+
+  REAL twiceArea = 0.0f;
+  int32_t prevIdx = count-1;
+  for (int32_t curIdx=0; curIdx<count; curIdx++)
+  {
+    const REAL *a = fm_getPoint(points,pstride,prevIdx);
+    const REAL *b = fm_getPoint(points,pstride,curIdx);
+    twiceArea += a[0]*b[1] - b[0]*a[1];
+    prevIdx = curIdx;
+  }
+  return twiceArea*0.5f;
+}
+
+
+// Even-odd (crossing number) point-in-polygon test in a chosen 2D projection.
+//
+//   pcount, points, pstride : polygon points; pstride is in bytes.
+//   point                   : the point to classify.
+//   xindex, yindex          : which components of each point act as X and Y.
+//
+// Every polygon edge that straddles the horizontal line through the point and
+// crosses it to the left of the point flips the inside/outside state.
+bool  fm_pointInsidePolygon2d(uint32_t pcount,const REAL *points,uint32_t pstride,const REAL *point,uint32_t xindex,uint32_t yindex)
+{
+  const REAL px = point[xindex];
+  const REAL py = point[yindex];
+
+  bool inside = false;
+  uint32_t prev = pcount-1;
+  for (uint32_t cur=0; cur<pcount; prev = cur++)
+  {
+    const REAL *a = fm_getPoint(points,pstride,cur);
+    const REAL *b = fm_getPoint(points,pstride,prev);
+
+    const REAL ax = a[xindex];
+    const REAL ay = a[yindex];
+    const REAL bx = b[xindex];
+    const REAL by = b[yindex];
+
+    const bool straddles = (ay < py && by >= py) || (by < py && ay >= py);
+    if ( straddles && (ax+(py-ay)/(by-ay)*(bx-ax)<px) )
+    {
+      inside = !inside;
+    }
+  }
+
+  return inside;
+}
+
+
+// Copies a polygon while dropping every point that is co-linear with its two
+// neighbours (collapses co-linear edges).
+//
+//   pcount  : number of input points; fewer than 3 produces no output.
+//   points  : first input point, 3 REALs per point.
+//   pstride : distance in BYTES between input points.
+//   _dest   : output buffer, written tightly packed (3 REALs per point); must
+//             have room for pcount points.
+//   epsilon : tolerance forwarded to fm_colinear().
+//
+// Returns the number of points written.
+//
+// Every input point is now located through fm_getPoint(), so pstride is
+// honoured throughout. Previously only the initial prev/next pointers used
+// pstride while the loop stepped by a fixed 3 REALs, which read the wrong
+// memory for any input that was not tightly packed.
+uint32_t fm_consolidatePolygon(uint32_t pcount,const REAL *points,uint32_t pstride,REAL *_dest,REAL epsilon) // collapses co-linear edges.
+{
+  uint32_t ret = 0;
+
+  if ( pcount >= 3 )
+  {
+    REAL *dest = _dest;
+    // The neighbour test always uses the previous INPUT point, whether or
+    // not that point was itself kept.
+    const REAL *prev = fm_getPoint(points,pstride,pcount-1);
+
+    for (uint32_t i=0; i<pcount; i++)
+    {
+      const REAL *current = fm_getPoint(points,pstride,i);
+      const uint32_t nextIndex = (i+1)==pcount ? 0 : i+1; // wrap to the first point
+      const REAL *next = fm_getPoint(points,pstride,nextIndex);
+
+      if ( !fm_colinear(prev,current,next,epsilon) )
+      {
+        dest[0] = current[0];
+        dest[1] = current[1];
+        dest[2] = current[2];
+
+        dest+=3;
+        ret++;
+      }
+
+      prev = current;
+    }
+  }
+
+  return ret;
+}
+
+
+#ifndef RECT3D_TEMPLATE
+
+#define RECT3D_TEMPLATE
+
+// Minimal axis-aligned box: a minimum and a maximum corner of three T each.
+// Both corners are public and can be read or written directly.
+template <class T> class Rect3d
+{
+public:
+  // Leaves both corners uninitialised.
+  Rect3d(void) { };
+
+  // Copies three components from each of bmin and bmax.
+  Rect3d(const T *bmin,const T *bmax)
+  {
+    for (int i=0; i<3; i++)
+    {
+      mMin[i] = bmin[i];
+    }
+    for (int i=0; i<3; i++)
+    {
+      mMax[i] = bmax[i];
+    }
+  }
+
+  // Sets the minimum corner from an array of three components.
+  void SetMin(const T *bmin)
+  {
+    for (int i=0; i<3; i++)
+    {
+      mMin[i] = bmin[i];
+    }
+  }
+
+  // Sets the maximum corner from an array of three components.
+  void SetMax(const T *bmax)
+  {
+    for (int i=0; i<3; i++)
+    {
+      mMax[i] = bmax[i];
+    }
+  }
+
+  // Sets the minimum corner from individual components.
+  void SetMin(T x,T y,T z)
+  {
+    mMin[0] = x;
+    mMin[1] = y;
+    mMin[2] = z;
+  }
+
+  // Sets the maximum corner from individual components.
+  void SetMax(T x,T y,T z)
+  {
+    mMax[0] = x;
+    mMax[1] = y;
+    mMax[2] = z;
+  }
+
+  T mMin[3];
+  T mMax[3];
+};
+
+#endif
+
+// Cuts 'source' in two along 'axis' (0 = X, 1 = Y, 2 = Z) at midpoint[axis].
+//   b1 : receives the part from source's minimum corner up to the cut.
+//   b2 : receives the part from the cut up to source's maximum corner.
+// Any other axis value leaves b1 and b2 untouched.
+void splitRect(uint32_t axis,
+						   const Rect3d<REAL> &source,
+							 Rect3d<REAL> &b1,
+							 Rect3d<REAL> &b2,
+							 const REAL *midpoint)
+{
+	if ( axis > 2 )
+	{
+		return;
+	}
+
+	// Start from the source's minimum corner and move it onto the cut.
+	REAL cutMin[3] = { source.mMin[0], source.mMin[1], source.mMin[2] };
+	cutMin[axis] = midpoint[axis];
+
+	// b1 keeps the minimum corner; its maximum corner is pulled back to the cut.
+	b1.SetMin(source.mMin);
+	REAL cutMax[3] = { source.mMax[0], source.mMax[1], source.mMax[2] };
+	cutMax[axis] = midpoint[axis];
+	b1.SetMax( cutMax[0], cutMax[1], cutMax[2] );
+
+	// b2 starts at the cut and keeps the maximum corner.
+	b2.SetMin( cutMin[0], cutMin[1], cutMin[2] );
+	b2.SetMax(source.mMax);
+}
+
+// Chooses a plane that cuts the point cloud's best-fit oriented box in half
+// across its longest side.
+//
+//   vcount, vertices : input points, tightly packed (3 REALs each).
+//   tcount, indices  : unused.
+//   plane            : receives 4 REALs; plane[0..2] are filled in by
+//                      fm_computePlane() and plane[3] is set to its return
+//                      value.
+//
+// fm_computeBestFitOBB() supplies the box side lengths and a 4x4 matrix.
+// Three points are built in the box's own frame (the box centre and two
+// points on the box boundary that share the centre's coordinate on the
+// split axis), pushed through that matrix with fm_transform() and handed to
+// fm_computePlane().
+//
+// Always returns true.
+//
+// The earlier version also built Rect3d halves through splitRect(); those
+// results were never read, so that dead work has been removed.
+bool fm_computeSplitPlane(uint32_t vcount,
+						  const REAL *vertices,
+						  uint32_t /* tcount */,
+						  const uint32_t * /* indices */,
+						  REAL *plane)
+{
+
+  REAL sides[3];
+  REAL matrix[16];
+
+  fm_computeBestFitOBB( vcount, vertices, sizeof(REAL)*3, sides, matrix );
+
+  // Box extents in its own frame: centred on the origin, half a side each way.
+  REAL bmax[3];
+  REAL bmin[3];
+  for (uint32_t i=0; i<3; i++)
+  {
+    bmax[i] = sides[i]*0.5f;
+    bmin[i] = -bmax[i];
+  }
+
+  const REAL dx = sides[0];
+  const REAL dy = sides[1];
+  const REAL dz = sides[2];
+
+  // Split across the longest side.
+  uint32_t axis = 0;
+
+  if ( dy > dx )
+  {
+    axis = 1;
+  }
+
+  if ( dz > dx && dz > dy )
+  {
+    axis = 2;
+  }
+
+  // All three points start at the box centre; p2 and p3 are then moved within
+  // the splitting plane so that the three are not coincident.
+  REAL p1[3];
+  REAL p2[3];
+  REAL p3[3];
+
+  p3[0] = p2[0] = p1[0] = bmin[0] + dx*0.5f;
+  p3[1] = p2[1] = p1[1] = bmin[1] + dy*0.5f;
+  p3[2] = p2[2] = p1[2] = bmin[2] + dz*0.5f;
+
+  switch ( axis )
+  {
+    case 0:
+      p2[1] = bmin[1];
+      p2[2] = bmin[2];
+
+      if ( dz > dy )
+      {
+        p3[1] = bmax[1];
+        p3[2] = bmin[2];
+      }
+      else
+      {
+        p3[1] = bmin[1];
+        p3[2] = bmax[2];
+      }
+
+      break;
+    case 1:
+      p2[0] = bmin[0];
+      p2[2] = bmin[2];
+
+      if ( dx > dz )
+      {
+        p3[0] = bmax[0];
+        p3[2] = bmin[2];
+      }
+      else
+      {
+        p3[0] = bmin[0];
+        p3[2] = bmax[2];
+      }
+
+      break;
+    case 2:
+      p2[0] = bmin[0];
+      p2[1] = bmin[1];
+
+      if ( dx > dy )
+      {
+        p3[0] = bmax[0];
+        p3[1] = bmin[1];
+      }
+      else
+      {
+        p3[0] = bmin[0];
+        p3[1] = bmax[1];
+      }
+
+      break;
+  }
+
+  // Move the three points out of the box frame using the OBB matrix.
+  REAL tp1[3];
+  REAL tp2[3];
+  REAL tp3[3];
+
+  fm_transform(matrix,p1,tp1);
+  fm_transform(matrix,p2,tp2);
+  fm_transform(matrix,p3,tp3);
+
+  plane[3] = fm_computePlane(tp1,tp2,tp3,plane);
+
+  return true;
+
+}
+
+#pragma warning(disable:4100)
+
+void fm_nearestPointInTriangle(const REAL * /*nearestPoint*/,const REAL * /*p1*/,const REAL * /*p2*/,const REAL * /*p3*/,REAL * /*nearest*/)
+{
+
+}
+
+// 2D cross product a x p, using the first two components of each vector.
+static REAL Partial(const REAL *a,const REAL *p) 
+{
+	const REAL forward  = a[0]*p[1];
+	const REAL backward = p[0]*a[1];
+	return forward - backward;
+}
+
+// Signed area of triangle p0,p1,p2 using only the first two components of
+// each point: half the sum of the 2D cross products of consecutive corners.
+REAL  fm_areaTriangle(const REAL *p0,const REAL *p1,const REAL *p2)
+{
+	REAL twiceArea = Partial(p0,p1);
+	twiceArea += Partial(p1,p2);
+	twiceArea += Partial(p2,p0);
+	return twiceArea*0.5f;
+}
+
+// diff = A - B, component-wise over three REALs.
+void fm_subtract(const REAL *A,const REAL *B,REAL *diff)
+{
+  for (int axis=0; axis<3; axis++)
+  {
+    diff[axis] = A[axis]-B[axis];
+  }
+}
+
+
+// pM = pA * pB for 4x4 matrices stored as 16 consecutive REALs, where element
+// (row,col) lives at index row*4+col.
+// The product is assembled in a scratch array first, so pM may be the same
+// array as pA or pB.
+void  fm_multiplyTransform(const REAL *pA,const REAL *pB,REAL *pM)
+{
+  REAL product[16];
+
+  for (uint32_t row=0; row<4; row++)
+  {
+    const REAL *a = &pA[row*4];
+    for (uint32_t col=0; col<4; col++)
+    {
+      product[row*4+col] = a[0] * pB[0*4+col] + a[1] * pB[1*4+col] + a[2] * pB[2*4+col] + a[3] * pB[3*4+col];
+    }
+  }
+
+  for (uint32_t i=0; i<16; i++)
+  {
+    pM[i] = product[i];
+  }
+}
+
+// Scales the three components of A in place.
+void fm_multiply(REAL *A,REAL scalar)
+{
+  for (int axis=0; axis<3; axis++)
+  {
+    A[axis]*=scalar;
+  }
+}
+
+// sum = A + B, component-wise over three REALs.
+void fm_add(const REAL *A,const REAL *B,REAL *sum)
+{
+  for (int axis=0; axis<3; axis++)
+  {
+    sum[axis] = A[axis]+B[axis];
+  }
+}
+
+// Copies three REALs from source to dest.
+void fm_copy3(const REAL *source,REAL *dest)
+{
+  for (int axis=0; axis<3; axis++)
+  {
+    dest[axis] = source[axis];
+  }
+}
+
+
+// Compacts a mesh down to the vertices its triangles actually reference.
+//
+//   vcount          : number of input vertices.
+//   input_vertices  : vcount*3 REALs.
+//   output_vertices : receives the referenced vertices, in order of first use.
+//   tcount          : number of triangles.
+//   input_indices   : tcount*3 indices into input_vertices.
+//   output_indices  : receives tcount*3 indices into output_vertices.
+//
+// Returns the number of vertices written to output_vertices.
+// The input vertices are copied to a temporary buffer before anything is
+// written, so the values read are those present on entry.
+uint32_t  fm_copyUniqueVertices(uint32_t vcount,const REAL *input_vertices,REAL *output_vertices,uint32_t tcount,const uint32_t *input_indices,uint32_t *output_indices)
+{
+  // Private snapshot of the input positions.
+  REAL *snapshot = (REAL *)malloc(sizeof(REAL)*vcount*3);
+  memcpy(snapshot,input_vertices,sizeof(REAL)*vcount*3);
+
+  // remap[old] is the new index of an input vertex; all-ones bytes mean it
+  // has not been emitted yet.
+  uint32_t *remap = (uint32_t *)malloc(sizeof(uint32_t)*vcount);
+  memset(remap,0xFF,sizeof(uint32_t)*vcount);
+
+  uint32_t uniqueCount = 0;
+  const uint32_t indexCount = tcount*3;
+
+  for (uint32_t i=0; i<indexCount; i++)
+  {
+    const uint32_t oldIndex = input_indices[i];
+
+    assert( oldIndex < vcount );
+
+    if ( remap[oldIndex] == 0xFFFFFFFF )
+    {
+      // First use of this vertex: append it to the output.
+      const REAL *pos = &snapshot[oldIndex*3];
+      REAL *dest = &output_vertices[uniqueCount*3];
+      dest[0] = pos[0];
+      dest[1] = pos[1];
+      dest[2] = pos[2];
+      remap[oldIndex] = uniqueCount;
+      uniqueCount++;
+    }
+    output_indices[i] = remap[oldIndex];
+  }
+
+  free(snapshot);
+  free(remap);
+  return uniqueCount;
+}
+
+// Returns true when every triangle of the indexed mesh lies in the plane of
+// the first triangle, as judged by fm_samePlane() with a normal tolerance of
+// 0.01 and a distance tolerance of 0.001. An empty mesh counts as co-planar.
+//
+//   tcount      : number of triangles.
+//   indices     : tcount*3 vertex indices.
+//   vertices    : tightly packed positions, 3 REALs per vertex.
+//   doubleSided : forwarded to fm_samePlane().
+bool    fm_isMeshCoplanar(uint32_t tcount,const uint32_t *indices,const REAL *vertices,bool doubleSided)
+{
+  if ( tcount == 0 )
+  {
+    return true;
+  }
+
+  // Plane of the first triangle is the reference.
+  REAL reference[4];
+  reference[3] = fm_computePlane(&vertices[indices[0]*3],
+                                 &vertices[indices[1]*3],
+                                 &vertices[indices[2]*3],
+                                 reference);
+
+  const uint32_t *tri = &indices[3];
+  for (uint32_t i=1; i<tcount; i++, tri+=3)
+  {
+    REAL candidate[4];
+    candidate[3] = fm_computePlane(&vertices[tri[0]*3],
+                                   &vertices[tri[1]*3],
+                                   &vertices[tri[2]*3],
+                                   candidate);
+    if ( !fm_samePlane(reference,candidate,0.01f,0.001f,doubleSided) )
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+
+// Approximate equality of two plane equations (4 REALs each; the last entry
+// is the distance term).
+//
+//   normalEpsilon : allowed deviation of fm_dot(p1,p2) from 1.
+//   dEpsilon      : the distance terms must differ by less than this.
+//   doubleSided   : when true the absolute value of the dot product is used,
+//                   so opposite-facing normals also match.
+bool fm_samePlane(const REAL p1[4],const REAL p2[4],REAL normalEpsilon,REAL dEpsilon,bool doubleSided)
+{
+  const REAL dDelta = (REAL) fabs(p1[3]-p2[3]);
+  if ( !(dDelta < dEpsilon) )
+  {
+    return false; // distance terms are too far apart
+  }
+
+  REAL alignment = fm_dot(p1,p2);
+  if ( doubleSided )
+  {
+    alignment = (REAL)fabs(alignment);
+  }
+
+  const REAL lowest  = 1 - normalEpsilon;
+  const REAL highest = 1 + normalEpsilon;
+  return alignment >= lowest && alignment <= highest;
+}
+
+
+// Prepares an "inverted" box for accumulation: every bmin component becomes
+// FLT_MAX and every bmax component becomes -FLT_MAX.
+void  fm_initMinMax(REAL bmin[3],REAL bmax[3])
+{
+  for (int axis=0; axis<3; axis++)
+  {
+    bmin[axis] = FLT_MAX;
+  }
+  for (int axis=0; axis<3; axis++)
+  {
+    bmax[axis] = -FLT_MAX;
+  }
+}
+
+// Grows the box on every side by (half the bmin..bmax distance) * ratio.
+void fm_inflateMinMax(REAL bmin[3], REAL bmax[3], REAL ratio)
+{
+	const REAL margin = fm_distance(bmin, bmax)*0.5f*ratio;
+
+	for (int axis=0; axis<3; axis++)
+	{
+		bmin[axis] -= margin;
+	}
+	for (int axis=0; axis<3; axis++)
+	{
+		bmax[axis] += margin;
+	}
+}
+
+#ifndef TESSELATE_H
+
+#define TESSELATE_H
+
+typedef std::vector< uint32_t > UintVector;
+
+class Myfm_Tesselate : public fm_Tesselate
+{
+public:
+  virtual ~Myfm_Tesselate(void)
+  {
+
+  }
+
+  // Subdivides every input triangle until no edge is longer than 'longEdge'
+  // or 'maxDepth' levels of recursion have been used.
+  //
+  //   vindex   : vertex index supplying the input positions; new vertices
+  //              created by the subdivision are added to it.
+  //   tcount   : number of input triangles.
+  //   indices  : tcount*3 indices into vindex.
+  //   longEdge : maximum allowed edge length (compared squared).
+  //   maxDepth : maximum recursion depth per input triangle.
+  //   outcount : receives the number of triangles in the returned list.
+  //
+  // Returns a pointer to outcount*3 indices owned by this object, or null
+  // when the list is empty (previously &mIndices[0] was formed on an empty
+  // vector, which is undefined behaviour).
+  // NOTE(review): mIndices is not cleared here, so calling this twice on the
+  // same object appends to the earlier result - confirm that is intended.
+  const uint32_t * tesselate(fm_VertexIndex *vindex,uint32_t tcount,const uint32_t *indices,float longEdge,uint32_t maxDepth,uint32_t &outcount)
+  {
+    mMaxDepth  = maxDepth;
+    mLongEdge  = longEdge*longEdge;
+    mLongEdgeD = mLongEdge;
+    mVertices  = vindex;
+
+    const uint32_t vcount = mVertices->getVcount();
+    const size_t   scalarCount = (size_t)vcount*3;
+
+    // The recursive tesselate() calls getIndex(), which in MyVertexIndex can
+    // append vertices and so may reallocate the storage the index hands out.
+    // Work from a private snapshot of the positions instead. (The snapshot is
+    // taken through std::vector so that an empty index, whose vertex pointer
+    // is null, is never passed to memcpy.)
+    if ( mVertices->isDouble() )
+    {
+      std::vector< double > snapshot;
+      if ( scalarCount )
+      {
+        const double *src = mVertices->getVerticesDouble();
+        snapshot.assign(src, src + scalarCount);
+      }
+
+      for (uint32_t i=0; i<tcount; i++)
+      {
+        const uint32_t i1 = *indices++;
+        const uint32_t i2 = *indices++;
+        const uint32_t i3 = *indices++;
+
+        const double *p1 = &snapshot[i1*3];
+        const double *p2 = &snapshot[i2*3];
+        const double *p3 = &snapshot[i3*3];
+
+        tesselate(p1,p2,p3,0);
+      }
+    }
+    else
+    {
+      std::vector< float > snapshot;
+      if ( scalarCount )
+      {
+        const float *src = mVertices->getVerticesFloat();
+        snapshot.assign(src, src + scalarCount);
+      }
+
+      for (uint32_t i=0; i<tcount; i++)
+      {
+        const uint32_t i1 = *indices++;
+        const uint32_t i2 = *indices++;
+        const uint32_t i3 = *indices++;
+
+        const float *p1 = &snapshot[i1*3];
+        const float *p2 = &snapshot[i2*3];
+        const float *p3 = &snapshot[i3*3];
+
+        tesselate(p1,p2,p3,0);
+      }
+    }
+
+    outcount = (uint32_t)(mIndices.size()/3);
+    return mIndices.empty() ? 0 : &mIndices[0];
+  }
+
+  // Recursive worker, single precision.
+  // While 'recurse' is below mMaxDepth and any squared edge length exceeds
+  // mLongEdge, the triangle is cut at the midpoint of its longest edge and
+  // both halves are processed one level deeper. Otherwise the three corners
+  // are looked up (or added) in mVertices and their indices appended to
+  // mIndices.
+  void tesselate(const float *p1,const float *p2,const float *p3,uint32_t recurse)
+  {
+    float len12 = 0;
+    float len23 = 0;
+    float len31 = 0;
+    bool  split = false;
+
+    if ( recurse < mMaxDepth )
+    {
+      len12 = fm_distanceSquared(p1,p2);
+      len23 = fm_distanceSquared(p2,p3);
+      len31 = fm_distanceSquared(p3,p1);
+
+      split = (  len12 > mLongEdge || len23 > mLongEdge || len31 > mLongEdge );
+    }
+
+    if ( !split )
+    {
+      // Small enough (or depth exhausted): emit the triangle.
+      bool newp;
+
+      const uint32_t i1 = mVertices->getIndex(p1,newp);
+      const uint32_t i2 = mVertices->getIndex(p2,newp);
+      const uint32_t i3 = mVertices->getIndex(p3,newp);
+
+      mIndices.push_back(i1);
+      mIndices.push_back(i2);
+      mIndices.push_back(i3);
+      return;
+    }
+
+    float mid[3];
+
+    if ( len12 >= len23 && len12 >= len31 )
+    {
+      // Edge p1-p2 is the longest.
+      fm_lerp(p1,p2,mid,0.5f);
+      tesselate(p1,mid,p3, recurse+1 );
+      tesselate(mid,p2,p3, recurse+1 );
+    }
+    else if ( len23 >= len12 && len23 >= len31 )
+    {
+      // Edge p2-p3 is the longest.
+      fm_lerp(p2,p3,mid,0.5f);
+      tesselate(p1,p2,mid, recurse+1 );
+      tesselate(p1,mid,p3, recurse+1 );
+    }
+    else
+    {
+      // Edge p3-p1 is the longest.
+      fm_lerp(p3,p1,mid,0.5f);
+      tesselate(p1,p2,mid, recurse+1 );
+      tesselate(mid,p2,p3, recurse+1 );
+    }
+  }
+
+  // Recursive worker, double precision.
+  // While 'recurse' is below mMaxDepth and any squared edge length exceeds
+  // mLongEdgeD, the triangle is cut at the midpoint of its longest edge and
+  // both halves are processed one level deeper. Otherwise the three corners
+  // are looked up (or added) in mVertices and their indices appended to
+  // mIndices.
+  void tesselate(const double *p1,const double *p2,const double *p3,uint32_t recurse)
+  {
+    double len12 = 0;
+    double len23 = 0;
+    double len31 = 0;
+    bool   split = false;
+
+    if ( recurse < mMaxDepth )
+    {
+      len12 = fm_distanceSquared(p1,p2);
+      len23 = fm_distanceSquared(p2,p3);
+      len31 = fm_distanceSquared(p3,p1);
+
+      split = (  len12 > mLongEdgeD || len23 > mLongEdgeD || len31 > mLongEdgeD );
+    }
+
+    if ( !split )
+    {
+      // Small enough (or depth exhausted): emit the triangle.
+      bool newp;
+
+      const uint32_t i1 = mVertices->getIndex(p1,newp);
+      const uint32_t i2 = mVertices->getIndex(p2,newp);
+      const uint32_t i3 = mVertices->getIndex(p3,newp);
+
+      mIndices.push_back(i1);
+      mIndices.push_back(i2);
+      mIndices.push_back(i3);
+      return;
+    }
+
+    double mid[3];
+
+    if ( len12 >= len23 && len12 >= len31 )
+    {
+      // Edge p1-p2 is the longest.
+      fm_lerp(p1,p2,mid,0.5);
+      tesselate(p1,mid,p3, recurse+1 );
+      tesselate(mid,p2,p3, recurse+1 );
+    }
+    else if ( len23 >= len12 && len23 >= len31 )
+    {
+      // Edge p2-p3 is the longest.
+      fm_lerp(p2,p3,mid,0.5);
+      tesselate(p1,p2,mid, recurse+1 );
+      tesselate(p1,mid,p3, recurse+1 );
+    }
+    else
+    {
+      // Edge p3-p1 is the longest.
+      fm_lerp(p3,p1,mid,0.5);
+      tesselate(p1,p2,mid, recurse+1 );
+      tesselate(mid,p2,p3, recurse+1 );
+    }
+  }
+
+private:
+  float           mLongEdge;
+  double          mLongEdgeD;
+  fm_VertexIndex *mVertices;
+  UintVector    mIndices;
+  uint32_t          mMaxDepth;
+};
+
+// Allocates a tesselator; hand it back to fm_releaseTesselate() when done.
+fm_Tesselate * fm_createTesselate(void)
+{
+  fm_Tesselate *created = new Myfm_Tesselate;
+  return created;
+}
+
+// Destroys a tesselator obtained from fm_createTesselate(). The pointer is
+// downcast to Myfm_Tesselate before deletion, so it must have come from there.
+void           fm_releaseTesselate(fm_Tesselate *t)
+{
+  delete static_cast< Myfm_Tesselate *>(t);
+}
+
+#endif
+
+
+#ifndef RAY_ABB_INTERSECT
+
+#define RAY_ABB_INTERSECT
+
+//! Integer representation of a floating-point value.
+#define IR(x)	((uint32_t&)x)
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+*	A method to compute a ray-AABB intersection.
+*	Original code by Andrew Woo, from "Graphics Gems", Academic Press, 1990
+*	Optimized code by Pierre Terdiman, 2000 (~20-30% faster on my Celeron 500)
+*	Epsilon value added by Klaus Hartmann. (discarding it saves a few cycles only)
+*
+*	Hence this version is faster as well as more robust than the original one.
+*
+*	Should work provided:
+*	1) the integer representation of 0.0f is 0x00000000
+*	2) the sign bit of the float is the most significant one
+*
+*	Report bugs: [email protected]
+*
+*	\param		aabb		[in] the axis-aligned bounding box
+*	\param		origin		[in] ray origin
+*	\param		dir			[in] ray direction
+*	\param		coord		[out] impact coordinates
+*	\return		true if ray intersects AABB
+*/
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define RAYAABB_EPSILON 0.00001f
+// Ray versus axis-aligned box test; see the comment block above for its
+// origin and parameters. An origin inside the box counts as a hit, with
+// coord set to the origin itself.
+bool fm_intersectRayAABB(const float MinB[3],const float MaxB[3],const float origin[3],const float dir[3],float coord[3])
+{
+  bool startsInside = true;
+  float planeT[3] = { -1.0f, -1.0f, -1.0f };
+
+  // For each axis on which the origin lies outside the slab, note the face
+  // the ray would have to cross and the ray parameter at which it does so.
+  for(uint32_t i=0;i<3;i++)
+  {
+    const float *face = 0;
+    if(origin[i] < MinB[i])
+    {
+      face = MinB;
+    }
+    else if(origin[i] > MaxB[i])
+    {
+      face = MaxB;
+    }
+
+    if(face)
+    {
+      coord[i]     = face[i];
+      startsInside = false;
+
+      // Only divide when the direction component's bit pattern is non-zero.
+      if(IR(dir[i]))	planeT[i] = (face[i] - origin[i]) / dir[i];
+    }
+  }
+
+  if(startsInside)
+  {
+    for(uint32_t i=0;i<3;i++)
+    {
+      coord[i] = origin[i];
+    }
+    return true;
+  }
+
+  // The face with the largest parameter is the only possible entry face.
+  uint32_t hitAxis = 0;
+  if(planeT[1] > planeT[hitAxis])	hitAxis = 1;
+  if(planeT[2] > planeT[hitAxis])	hitAxis = 2;
+
+  // Sign bit set: the candidate face is behind the ray (or was never set).
+  if(IR(planeT[hitAxis])&0x80000000) return false;
+
+  // The hit point must lie within the box on the two remaining axes.
+  for(uint32_t i=0;i<3;i++)
+  {
+    if(i==hitAxis)
+    {
+      continue;
+    }
+    coord[i] = origin[i] + planeT[hitAxis] * dir[i];
+#ifdef RAYAABB_EPSILON
+    if(coord[i] < MinB[i] - RAYAABB_EPSILON || coord[i] > MaxB[i] + RAYAABB_EPSILON)	return false;
+#else
+    if(coord[i] < MinB[i] || coord[i] > MaxB[i])	return false;
+#endif
+  }
+  return true;	// ray hits box
+}
+
+// Line segment p1..p2 versus axis-aligned box.
+//   intersect : receives the point reported by fm_intersectRayAABB(); it may
+//               be written even when the function returns false.
+// Segments no longer than RAYAABB_EPSILON never intersect. Otherwise the
+// segment is treated as a ray from p1 and the hit is accepted only if it is
+// no farther from p1 than p2 is.
+bool fm_intersectLineSegmentAABB(const float bmin[3],const float bmax[3],const float p1[3],const float p2[3],float intersect[3])
+{
+  float dir[3];
+  for (int axis=0; axis<3; axis++)
+  {
+    dir[axis] = p2[axis] - p1[axis];
+  }
+
+  const float length = fm_normalize(dir);
+  if ( !(length > RAYAABB_EPSILON) )
+  {
+    return false;
+  }
+
+  if ( !fm_intersectRayAABB(bmin,bmax,p1,dir,intersect) )
+  {
+    return false;
+  }
+
+  const float hitDistSq = fm_distanceSquared(p1,intersect);
+  if ( hitDistSq > (length*length) )
+  {
+    return false; // the ray hits the box beyond p2
+  }
+  return true;
+}
+
+#endif
+
+#ifndef OBB_TO_AABB
+
+#define OBB_TO_AABB
+
+#pragma warning(disable:4100)
+
+void    fm_OBBtoAABB(const float /*obmin*/[3],const float /*obmax*/[3],const float /*matrix*/[16],float /*abmin*/[3],float /*abmax*/[3])
+{
+  assert(0); // not yet implemented.
+}
+
+
+// Address of vertex number 'index' in a strided position array; vstride is
+// in bytes.
+const REAL * computePos(uint32_t index,const REAL *vertices,uint32_t vstride)
+{
+  const char *base = (const char *)vertices;
+  return (const REAL*)( base + (index*vstride) );
+}
+
+// Adds 'normal' onto the accumulated normal of vertex number 'index'.
+// nstride is the distance in bytes between consecutive normals.
+void computeNormal(uint32_t index,REAL *normals,uint32_t nstride,const REAL *normal)
+{
+  REAL *slot = (REAL *)( (char *)normals + (index*nstride) );
+  for (int axis=0; axis<3; axis++)
+  {
+    slot[axis]+=normal[axis];
+  }
+}
+
+// Computes a per-vertex normal as the normalised sum of the normals of all
+// triangles that use the vertex.
+// Vertices that no triangle references keep a zero sum, which is still passed
+// to fm_normalize().
+void fm_computeMeanNormals(uint32_t vcount,       // the number of vertices
+						   const REAL *vertices,     // the base address of the vertex position data.
+						   uint32_t vstride,      // the stride between position data.
+						   REAL *normals,            // the base address  of the destination for mean vector normals
+						   uint32_t nstride,      // the stride between normals
+						   uint32_t tcount,       // the number of triangles
+						   const uint32_t *indices)     // the triangle indices
+{
+  // Pass 1: clear every destination normal.
+  {
+    char *slot = (char *)normals;
+    for (uint32_t i=0; i<vcount; i++, slot+=nstride)
+    {
+      REAL *n = (REAL *)slot;
+      n[0] = n[1] = n[2] = 0;
+    }
+  }
+
+  // Pass 2: add each face normal onto its three corner vertices.
+  const uint32_t *tri = indices;
+  for (uint32_t i=0; i<tcount; i++, tri+=3)
+  {
+    const uint32_t i1 = tri[0];
+    const uint32_t i2 = tri[1];
+    const uint32_t i3 = tri[2];
+
+    const REAL *p1 = computePos(i1,vertices,vstride);
+    const REAL *p2 = computePos(i2,vertices,vstride);
+    const REAL *p3 = computePos(i3,vertices,vstride);
+
+    // Corners are passed in reverse order (p3,p2,p1).
+    REAL faceNormal[3];
+    fm_computePlane(p3,p2,p1,faceNormal);
+
+    computeNormal(i1,normals,nstride,faceNormal);
+    computeNormal(i2,normals,nstride,faceNormal);
+    computeNormal(i3,normals,nstride,faceNormal);
+  }
+
+  // Pass 3: normalise the accumulated sums.
+  {
+    char *slot = (char *)normals;
+    for (uint32_t i=0; i<vcount; i++, slot+=nstride)
+    {
+      fm_normalize((REAL *)slot);
+    }
+  }
+}
+
+#endif
+
+
+#define BIGNUMBER 100000000.0  		/* hundred million */
+
+// Stores the components (x,y,z) into the 3-element array `n`.
+static inline void Set(REAL *n,REAL x,REAL y,REAL z)
+{
+	REAL *out = n;
+	*out++ = x;
+	*out++ = y;
+	*out   = z;
+}
+
+// Copies the three components of `source` into `dest`.
+static inline void Copy(REAL *dest,const REAL *source)
+{
+	for (int axis=0; axis<3; axis++)
+	{
+		dest[axis] = source[axis];
+	}
+}
+
+
+// Computes an approximate bounding sphere for a strided point cloud.
+//   vcount  : number of points
+//   points  : base address of the first point (three REALs per point)
+//   pstride : distance in bytes between consecutive points
+//   center  : receives the sphere center (three REALs)
+// Returns the sphere radius.
+//
+// Pass 1 finds the extreme points along x, y and z and seeds the sphere with
+// the most widely separated pair. Pass 2 visits every point and grows the
+// sphere just enough to include any point found outside it.
+REAL  fm_computeBestFitSphere(uint32_t vcount,const REAL *points,uint32_t pstride,REAL *center)
+{
+	REAL radius;
+	REAL radius2;
+
+	REAL xmin[3];
+	REAL xmax[3];
+	REAL ymin[3];
+	REAL ymax[3];
+	REAL zmin[3];
+	REAL zmax[3];
+	REAL dia1[3];
+	REAL dia2[3];
+
+	/* FIRST PASS: find 6 minima/maxima points */
+	Set(xmin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
+	Set(xmax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
+	Set(ymin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
+	Set(ymax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
+	Set(zmin,BIGNUMBER,BIGNUMBER,BIGNUMBER);
+	Set(zmax,-BIGNUMBER,-BIGNUMBER,-BIGNUMBER);
+
+	{
+		const char *scan = (const char *)points;
+		for (uint32_t i=0; i<vcount; i++)
+		{
+			const REAL *caller_p = (const REAL *)scan;
+			if (caller_p[0]<xmin[0])
+				Copy(xmin,caller_p); /* New xminimum point */
+			if (caller_p[0]>xmax[0])
+				Copy(xmax,caller_p);
+			if (caller_p[1]<ymin[1])
+				Copy(ymin,caller_p);
+			if (caller_p[1]>ymax[1])
+				Copy(ymax,caller_p);
+			if (caller_p[2]<zmin[2])
+				Copy(zmin,caller_p);
+			if (caller_p[2]>zmax[2])
+				Copy(zmax,caller_p);
+			scan+=pstride;
+		}
+	}
+
+	/* Set xspan = distance between the 2 points xmin & xmax (squared) */
+	REAL dx = xmax[0] - xmin[0];
+	REAL dy = xmax[1] - xmin[1];
+	REAL dz = xmax[2] - xmin[2];
+	REAL xspan = dx*dx + dy*dy + dz*dz;
+
+	/* Same for y & z spans */
+	dx = ymax[0] - ymin[0];
+	dy = ymax[1] - ymin[1];
+	dz = ymax[2] - ymin[2];
+	REAL yspan = dx*dx + dy*dy + dz*dz;
+
+	dx = zmax[0] - zmin[0];
+	dy = zmax[1] - zmin[1];
+	dz = zmax[2] - zmin[2];
+	REAL zspan = dx*dx + dy*dy + dz*dz;
+
+	/* Set points dia1 & dia2 to the maximally separated pair */
+	Copy(dia1,xmin);
+	Copy(dia2,xmax); /* assume xspan biggest */
+	REAL maxspan = xspan;
+
+	if (yspan>maxspan)
+	{
+		maxspan = yspan;
+		Copy(dia1,ymin);
+		Copy(dia2,ymax);
+	}
+
+	if (zspan>maxspan)
+	{
+		maxspan = zspan;
+		Copy(dia1,zmin);
+		Copy(dia2,zmax);
+	}
+
+
+	/* dia1,dia2 is a diameter of initial sphere */
+	/* calc initial center */
+	center[0] = (dia1[0]+dia2[0])*0.5f;
+	center[1] = (dia1[1]+dia2[1])*0.5f;
+	center[2] = (dia1[2]+dia2[2])*0.5f;
+
+	/* calculate initial radius**2 and radius */
+
+	dx = dia2[0]-center[0]; /* x component of radius vector */
+	dy = dia2[1]-center[1]; /* y component of radius vector */
+	dz = dia2[2]-center[2]; /* z component of radius vector */
+
+	radius2 = dx*dx + dy*dy + dz*dz;
+	radius = REAL(sqrt(radius2));
+
+	/* SECOND PASS: increment current sphere */
+	{
+		const char *scan = (const char *)points;
+		for (uint32_t i=0; i<vcount; i++)
+		{
+			const REAL *caller_p = (const REAL *)scan;
+			dx = caller_p[0]-center[0];
+			dy = caller_p[1]-center[1];
+			dz = caller_p[2]-center[2];
+			REAL old_to_p_sq = dx*dx + dy*dy + dz*dz;
+			if (old_to_p_sq > radius2) 	/* do r**2 test first */
+			{ 	/* this point is outside of current sphere */
+				REAL old_to_p = REAL(sqrt(old_to_p_sq));
+				/* calc radius of new sphere */
+				radius = (radius + old_to_p) * 0.5f;
+				radius2 = radius*radius; 	/* for next r**2 compare */
+				REAL old_to_new = old_to_p - radius;
+				/* calc center of new sphere */
+				REAL recip = 1.0f /old_to_p;
+				REAL cx = (radius*center[0] + old_to_new*caller_p[0]) * recip;
+				REAL cy = (radius*center[1] + old_to_new*caller_p[1]) * recip;
+				REAL cz = (radius*center[2] + old_to_new*caller_p[2]) * recip;
+				Set(center,cx,cy,cz);
+			}
+			// Advance for EVERY point. Previously this sat inside the `if`, so the
+			// cursor stalled on the first point already inside the sphere and the
+			// rest of the cloud was never tested.
+			scan+=pstride;
+		}
+	}
+	return radius;
+}
+
+
+// Fits a capsule to a strided point cloud.
+//   vcount / points / pstride : point count, base address and byte stride of the input
+//   radius, height            : receive the capsule radius and height
+//   matrix                    : receives a quarter-turn Euler rotation multiplied with the OBB transform
+//   bruteForce                : forwarded unchanged to fm_computeBestFitOBB
+// The capsule axis is the longest side of the best-fit OBB. The radius is the
+// largest distance of any point from that axis (in OBB space) and the height
+// is twice the largest extent along the axis minus the diameter.
+void fm_computeBestFitCapsule(uint32_t vcount,const REAL *points,uint32_t pstride,REAL &radius,REAL &height,REAL matrix[16],bool bruteForce)
+{
+  REAL sides[3];
+  REAL omatrix[16];
+  fm_computeBestFitOBB(vcount,points,pstride,sides,omatrix,bruteForce);
+
+  // Select the strictly longest side; any tie falls through to axis 2.
+  int32_t axis = 2;
+  if ( sides[0] > sides[1] && sides[0] > sides[2] )
+	axis = 0;
+  else if ( sides[1] > sides[0] && sides[1] > sides[2] )
+	axis = 1;
+
+  // The quarter turn goes into the third Euler slot for axis 0, the second for
+  // axis 1 and the first for axis 2.
+  REAL euler[3] = { 0, 0, 0 };
+  euler[2-axis] = REAL(FM_PI/2);
+
+  REAL localTransform[16];
+  fm_eulerMatrix(euler[0],euler[1],euler[2],localTransform);
+  fm_matrixMultiply(localTransform,omatrix,matrix);
+
+  // The two coordinates perpendicular to the capsule axis, lower index first.
+  const int32_t radialA = ( axis == 0 ) ? 1 : 0;
+  const int32_t radialB = ( axis == 2 ) ? 1 : 2;
+
+  REAL maxDist = 0;   // largest squared distance from the axis
+  REAL maxLen = 0;    // largest absolute coordinate along the axis
+
+  const uint8_t *scan = (const uint8_t *)points;
+  for (uint32_t i=0; i<vcount; i++, scan+=pstride)
+  {
+	REAL t[3];
+	fm_inverseRT(omatrix,(const REAL *)scan,t);   // point expressed in OBB space
+
+	const REAL dist = t[radialA]*t[radialA]+t[radialB]*t[radialB];
+	if ( dist > maxDist )
+	{
+	  maxDist = dist;
+	}
+
+	const REAL l = (REAL) fabs(t[axis]);
+	if ( l > maxLen )
+	{
+	  maxLen = l;
+	}
+  }
+
+  radius = (REAL)sqrt(maxDist);
+  height = (maxLen*2)-(radius*2);
+}
+
+
+//************* Triangulation
+
+#ifndef TRIANGULATE_H
+
+#define TRIANGULATE_H
+
+typedef uint32_t TU32;
+
+// Plain three-component double vector used by the triangulator.
+// The default constructor deliberately leaves the components uninitialized.
+class TVec
+{
+public:
+	TVec(void) { }
+	TVec(double _x,double _y,double _z) : x(_x), y(_y), z(_z) { }
+
+	double x;
+	double y;
+	double z;
+};
+
+typedef std::vector< TVec >  TVecVector;
+typedef std::vector< TU32 >  TU32Vector;
+
+class CTriangulator
+{
+public:
+	///     Default constructor (defined out of line)
+	CTriangulator();
+
+	///     Virtual destructor (defined out of line)
+	virtual ~CTriangulator();
+
+	///     Triangulates the projected contour held in mPoints by forwarding to _process(); results are appended to 'indices'
+	void triangulate(TU32Vector &indices);
+
+	///     Returns a copy of projected point 'id' from mPoints (the index is not range checked)
+	inline TVec get(const TU32 id) { return mPoints[id]; }
+
+	virtual void reset(void) // discards the input points, the projected points and the generated indices
+	{
+		mInputPoints.clear();
+		mPoints.clear();
+		mIndices.clear();
+	}
+
+	virtual void addPoint(double x,double y,double z) // appends one contour point and grows the bounding box
+	{
+		TVec v(x,y,z);
+		// update bounding box: the first point initializes it, later points only expand it
+		if ( mInputPoints.empty() )
+		{
+			mMin = v;
+			mMax = v;
+		}
+		else
+		{
+			if ( x < mMin.x ) mMin.x = x;
+			if ( y < mMin.y ) mMin.y = y;
+			if ( z < mMin.z ) mMin.z = z;
+
+			if ( x > mMax.x ) mMax.x = x;
+			if ( y > mMax.y ) mMax.y = y;
+			if ( z > mMax.z ) mMax.z = z;
+		}
+		mInputPoints.push_back(v);
+	}
+
+	// Triangulation happens in 2d.  We could inverse transform the polygon around the normal direction, or we just use the two most significant axes
+	// Here we find the two longest axes and use them to triangulate.  Inverse transforming them would introduce more floating point error and isn't worth it.
+	// Returns a pointer to the first index stored in mIndices (three per triangle) and writes the triangle count to 'tcount'; returns 0 with tcount == 0 when nothing was produced.
+	virtual uint32_t * triangulate(uint32_t &tcount,double epsilon)
+	{
+		uint32_t *ret = 0;
+		tcount = 0;
+		mEpsilon = epsilon;
+
+		if ( !mInputPoints.empty() )
+		{
+			mPoints.clear();
+
+		  double dx = mMax.x - mMin.x; // bounding box extents; the axes are ranked longest (i1), middle (i2), shortest (i3)
+		  double dy = mMax.y - mMin.y;
+		  double dz = mMax.z - mMin.z;
+
+		  uint32_t i1,i2,i3;
+
+		  if ( dx > dy && dx > dz )
+		  {
+			  i1 = 0;
+			  if ( dy > dz )
+			  {
+				  i2 = 1;
+				  i3 = 2;
+			  }
+			  else
+			  {
+				  i2 = 2;
+				  i3 = 1;
+			  }
+		  }
+		  else if ( dy > dx && dy > dz )
+		  {
+			  i1 = 1;
+			  if ( dx > dz )
+			  {
+				  i2 = 0;
+				  i3 = 2;
+			  }
+			  else
+			  {
+				  i2 = 2;
+				  i3 = 0;
+			  }
+		  }
+		  else
+		  {
+			  i1 = 2;
+			  if ( dx > dy )
+			  {
+				  i2 = 0;
+				  i3 = 1;
+			  }
+			  else
+			  {
+				  i2 = 1;
+				  i3 = 0;
+			  }
+		  }
+
+		  uint32_t pcount = (uint32_t)mInputPoints.size();
+		  const double *points = &mInputPoints[0].x; // walks the TVec array as raw doubles, three per point
+		  for (uint32_t i=0; i<pcount; i++)
+		  {
+			TVec v( points[i1], points[i2], points[i3] ); // reordered so x,y carry the two longest axes
+			mPoints.push_back(v);
+			points+=3;
+		  }
+
+		  mIndices.clear();
+		  triangulate(mIndices);
+		  tcount = (uint32_t)mIndices.size()/3;
+		  if ( tcount )
+		  {
+			  ret = &mIndices[0];
+		  }
+		}
+		return ret;
+	}
+
+	virtual const double * getPoint(uint32_t index) // address of the x component of original (unprojected) input point 'index'; not range checked
+	{
+		return &mInputPoints[index].x;
+	}
+
+
+private:
+	double                  mEpsilon; // minimum signed cross product accepted by _snip(); assigned in triangulate(tcount,epsilon)
+	TVec                   mMin; // bounding box minimum of the input points
+	TVec                   mMax; // bounding box maximum of the input points
+	TVecVector             mInputPoints; // points exactly as passed to addPoint()
+	TVecVector             mPoints; // input points with their components reordered by axis extent
+	TU32Vector             mIndices; // output of the last triangulate(tcount,epsilon) call
+
+	///     Tests if point P lies inside (or on the boundary of) triangle A,B,C using x and y only
+	bool _insideTriangle(const TVec& A, const TVec& B, const TVec& C,const TVec& P);
+
+	///     Returns the signed area of the contour in mPoints, using x and y only
+	double _area();
+
+	bool _snip(int32_t u, int32_t v, int32_t w, int32_t n, int32_t *V);
+
+	///     Processes the triangulation, appending the resulting indices to 'indices'
+	void _process(TU32Vector &indices);
+
+};
+
+///     Default constructor.
+///     Gives every scalar member a defined value. mEpsilon in particular is read
+///     by _snip(), and would otherwise be indeterminate if triangulate(TU32Vector&)
+///     were called before triangulate(tcount,epsilon) had assigned it.
+CTriangulator::CTriangulator(void)
+	: mEpsilon(0.0)
+	, mMin(0.0,0.0,0.0)
+	, mMax(0.0,0.0,0.0)
+{
+}
+
+///     Destructor: has nothing to release explicitly; the vector members clean up after themselves.
+CTriangulator::~CTriangulator()
+{
+}
+
+///     Triangulates the contour: a thin wrapper that forwards to _process(), which appends the result to 'indices'.
+void CTriangulator::triangulate(TU32Vector &indices)
+{
+	_process(indices);
+}
+
+///     Processes the triangulation.
+///     Repeatedly clips a triangle (u,v,w) that _snip() accepts off the contour
+///     stored in mPoints, appending three indices per clipped triangle to
+///     'indices', until only two vertices remain. Gives up, keeping whatever
+///     was emitted so far, when a full sweep finds nothing to clip.
+void CTriangulator::_process(TU32Vector &indices)
+{
+	const int32_t n = (const int32_t)mPoints.size();
+	if (n < 3)
+		return;
+
+	// Ring of the vertex indices still on the contour. Owning it through a
+	// vector releases it on every exit path; the raw malloc used previously
+	// was leaked by the early return below.
+	std::vector< int32_t > V(n);
+
+	// A non-positive signed area means the ring is walked in reverse order.
+	const bool flipped = !(0.0f < _area());
+	for (int32_t v = 0; v < n; v++)
+		V[v] = flipped ? (n - 1) - v : v;
+
+	int32_t nv = n;
+	int32_t count = 2 * nv;   // iteration budget, refreshed after each successful clip
+	for (int32_t v = nv - 1; nv > 2;)
+	{
+		// Budget exhausted: no vertex could be clipped in a full sweep.
+		if (0 >= (count--))
+			return;
+
+		// Three consecutive ring positions, wrapping at the end.
+		int32_t u = v;
+		if (nv <= u)
+			u = 0;
+		v = u + 1;
+		if (nv <= v)
+			v = 0;
+		int32_t w = v + 1;
+		if (nv <= w)
+			w = 0;
+
+		if (_snip(u, v, w, nv, &V[0]))
+		{
+			const int32_t a = V[u];
+			const int32_t b = V[v];
+			const int32_t c = V[w];
+			// Emit the triangle; the vertex order depends on whether the ring was reversed.
+			if ( flipped )
+			{
+				indices.push_back(a);
+				indices.push_back(b);
+				indices.push_back(c);
+			}
+			else
+			{
+				indices.push_back(c);
+				indices.push_back(b);
+				indices.push_back(a);
+			}
+			// Drop the clipped vertex from the ring and reset the budget.
+			V.erase(V.begin() + v);
+			nv--;
+			count = 2 * nv;
+		}
+	}
+}
+
+///     Returns the signed area of the contour in mPoints, computed from the
+///     x and y components of each consecutive pair of points (wrapping around).
+double CTriangulator::_area()
+{
+	const int32_t total = (uint32_t)mPoints.size();
+	double sum = 0.0f;
+	int32_t prev = total - 1;
+	for (int32_t cur = 0; cur < total; cur++)
+	{
+		const TVec &from = mPoints[prev];
+		const TVec &to = mPoints[cur];
+		sum += from.x * to.y - to.x * from.y;
+		prev = cur;
+	}
+	sum*=0.5f;
+	return sum;
+}
+
+///     Decides whether ring positions (u,v,w) form a triangle that may be clipped.
+///     'V' maps ring positions to indices into mPoints and 'n' is the number of
+///     ring entries in use. The triangle is rejected when its signed cross
+///     product is below mEpsilon or when any other ring vertex lies inside it.
+bool CTriangulator::_snip(int32_t u, int32_t v, int32_t w, int32_t n, int32_t *V)
+{
+	const TVec &A = mPoints[ V[u] ];
+	const TVec &B = mPoints[ V[v] ];
+	const TVec &C = mPoints[ V[w] ];
+
+	const double cross = ((B.x - A.x) * (C.y - A.y)) - ((B.y - A.y) * (C.x - A.x));
+	if (mEpsilon > cross)
+		return false;
+
+	for (int32_t p = 0; p < n; p++)
+	{
+		const bool isCorner = (p == u) || (p == v) || (p == w);
+		if (!isCorner && _insideTriangle(A, B, C, mPoints[ V[p] ]))
+			return false;
+	}
+	return true;
+}
+
+///     Tests if point P is inside (or on the boundary of) triangle A,B,C.
+///     Only x and y are used: P passes when the cross product of each edge with
+///     the vector from that edge's start to P is non-negative.
+bool CTriangulator::_insideTriangle(const TVec& A, const TVec& B, const TVec& C,const TVec& P)
+{
+	// Edge B->C against B->P
+	const double ax = C.x - B.x;
+	const double ay = C.y - B.y;
+	const double bpx = P.x - B.x;
+	const double bpy = P.y - B.y;
+	const double aCROSSbp = ax * bpy - ay * bpx;
+	if (!(aCROSSbp >= 0.0f))
+		return false;
+
+	// Edge C->A against C->P
+	const double bx = A.x - C.x;
+	const double by = A.y - C.y;
+	const double cpx = P.x - C.x;
+	const double cpy = P.y - C.y;
+	const double bCROSScp = bx * cpy - by * cpx;
+	if (!(bCROSScp >= 0.0f))
+		return false;
+
+	// Edge A->B against A->P
+	const double cx = B.x - A.x;
+	const double cy = B.y - A.y;
+	const double apx = P.x - A.x;
+	const double apy = P.y - A.y;
+	const double cCROSSap = cx * apy - cy * apx;
+	return (cCROSSap >= 0.0f);
+}
+
+// Concrete fm_Triangulate: triangulates a 3d polygon contour and returns the
+// result as a flat array of triangle corner positions (9 values per triangle).
+// The returned array is owned by this object and stays valid until the next
+// triangulate3d() call, reset(), or destruction.
+class Triangulate : public fm_Triangulate
+{
+public:
+  Triangulate(void)
+  {
+	mPointsFloat = 0;
+	mPointsDouble = 0;
+  }
+
+  virtual ~Triangulate(void)
+  {
+	reset();
+  }
+
+  // Releases both result buffers.
+  void reset(void)
+  {
+	free(mPointsFloat);
+	free(mPointsDouble);
+	mPointsFloat = 0;
+	mPointsDouble = 0;
+  }
+
+  // Double precision entry point.
+  //   pcount / _points / vstride : contour point count, base address and byte stride
+  //   tcount                     : receives the triangle count (0 when nothing was produced)
+  //   consolidate                : when true the contour is first passed through fm_consolidatePolygon
+  //   epsilon                    : tolerance forwarded to the consolidation and the triangulator
+  // Returns tcount*9 doubles, or 0 when no triangles were produced.
+  virtual const double *       triangulate3d(uint32_t pcount,
+											 const double *_points,
+											 uint32_t vstride,
+											 uint32_t &tcount,
+											 bool consolidate,
+											 double epsilon)
+  {
+	reset();
+	tcount = 0; // defined result even when triangulation yields nothing
+
+	double *points = (double *)malloc(sizeof(double)*pcount*3);
+	if ( consolidate )
+	{
+	  pcount = fm_consolidatePolygon(pcount,_points,vstride,points,1-epsilon);
+	}
+	else
+	{
+	  double *dest = points;
+	  for (uint32_t i=0; i<pcount; i++)
+	  {
+		const double *src = fm_getPoint(_points,vstride,i);
+		dest[0] = src[0];
+		dest[1] = src[1];
+		dest[2] = src[2];
+		dest+=3;
+	  }
+	}
+	// 'points' is tightly packed whichever branch filled it, so it must be read
+	// back with a packed stride. Previously the consolidate branch kept the
+	// caller's input stride, which misread the buffer for non-packed input.
+	vstride = sizeof(double)*3;
+
+	if ( pcount >= 3 )
+	{
+	  CTriangulator ct;
+	  for (uint32_t i=0; i<pcount; i++)
+	  {
+		const double *src = fm_getPoint(points,vstride,i);
+		ct.addPoint( src[0], src[1], src[2] );
+	  }
+	  uint32_t _tcount;
+	  uint32_t *indices = ct.triangulate(_tcount,epsilon);
+	  if ( indices )
+	  {
+		tcount = _tcount;
+		mPointsDouble = (double *)malloc(sizeof(double)*tcount*3*3);
+		double *dest = mPointsDouble;
+		for (uint32_t i=0; i<tcount; i++)
+		{
+		  // Expand each index triple into three xyz positions.
+		  for (uint32_t corner=0; corner<3; corner++)
+		  {
+			const double *p = ct.getPoint(indices[i*3+corner]);
+			dest[0] = p[0];
+			dest[1] = p[1];
+			dest[2] = p[2];
+			dest+=3;
+		  }
+		}
+	  }
+	}
+	free(points);
+
+	return mPointsDouble;
+  }
+
+  // Single precision entry point: widens the input to double, runs the double
+  // overload, then narrows the result. Returns tcount*9 floats, or 0 when no
+  // triangles were produced.
+  virtual const float  *       triangulate3d(uint32_t pcount,
+											 const float  *points,
+											 uint32_t vstride,
+											 uint32_t &tcount,
+											 bool consolidate,
+											 float epsilon)
+  {
+	reset();
+
+	double *temp = (double *)malloc(sizeof(double)*pcount*3);
+	double *dest = temp;
+	for (uint32_t i=0; i<pcount; i++)
+	{
+	  const float *p = fm_getPoint(points,vstride,i);
+	  dest[0] = p[0];
+	  dest[1] = p[1];
+	  dest[2] = p[2];
+	  dest+=3;
+	}
+	const double *results = triangulate3d(pcount,temp,sizeof(double)*3,tcount,consolidate,epsilon);
+	if ( results )
+	{
+	  uint32_t fcount = tcount*3*3;
+	  mPointsFloat = (float *)malloc(sizeof(float)*tcount*3*3);
+	  for (uint32_t i=0; i<fcount; i++)
+	  {
+		mPointsFloat[i] = (float) results[i];
+	  }
+	  // The double buffer was only an intermediate; release it now.
+	  free(mPointsDouble);
+	  mPointsDouble = 0;
+	}
+	free(temp);
+
+	return mPointsFloat;
+  }
+
+private:
+  float *mPointsFloat;    // result of the last float triangulate3d(), or 0
+  double *mPointsDouble;  // result of the last double triangulate3d(), or 0
+};
+
+// Allocates a Triangulate and hands it back through its fm_Triangulate
+// interface; release it with fm_releaseTriangulate().
+fm_Triangulate * fm_createTriangulate(void)
+{
+  return static_cast< fm_Triangulate *>(new Triangulate);
+}
+
+// Destroys a triangulator obtained from fm_createTriangulate().
+void             fm_releaseTriangulate(fm_Triangulate *t)
+{
+  delete static_cast< Triangulate *>(t);
+}
+
+#endif
+
+// Returns false when p1 and p2 are closer together than `epsilon`
+// (compared on squared distances), true otherwise.
+bool validDistance(const REAL *p1,const REAL *p2,REAL epsilon)
+{
+	const REAL dx = p1[0] - p2[0];
+	const REAL dy = p1[1] - p2[1];
+	const REAL dz = p1[2] - p2[2];
+	const REAL distSquared = dx*dx+dy*dy+dz*dz;
+	return !( distSquared < (epsilon*epsilon) );
+}
+
+// Returns true when (p1,p2,p3) forms a usable triangle: every pair of corners
+// is at least `epsilon` apart, the area exceeds `epsilon`, and
+// fm_consolidatePolygon still reports three points for it.
+bool fm_isValidTriangle(const REAL *p1,const REAL *p2,const REAL *p3,REAL epsilon)
+{
+  // Reject triangles with (nearly) coincident corners.
+  if ( !validDistance(p1,p2,epsilon) )
+	return false;
+  if ( !validDistance(p1,p3,epsilon) )
+	return false;
+  if ( !validDistance(p2,p3,epsilon) )
+	return false;
+
+  // Reject triangles whose area is not above the tolerance.
+  const REAL area = fm_computeArea(p1,p2,p3);
+  if ( !( area > epsilon ) )
+	return false;
+
+  // Pack the corners and check that consolidation keeps all three.
+  REAL packed[3*3];
+  const REAL *corners[3] = { p1, p2, p3 };
+  for (uint32_t c=0; c<3; c++)
+  {
+	packed[c*3+0] = corners[c][0];
+	packed[c*3+1] = corners[c][1];
+	packed[c*3+2] = corners[c][2];
+  }
+
+  REAL consolidated[64*3];
+  const uint32_t pcount = fm_consolidatePolygon(3,packed,sizeof(REAL)*3,consolidated,1-epsilon);
+  return pcount == 3;
+}
+
+
+// Multiplies two quaternions stored as (x,y,z,w) and writes the product to
+// `quat`. All four results are computed before anything is stored.
+void  fm_multiplyQuat(const REAL *left,const REAL *right,REAL *quat)
+{
+	const REAL w = left[3]*right[3] - left[0]*right[0] - left[1]*right[1] - left[2]*right[2];
+	const REAL x = left[3]*right[0] + right[3]*left[0] + left[1]*right[2] - right[1]*left[2];
+	const REAL y = left[3]*right[1] + right[3]*left[1] + left[2]*right[0] - right[2]*left[0];
+	const REAL z = left[3]*right[2] + right[3]*left[2] + left[0]*right[1] - right[0]*left[1];
+
+	quat[0] = x;
+	quat[1] = y;
+	quat[2] = z;
+	quat[3] = w;
+}
+
+// Writes the arithmetic mean of `vcount` tightly packed xyz points to `center`.
+// Returns false, leaving `center` untouched, when there are no points.
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+						 const REAL *points,     // starting address of points array.
+						 REAL *center)
+
+{
+	if ( vcount == 0 )
+		return false;
+
+	center[0] = 0;
+	center[1] = 0;
+	center[2] = 0;
+
+	// Sum the coordinates directly into the output.
+	const REAL *p = points;
+	for (uint32_t remaining=vcount; remaining!=0; remaining--, p+=3)
+	{
+		center[0]+=p[0];
+		center[1]+=p[1];
+		center[2]+=p[2];
+	}
+
+	// Divide by the point count.
+	const REAL recip = 1.0f / (REAL)vcount;
+	for (uint32_t axis=0; axis<3; axis++)
+	{
+		center[axis]*=recip;
+	}
+	return true;
+}
+
+// Computes the area-weighted centroid of an indexed triangle mesh.
+//   vcount   : number of input data points (tightly packed xyz)
+//   points   : starting address of the points array
+//   triCount : number of triangles
+//   indices  : three vertex indices per triangle
+//   center   : receives the centroid
+// Each triangle contributes the average of its three corners weighted by its
+// area. When the mesh has no measurable area (no triangles, or only
+// degenerate ones) the plain vertex average is used instead, so the result is
+// never produced by dividing by zero.
+// Returns false only when vcount is 0.
+bool  fm_computeCentroid(uint32_t vcount,     // number of input data points
+	const REAL *points,     // starting address of points array.
+	uint32_t triCount,
+	const uint32_t *indices,
+	REAL *center)
+
+{
+	if (!vcount)
+		return false;
+
+	REAL numerator[3] = { 0, 0, 0 };
+	REAL denominator = 0;
+
+	for (uint32_t i = 0; i < triCount; i++)
+	{
+		const uint32_t i1 = indices[i * 3 + 0];
+		const uint32_t i2 = indices[i * 3 + 1];
+		const uint32_t i3 = indices[i * 3 + 2];
+
+		const REAL *p1 = &points[i1 * 3];
+		const REAL *p2 = &points[i2 * 3];
+		const REAL *p3 = &points[i3 * 3];
+
+		// Average of the three corner positions
+		REAL mid[3];
+		mid[0] = (p1[0] + p2[0] + p3[0]) / 3;
+		mid[1] = (p1[1] + p2[1] + p3[1]) / 3;
+		mid[2] = (p1[2] + p2[2] + p3[2]) / 3;
+
+		// Weight it by the area of this triangle
+		const REAL area = fm_computeArea(p1, p2, p3);
+
+		numerator[0]+= (mid[0] * area);
+		numerator[1]+= (mid[1] * area);
+		numerator[2]+= (mid[2] * area);
+
+		denominator += area;
+	}
+
+	if ( !(denominator > 0) )
+	{
+		// No surface to weight by: fall back to the unweighted vertex mean.
+		return fm_computeCentroid(vcount,points,center);
+	}
+
+	const REAL recip = 1 / denominator;
+	center[0] = numerator[0] * recip;
+	center[1] = numerator[1] * recip;
+	center[2] = numerator[2] * recip;
+	return true;
+}
+
+
+#ifndef TEMPLATE_VEC3
+#define TEMPLATE_VEC3
+// Minimal three-component vector template. The default constructor leaves
+// the components uninitialized.
+template <class Type> class Vec3
+{
+public:
+	Vec3(void) { }
+	Vec3(Type _x,Type _y,Type _z) : x(_x), y(_y), z(_z) { }
+
+	Type x;
+	Type y;
+	Type z;
+};
+#endif
+
+// Transforms an axis aligned box by `matrix` and writes the axis aligned
+// bounds of the transformed box to tbmin/tbmax.
+void fm_transformAABB(const REAL bmin[3],const REAL bmax[3],const REAL matrix[16],REAL tbmin[3],REAL tbmax[3])
+{
+	// Gather all 8 corners up front, before any output is written.
+	// Each row says, per axis, whether the corner takes bmin (0) or bmax (1).
+	static const unsigned char pick[8][3] =
+	{
+		{0,0,0}, {1,0,0}, {1,1,0}, {0,1,0},
+		{0,0,1}, {1,0,1}, {1,1,1}, {0,1,1}
+	};
+	const REAL *bounds[2] = { bmin, bmax };
+	REAL corners[8][3];
+	for (unsigned int i=0; i<8; i++)
+	{
+		for (unsigned int axis=0; axis<3; axis++)
+		{
+			corners[i][axis] = bounds[ pick[i][axis] ][axis];
+		}
+	}
+
+	// Transform each corner in place and fold it into the output bounds.
+	for (unsigned int i=0; i<8; i++)
+	{
+		REAL *p = corners[i];
+		fm_transform(matrix,p,p);
+		for (unsigned int axis=0; axis<3; axis++)
+		{
+			if ( i == 0 )
+			{
+				tbmin[axis] = tbmax[axis] = p[axis];
+			}
+			else
+			{
+				if ( p[axis] < tbmin[axis] ) tbmin[axis] = p[axis];
+				if ( p[axis] > tbmax[axis] ) tbmax[axis] = p[axis];
+			}
+		}
+	}
+}
+
+// Normalizes the quaternion `n` (four components) in place and returns the
+// length it had before normalization. A zero-length quaternion is left
+// untouched and 0 is returned.
+REAL  fm_normalizeQuat(REAL n[4]) // normalize this quat
+{
+	// Sum of squares. The previous code squared each component twice
+	// (dx = n0*n0, then dx*dx), producing the root of the sum of fourth powers.
+	const REAL lengthSquared = n[0]*n[0] + n[1]*n[1] + n[2]*n[2] + n[3]*n[3];
+
+	const REAL dist = (REAL)sqrt(lengthSquared);
+
+	if ( dist > 0 ) // avoid dividing by zero
+	{
+		const REAL recip = 1.0f / dist;
+
+		n[0]*=recip;
+		n[1]*=recip;
+		n[2]*=recip;
+		n[3]*=recip;
+	}
+
+	return dist;
+}
+
+
+}; // end of namespace

+ 1 - 1
Engine/source/CMakeLists.txt

@@ -51,7 +51,7 @@ torqueAddSourceDirectories("app" "app/net")
 # Handle console
 # Handle console
 torqueAddSourceDirectories("console")
 torqueAddSourceDirectories("console")
 torqueAddSourceDirectories("console/torquescript")
 torqueAddSourceDirectories("console/torquescript")
-
+set(TORQUE_INCLUDE_DIRECTORIES ${TORQUE_INCLUDE_DIRECTORIES} "ts/vhacd")
 # Handle Platform
 # Handle Platform
 torqueAddSourceDirectories("platform" "platform/threads" "platform/async"
 torqueAddSourceDirectories("platform" "platform/threads" "platform/async"
                                   "platform/input" "platform/output")
                                   "platform/input" "platform/output")

+ 2 - 0
Engine/source/platformWin32/winRedbook.cpp

@@ -24,6 +24,8 @@
 #include "platform/platformRedBook.h"
 #include "platform/platformRedBook.h"
 #include "core/strings/unicode.h"
 #include "core/strings/unicode.h"
 #include "core/strings/stringFunctions.h"
 #include "core/strings/stringFunctions.h"
+#include <windows.h>
+#include <mmsystem.h>
 
 
 class Win32RedBookDevice : public RedBookDevice
 class Win32RedBookDevice : public RedBookDevice
 {
 {

+ 163 - 65
Engine/source/ts/tsMeshFit.cpp

@@ -19,7 +19,6 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 // IN THE SOFTWARE.
 // IN THE SOFTWARE.
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
-
 #include "platform/platform.h"
 #include "platform/platform.h"
 
 
 #include "console/consoleTypes.h"
 #include "console/consoleTypes.h"
@@ -27,18 +26,13 @@
 #include "ts/tsShapeConstruct.h"
 #include "ts/tsShapeConstruct.h"
 #include "console/engineAPI.h"
 #include "console/engineAPI.h"
 
 
-// define macros required for ConvexDecomp headers
-#if defined( _WIN32 ) && !defined( WIN32 )
-#define WIN32
-#elif defined( __MACOSX__ ) && !defined( APPLE )
-#define APPLE
-#endif
-
-#include "convexDecomp/NvFloatMath.h"
-#include "convexDecomp/NvConvexDecomposition.h"
-#include "convexDecomp/NvStanHull.h"
+#define ENABLE_VHACD_IMPLEMENTATION 1
+#define VHACD_DISABLE_THREADING 0
+#include "ts/vhacd/VHACD.h"
+#include <FloatMath.h>
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
+
 static const Point3F sFacePlanes[] = {
 static const Point3F sFacePlanes[] = {
    Point3F( -1.0f,  0.0f,  0.0f ),
    Point3F( -1.0f,  0.0f,  0.0f ),
    Point3F(  1.0f,  0.0f,  0.0f ),
    Point3F(  1.0f,  0.0f,  0.0f ),
@@ -109,18 +103,18 @@ public:
 
 
    void fitBox( U32 vertCount, const F32* verts )
    void fitBox( U32 vertCount, const F32* verts )
    {
    {
-      CONVEX_DECOMPOSITION::fm_computeBestFitOBB( vertCount, verts, sizeof(F32)*3, (F32*)mBoxSides, (F32*)mBoxTransform );
+      FLOAT_MATH::fm_computeBestFitOBB( vertCount, verts, sizeof(F32)*3, (F32*)mBoxSides, (F32*)mBoxTransform, false );
       mBoxTransform.transpose();
       mBoxTransform.transpose();
    }
    }
 
 
    void fitSphere( U32 vertCount, const F32* verts )
    void fitSphere( U32 vertCount, const F32* verts )
    {
    {
-      mSphereRadius = CONVEX_DECOMPOSITION::fm_computeBestFitSphere( vertCount, verts, sizeof(F32)*3, (F32*)mSphereCenter );
+      mSphereRadius = FLOAT_MATH::fm_computeBestFitSphere( vertCount, verts, sizeof(F32)*3, (F32*)mSphereCenter );
    }
    }
 
 
    void fitCapsule( U32 vertCount, const F32* verts )
    void fitCapsule( U32 vertCount, const F32* verts )
    {
    {
-      CONVEX_DECOMPOSITION::fm_computeBestFitCapsule( vertCount, verts, sizeof(F32)*3, mCapRadius, mCapHeight, (F32*)mCapTransform );
+      FLOAT_MATH::fm_computeBestFitCapsule( vertCount, verts, sizeof(F32)*3, mCapRadius, mCapHeight, (F32*)mCapTransform );
       mCapTransform.transpose();
       mCapTransform.transpose();
    }
    }
 };
 };
@@ -449,12 +443,27 @@ void MeshFit::addSphere( F32 radius, const Point3F& center )
    if ( !mesh )
    if ( !mesh )
       return;
       return;
 
 
-   for ( S32 i = 0; i < mesh->mVertexData.size(); i++ )
+   if (mesh->mVerts.size() > 0)
    {
    {
-      TSMesh::__TSMeshVertexBase &vdata = mesh->mVertexData.getBase(i);
-      Point3F v = vdata.vert();
-      vdata.vert( v * radius );
+      for (S32 i = 0; i < mesh->mVerts.size(); i++)
+      {
+         Point3F v = mesh->mVerts[i];
+         mesh->mVerts[i] = v * radius;
+      }
+
+      mesh->mVertexData.setReady(false);
+   }
+   else
+   {
+      for (S32 i = 0; i < mesh->mVertexData.size(); i++)
+      {
+         TSMesh::__TSMeshVertexBase& vdata = mesh->mVertexData.getBase(i);
+         Point3F v = vdata.vert();
+         vdata.vert(v * radius);
+      }
    }
    }
+
+
    mesh->computeBounds();
    mesh->computeBounds();
 
 
    mMeshes.increment();
    mMeshes.increment();
@@ -483,12 +492,28 @@ void MeshFit::addCapsule( F32 radius, F32 height, const MatrixF& mat )
    // Translate and scale the mesh verts
    // Translate and scale the mesh verts
    height = mMax( 0, height );
    height = mMax( 0, height );
    F32 offset = ( height / ( 2 * radius ) ) - 0.5f;
    F32 offset = ( height / ( 2 * radius ) ) - 0.5f;
-   for ( S32 i = 0; i < mesh->mVertexData.size(); i++ )
+   if (mesh->mVerts.size() > 0)
+   {
+      for (S32 i = 0; i < mesh->mVerts.size(); i++)
+      {
+         Point3F v = mesh->mVerts[i];
+         v.y += ((v.y > 0) ? offset : -offset);
+         mesh->mVerts[i] = v * radius;
+      }
+
+      mesh->mVertexData.setReady(false);
+   }
+   else
    {
    {
-      Point3F v = mesh->mVertexData.getBase(i).vert();
-      v.y += ( ( v.y > 0 ) ? offset : -offset );
-      mesh->mVertexData.getBase(i).vert( v * radius );
+      for (S32 i = 0; i < mesh->mVertexData.size(); i++)
+      {
+         TSMesh::__TSMeshVertexBase& vdata = mesh->mVertexData.getBase(i);
+         Point3F v = vdata.vert();
+         v.y += ((v.y > 0) ? offset : -offset);
+         vdata.vert(v * radius);
+      }
    }
    }
+
    mesh->computeBounds();
    mesh->computeBounds();
 
 
    mMeshes.increment();
    mMeshes.increment();
@@ -572,6 +597,7 @@ void MeshFit::fitK_DOP( const Vector<Point3F>& planes )
    // Collect the intersection points of any 3 planes that lie inside
    // Collect the intersection points of any 3 planes that lie inside
    // the maximum distances found above
    // the maximum distances found above
    Vector<Point3F> points;
    Vector<Point3F> points;
+   Vector<U32> pointIndices;
    for ( S32 i = 0; i < planes.size()-2; i++ )
    for ( S32 i = 0; i < planes.size()-2; i++ )
    {
    {
       for ( S32 j = i+1; j < planes.size()-1; j++ )
       for ( S32 j = i+1; j < planes.size()-1; j++ )
@@ -599,32 +625,68 @@ void MeshFit::fitK_DOP( const Vector<Point3F>& planes )
                }
                }
             }
             }
 
 
-            if ( addPoint )
-               points.push_back( p );
+            if (addPoint)
+            {
+               points.push_back(p);
+               pointIndices.push_back(points.size() - 1);
+            }
          }
          }
       }
       }
    }
    }
 
 
-   // Create a convex hull from the point set
-   CONVEX_DECOMPOSITION::HullDesc hd;
-   hd.mVcount 			= points.size();
-   hd.mVertices 		= (F32*)points.address();
-   hd.mVertexStride 	= sizeof(Point3F);
-   hd.mMaxVertices 	= 64;
-   hd.mSkinWidth		= 0.0f;
+   VHACD::IVHACD::Parameters p;
+   p.m_fillMode            = VHACD::FillMode::FLOOD_FILL;
+   p.m_maxNumVerticesPerCH = 64;
+   p.m_shrinkWrap          = true;
+   p.m_maxRecursionDepth   = 64;
+   p.m_minimumVolumePercentErrorAllowed = 10;
+   p.m_resolution          = 10000;
+   p.m_maxConvexHulls      = 1;
 
 
-   CONVEX_DECOMPOSITION::HullLibrary hl;
-   CONVEX_DECOMPOSITION::HullResult result;
-   hl.CreateConvexHull( hd, result );
+   VHACD::IVHACD* iface = VHACD::CreateVHACD();
+
+   iface->Compute((F32*)points.address(), points.size(), (U32*)pointIndices.address(), pointIndices.size() / 3, p);
+
+   // safety loop.
+   while (!iface->IsReady())
+   {
+      Platform::sleep(1000);
+   }
+
+   // we only get the 1 in dop?
+   VHACD::IVHACD::ConvexHull ch;
+   iface->GetConvexHull(0, ch);
 
 
    // Create TSMesh from convex hull
    // Create TSMesh from convex hull
    mMeshes.increment();
    mMeshes.increment();
    MeshFit::Mesh& lastMesh = mMeshes.last();
    MeshFit::Mesh& lastMesh = mMeshes.last();
    lastMesh.type = MeshFit::Hull;
    lastMesh.type = MeshFit::Hull;
    lastMesh.transform.identity();
    lastMesh.transform.identity();
-   lastMesh.tsmesh = createTriMesh(result.mOutputVertices, result.mNumOutputVertices,
-                              result.mIndices, result.mNumFaces );
+
+   U32* indices = new U32[ch.m_triangles.size() * 3];
+   for (U32 ind = 0; ind < ch.m_triangles.size(); ind++)
+   {
+      indices[ind * 3 + 0] = ch.m_triangles[ind].mI0;
+      indices[ind * 3 + 1] = ch.m_triangles[ind].mI1;
+      indices[ind * 3 + 2] = ch.m_triangles[ind].mI2;
+   }
+
+   F32* resultPts = new F32[ch.m_points.size() * 3];
+   for (U32 pts = 0; pts < ch.m_points.size(); pts++)
+   {
+      resultPts[pts * 3 + 0] = ch.m_points[pts].mX;
+      resultPts[pts * 3 + 1] = ch.m_points[pts].mY;
+      resultPts[pts * 3 + 2] = ch.m_points[pts].mZ;
+   }
+
+   lastMesh.tsmesh = createTriMesh(resultPts, (S32)ch.m_points.size(),
+                                    indices, (S32)ch.m_triangles.size());
    lastMesh.tsmesh->computeBounds();
    lastMesh.tsmesh->computeBounds();
+
+   iface->Release();
+
+   delete[] resultPts;
+   delete[] indices;
 }
 }
 
 
 //---------------------------
 //---------------------------
@@ -632,34 +694,30 @@ void MeshFit::fitK_DOP( const Vector<Point3F>& planes )
 void MeshFit::fitConvexHulls( U32 depth, F32 mergeThreshold, F32 concavityThreshold, U32 maxHullVerts,
 void MeshFit::fitConvexHulls( U32 depth, F32 mergeThreshold, F32 concavityThreshold, U32 maxHullVerts,
                               F32 boxMaxError, F32 sphereMaxError, F32 capsuleMaxError )
                               F32 boxMaxError, F32 sphereMaxError, F32 capsuleMaxError )
 {
 {
-   const F32 SkinWidth      = 0.0f;
-   const F32 SplitThreshold = 2.0f;
+   VHACD::IVHACD::Parameters p;
+   p.m_fillMode = VHACD::FillMode::FLOOD_FILL;
+   p.m_maxNumVerticesPerCH = maxHullVerts;
+   p.m_shrinkWrap = true;
+   p.m_maxRecursionDepth = 64;
+   p.m_minimumVolumePercentErrorAllowed = 10;
+   p.m_resolution = 10000;
+   p.m_maxConvexHulls = depth;
 
 
-   CONVEX_DECOMPOSITION::iConvexDecomposition *ic = CONVEX_DECOMPOSITION::createConvexDecomposition();
+   VHACD::IVHACD* iface = VHACD::CreateVHACD_ASYNC();
 
 
-   for ( S32 i = 0; i < mIndices.size(); i += 3 )
+   iface->Compute((F32*)mVerts.address(), mVerts.size(), mIndices.address(), mIndices.size() / 3, p);
+
+   // safety loop.
+   while (!iface->IsReady())
    {
    {
-      ic->addTriangle(  (F32*)mVerts[mIndices[i]],
-                        (F32*)mVerts[mIndices[i+1]],
-                        (F32*)mVerts[mIndices[i+2]] );
+      Platform::sleep(1000);
    }
    }
 
 
-   ic->computeConvexDecomposition(
-      SkinWidth,
-      depth,
-      maxHullVerts,
-      concavityThreshold,
-      mergeThreshold,
-      SplitThreshold,
-      true,
-      false,
-      false );
-
    // Add a TSMesh for each hull
    // Add a TSMesh for each hull
-   for ( S32 i = 0; i < ic->getHullCount(); i++ )
+   for ( S32 i = 0; i < iface->GetNConvexHulls(); i++ )
    {
    {
-      CONVEX_DECOMPOSITION::ConvexHullResult result;
-      ic->getConvexHullResult( i, result );
+      VHACD::IVHACD::ConvexHull ch;
+      iface->GetConvexHull(i, ch);
 
 
       eMeshType meshType = MeshFit::Hull;
       eMeshType meshType = MeshFit::Hull;
 
 
@@ -667,23 +725,39 @@ void MeshFit::fitConvexHulls( U32 depth, F32 mergeThreshold, F32 concavityThresh
       if (( boxMaxError > 0 ) || ( sphereMaxError > 0 ) || ( capsuleMaxError > 0 ))
       if (( boxMaxError > 0 ) || ( sphereMaxError > 0 ) || ( capsuleMaxError > 0 ))
       {
       {
          // Compute error between actual mesh and fitted primitives
          // Compute error between actual mesh and fitted primitives
-         F32 meshVolume = CONVEX_DECOMPOSITION::fm_computeMeshVolume( result.mVertices, result.mTcount, result.mIndices );
+         F32* points = new F32[ch.m_points.size() * 3];
+         for (U32 pt = 0; pt < ch.m_points.size(); pt++)
+         {
+            points[pt * 3 + 0] = ch.m_points[pt].mX;
+            points[pt * 3 + 1] = ch.m_points[pt].mY;
+            points[pt * 3 + 2] = ch.m_points[pt].mZ;
+         }
+
+         U32* indices = new U32[ch.m_triangles.size() * 3];
+         for (U32 ind = 0; ind < ch.m_triangles.size(); ind++)
+         {
+            indices[ind * 3 + 0] = ch.m_triangles[ind].mI0;
+            indices[ind * 3 + 1] = ch.m_triangles[ind].mI1;
+            indices[ind * 3 + 2] = ch.m_triangles[ind].mI2;
+         }
+
+         F32 meshVolume = FLOAT_MATH::fm_computeMeshVolume(points, ch.m_triangles.size(), indices);
          PrimFit primFitter;
          PrimFit primFitter;
 
 
-         F32 boxError = 100.0f, sphereError = 100.0f, capsuleError = 100.0f;
+         F32 boxError = 100.0f, sphereError = 100.0f, capsuleError = 100.0;
          if ( boxMaxError > 0 )
          if ( boxMaxError > 0 )
          {
          {
-            primFitter.fitBox( result.mVcount, result.mVertices );
+            primFitter.fitBox(ch.m_points.size(), points);
             boxError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getBoxVolume() ) );
             boxError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getBoxVolume() ) );
          }
          }
          if ( sphereMaxError > 0 )
          if ( sphereMaxError > 0 )
          {
          {
-            primFitter.fitSphere( result.mVcount, result.mVertices );
-            sphereError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getSphereVolume() ) );
+            primFitter.fitSphere(ch.m_points.size(), points);
+            sphereError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getSphereVolume()));
          }
          }
          if ( capsuleMaxError > 0 )
          if ( capsuleMaxError > 0 )
          {
          {
-            primFitter.fitCapsule( result.mVcount, result.mVertices );
+            primFitter.fitCapsule(ch.m_points.size(), points);
             capsuleError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getCapsuleVolume() ) );
             capsuleError = 100.0f * ( 1.0f - ( meshVolume / primFitter.getCapsuleVolume() ) );
          }
          }
 
 
@@ -713,6 +787,10 @@ void MeshFit::fitConvexHulls( U32 depth, F32 mergeThreshold, F32 concavityThresh
          else if ( meshType == MeshFit::Capsule )
          else if ( meshType == MeshFit::Capsule )
             addCapsule( primFitter.mCapRadius, primFitter.mCapHeight, primFitter.mCapTransform );
             addCapsule( primFitter.mCapRadius, primFitter.mCapHeight, primFitter.mCapTransform );
          // else fall through to Hull processing
          // else fall through to Hull processing
+         
+         // cleanup
+         delete[] points;
+         delete[] indices;
       }
       }
 
 
       if ( meshType == MeshFit::Hull )
       if ( meshType == MeshFit::Hull )
@@ -722,12 +800,32 @@ void MeshFit::fitConvexHulls( U32 depth, F32 mergeThreshold, F32 concavityThresh
          MeshFit::Mesh& lastMesh = mMeshes.last();
          MeshFit::Mesh& lastMesh = mMeshes.last();
          lastMesh.type = MeshFit::Hull;
          lastMesh.type = MeshFit::Hull;
          lastMesh.transform.identity();
          lastMesh.transform.identity();
-         lastMesh.tsmesh = createTriMesh(result.mVertices, result.mVcount, result.mIndices, result.mTcount);
+
+         U32* indices = new U32[ch.m_triangles.size() * 3];
+         for (U32 ind = 0; ind < ch.m_triangles.size(); ind++)
+         {
+            indices[ind * 3 + 0] = ch.m_triangles[ind].mI0;
+            indices[ind * 3 + 1] = ch.m_triangles[ind].mI1;
+            indices[ind * 3 + 2] = ch.m_triangles[ind].mI2;
+         }
+
+         F32* points = new F32[ch.m_points.size() * 3];
+         for (U32 pt = 0; pt < ch.m_points.size(); pt++)
+         {
+            points[pt * 3 + 0] = ch.m_points[pt].mX;
+            points[pt * 3 + 1] = ch.m_points[pt].mY;
+            points[pt * 3 + 2] = ch.m_points[pt].mZ;
+         }
+
+         lastMesh.tsmesh = createTriMesh(points, ch.m_points.size(), indices, ch.m_triangles.size());
          lastMesh.tsmesh->computeBounds();
          lastMesh.tsmesh->computeBounds();
+
+         delete[] points;
+         delete[] indices;
       }
       }
    }
    }
 
 
-   CONVEX_DECOMPOSITION::releaseConvexDecomposition( ic );
+   iface->Release();
 }
 }
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------

+ 8447 - 0
Engine/source/ts/vhacd/VHACD.h

@@ -0,0 +1,8447 @@
+/* Copyright (c) 2011 Khaled Mamou (kmamou at gmail dot com)
+ All rights reserved.
+ 
+ 
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+ 
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+ 
+ 3. The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#ifndef VHACD_H
+#    define VHACD_H
+
+// Please view this slide deck which describes usage and how the algorithm works.
+// https://docs.google.com/presentation/d/1OZ4mtZYrGEC8qffqb8F7Le2xzufiqvaPpRbLHKKgTIM/edit?usp=sharing
+
+// VHACD is now a header only library.
+// In just *one* of your CPP files *before* you include 'VHACD.h' you must declare
+// #define ENABLE_VHACD_IMPLEMENTATION 1
+// This will compile the implementation code into your project. If you don't
+// have this define, you will get link errors since the implementation code will
+// not be present. If you define it more than once in your code base, you will get
+// link errors due to a duplicate implementation. This is the same pattern used by
+// ImGui and StbLib and other popular open source libraries.
+
+#    define VHACD_VERSION_MAJOR 4
+#    define VHACD_VERSION_MINOR 1
+
+// Changes for version 4.1
+//
+// Various minor tweaks mostly to the test application and some default values.
+
+// Changes for version 4.0
+//
+// * The code has been significantly refactored to be cleaner and easier to maintain
+//      * All OpenCL related code removed
+//      * All Bullet code removed
+//      * All SIMD code removed
+//      * Old plane splitting code removed
+// 
+// * The code is now delivered as a single header file 'VHACD.h' which has both the API
+// * declaration as well as the implementation.  Simply add '#define ENABLE_VHACD_IMPLEMENTATION 1'
+// * to any CPP in your application prior to including 'VHACD.h'. Only do this in one CPP though.
+// * If you do not have this define once, you will get link errors since the implementation code
+// * will not be compiled in. If you have this define more than once, you are likely to get
+// * duplicate symbol link errors.
+//
+// * Since the library is now delivered as a single header file, we do not provide binaries
+// * or build scripts as these are not needed.
+//
+// * The old DebugView and test code has all been removed and replaced with a much smaller and
+// * simpler test console application with some test meshes to work with.
+//
+// * The convex hull generation code has changed. The previous version came from Bullet. 
+// * However, the new version is courtesy of Julio Jerez, the author of the Newton
+// * physics engine. His new version is faster and more numerically stable.
+//
+// * The code can now detect if the input mesh is, itself, already a convex object and
+// * can early out.
+//
+// * Significant performance improvements have been made to the code and it is now much
+// * faster, stable, and is easier to tune than previous versions.
+//
+// * A bug was fixed with the shrink wrapping code (project hull vertices) that could
+// * sometimes produce artifacts in the results. The new version uses a 'closest point'
+// * algorithm that is more reliable.
+//
+// * You can now select which 'fill mode' to use. For perfectly closed meshes, the default
+// * behavior using a flood fill generally works fine. However, some meshes have small 
+// * holes in them and therefore the flood fill will fail, treating the mesh as being
+// * hollow. In these cases, you can use the 'raycast' fill option to determine which 
+// * parts of the voxelized mesh are 'inside' versus being 'outside'. Finally, there
+// * are some rare instances where a user might actually want the mesh to be treated as
+// * hollow, in which case you can pass in 'surface' only.
+// *
+// * A new optional virtual interface called 'IUserProfiler' was provided.
+// * This allows the user to provide an optional profiling callback interface to assist in
+// * diagnosing performance issues. This change was made by Danny Couture at Epic for the UE4 integration.
+// * Some profiling macros were also declared in support of this feature.
+// *
+// * Another new optional virtual interface called 'IUserTaskRunner' was provided.
+// * This interface is used to run logical 'tasks' in a background thread. If none is provided
+// * then a default implementation using std::thread will be executed.
+// * This change was made by Danny Couture at Epic to speed up the voxelization step.
+// *
+
+
+
+// The history of V-HACD:
+//
+// The initial version was written by John W. Ratcliff and was called 'ACD'
+// This version did not perform CSG operations on the source mesh, so if you 
+// recursed too deeply it would produce hollow results.
+//
+// The next version was written by Khaled Mamou and was called 'HACD'
+// In this version Khaled tried to perform a CSG operation on the source 
+// mesh to produce more robust results. However, Khaled learned that the
+// CSG library he was using had licensing issues so he started work on the
+// next version.
+//
+// The next version was called 'V-HACD' because Khaled made the observation
+// that plane splitting would be far easier to implement working in voxel space.
+// 
+// V-HACD has been integrated into UE4, Blender, and a number of other projects.
+// This new release, version4, is a significant refactor of the code to fix
+// some bugs, improve performance, and to make the codebase easier to maintain
+// going forward.
+
+#ifndef _STDINT
+#include <stdint.h>
+#endif
+
+#ifndef _FUNCTIONAL_
+#include <functional>
+#endif
+
+#ifndef _VECTOR_
+#include <vector>
+#endif
+
+#ifndef _ARRAY_
+#include <array>
+#endif
+
+#ifndef _CMATH_
+#include <cmath>
+#endif
+
+#ifndef _ALGORITHM_
+#include <algorithm>
+#endif
+
+namespace VHACD {
+
+struct Vertex // A 3D point with double-precision components mX, mY, mZ.
+{
+    double mX;
+    double mY;
+    double mZ;
+
+    Vertex() = default; // no member initializers: components are only zeroed when the object is value-initialized
+    Vertex(double x, double y, double z) : mX(x), mY(y), mZ(z) {}
+
+    const double& operator[](size_t idx) const // Read-only access by index: 0 -> mX, 1 -> mY, 2 -> mZ.
+    {
+        switch(idx)
+        {
+            case 0: return mX;
+            case 1: return mY;
+            case 2: return mZ;
+        };
+        return mX; // any index outside 0..2 silently falls back to mX rather than failing
+    }
+};
+
+struct Triangle // One triangle expressed as three 32-bit indices (mI0, mI1, mI2).
+{
+    uint32_t mI0;
+    uint32_t mI1;
+    uint32_t mI2;
+
+    Triangle() = default; // no member initializers: indices are only zeroed when the object is value-initialized
+    Triangle(uint32_t i0, uint32_t i1, uint32_t i2) : mI0(i0), mI1(i1), mI2(i2) {}
+};
+
+template <typename T>
+class Vector3 // Three-component vector of T stored in a std::array<T, 3>; this class body only declares the members.
+{
+public:
+    /*
+    * Getters: indexed access plus named X/Y/Z access, in mutable and const flavours
+    */
+    T& operator[](size_t i);
+    const T& operator[](size_t i) const;
+    T& GetX();
+    T& GetY();
+    T& GetZ();
+    const T& GetX() const;
+    const T& GetY() const;
+    const T& GetZ() const;
+
+    /*
+    * Normalize and norming
+    */
+    T Normalize();
+    Vector3 Normalized(); // NOTE: declared non-const even though it returns a Vector3 by value
+    T GetNorm() const;
+    T GetNormSquared() const;
+    int LongestAxis() const;
+
+    /*
+    * Vector-vector operations
+    */
+    Vector3& operator=(const Vector3& rhs);
+    Vector3& operator+=(const Vector3& rhs);
+    Vector3& operator-=(const Vector3& rhs);
+
+    Vector3 CWiseMul(const Vector3& rhs) const;
+    Vector3 Cross(const Vector3& rhs) const;
+    T Dot(const Vector3& rhs) const;
+    Vector3 operator+(const Vector3& rhs) const;
+    Vector3 operator-(const Vector3& rhs) const;
+
+    /*
+    * Vector-scalar operations: the scalar is applied to each of the three components
+    */
+    Vector3& operator-=(T a);
+    Vector3& operator+=(T a);
+    Vector3& operator/=(T a);
+    Vector3& operator*=(T a);
+
+    Vector3 operator*(T rhs) const;
+    Vector3 operator/(T rhs) const;
+
+    /*
+    * Unary operations
+    */
+    Vector3 operator-() const;
+
+    /*
+    * Comparison operators
+    */
+    bool operator<(const Vector3& rhs) const;
+    bool operator>(const Vector3& rhs) const;
+
+    /*
+     * CWiseAllGE returns true if every element of *this is greater than or equal to the
+     * corresponding element of rhs (coefficient wise); CWiseAllLE is the less-than-or-equal counterpart
+     */
+    bool CWiseAllGE(const Vector3<T>& rhs) const;
+    bool CWiseAllLE(const Vector3<T>& rhs) const;
+
+    Vector3 CWiseMin(const Vector3& rhs) const;
+    Vector3 CWiseMax(const Vector3& rhs) const;
+    T MinCoeff() const;
+    T MaxCoeff() const;
+
+    T MinCoeff(uint32_t& idx) const; // idx is a non-const reference, i.e. an output argument
+    T MaxCoeff(uint32_t& idx) const; // idx is a non-const reference, i.e. an output argument
+
+    /*
+    * Constructors
+    */
+    Vector3() = default;
+    Vector3(T a);
+    Vector3(T x, T y, T z);
+    Vector3(const Vector3& rhs);
+    ~Vector3() = default;
+
+    template <typename U>
+    Vector3(const Vector3<U>& rhs); // converting constructor from a Vector3 with a different element type
+
+    Vector3(const VHACD::Vertex&);
+    Vector3(const VHACD::Triangle&);
+
+    operator VHACD::Vertex() const; // implicit conversion to the plain Vertex struct
+
+private:
+    std::array<T, 3> m_data{ T(0.0) }; // first element set to T(0.0); the other two are value-initialized by aggregate initialization
+};
+
+typedef VHACD::Vector3<double> Vect3;
+
+struct BoundsAABB // Axis-aligned bounding box described by its m_min and m_max corners.
+{
+    BoundsAABB() = default;
+    BoundsAABB(const std::vector<VHACD::Vertex>& points); // builds the bounds from a list of vertices
+    BoundsAABB(const Vect3& min,
+               const Vect3& max);
+
+    BoundsAABB Union(const BoundsAABB& b); // NOTE: non-const although it returns a BoundsAABB by value
+
+    bool Intersects(const BoundsAABB& b) const;
+
+    double SurfaceArea() const;
+    double Volume() const;
+
+    BoundsAABB Inflate(double ratio) const; // returns a new box; *this is not modified (const member)
+
+    VHACD::Vect3 ClosestPoint(const VHACD::Vect3& p) const;
+
+    VHACD::Vect3& GetMin();
+    VHACD::Vect3& GetMax();
+    const VHACD::Vect3& GetMin() const;
+    const VHACD::Vect3& GetMax() const;
+
+    VHACD::Vect3 GetSize() const;
+    VHACD::Vect3 GetCenter() const;
+
+    VHACD::Vect3 m_min{ double(0.0) }; // constructed from the scalar 0.0
+    VHACD::Vect3 m_max{ double(0.0) }; // constructed from the scalar 0.0
+};
+
+/**
+* This enumeration determines how the voxels are filled to create a solid
+* object. The default should be 'FLOOD_FILL' which generally works fine 
+* for closed meshes. However, if the mesh is not watertight, then using
+* RAYCAST_FILL may be preferable as it will determine if a voxel is part 
+* of the interior of the source mesh by raycasting around it.
+* 
+* Finally, there are some cases where you might actually want a convex 
+* decomposition to treat the source mesh as being hollow. If that is the
+* case you can pass in 'SURFACE_ONLY' and then the convex decomposition 
+* will converge only onto the 'skin' of the surface mesh.
+*/
+enum class FillMode
+{
+    FLOOD_FILL, // This is the default behavior, after the voxelization step it uses a flood fill to determine 'inside'
+                // from 'outside'. However, meshes with holes can fail and create hollow results.
+    SURFACE_ONLY, // Only consider the 'surface', will create 'skins' with hollow centers.
+    RAYCAST_FILL, // Uses raycasting to determine inside from outside.
+};
+
+class IVHACD // Abstract interface to a V-HACD convex decomposition instance; the destructor is protected, so instances are disposed of through Release().
+{
+public:
+    /**
+    * This optional pure virtual interface is used to notify the caller of the progress
+    * of convex decomposition as well as a signal when it is complete when running in
+    * a background thread
+    */
+    class IUserCallback
+    {
+    public:
+        virtual ~IUserCallback(){};
+
+        /**
+        * Notifies the application of the current state of the convex decomposition operation
+        * 
+        * @param overallProgress : Total progress from 0-100%
+        * @param stageProgress : Progress of the current stage 0-100%
+        * @param stage : A text description of the current stage we are in
+        * @param operation : A text description of what operation is currently being performed.
+        */
+        virtual void Update(const double overallProgress,
+                            const double stageProgress,
+                            const char* const stage,
+                            const char* operation) = 0;
+
+        // This is an optional user callback which is only called when running V-HACD asynchronously.
+        // This is a callback performed to notify the user that the
+        // convex decomposition background process is completed. This call back will occur from
+        // a different thread so the user should take that into account.
+        virtual void NotifyVHACDComplete()
+        {
+        }
+    };
+
+    /**
+    * Optional user provided pure virtual interface to be notified of warning or informational messages
+    */
+    class IUserLogger
+    {
+    public:
+        virtual ~IUserLogger(){};
+        virtual void Log(const char* const msg) = 0;
+    };
+
+    /**
+    * An optional user provided pure virtual interface to perform a background task.
+    * This was added by Danny Couture at Epic as they wanted to use their own
+    * threading system instead of the standard library version which is the default.
+    */
+    class IUserTaskRunner
+    {
+    public:
+        virtual ~IUserTaskRunner(){};
+        virtual void* StartTask(std::function<void()> func) = 0; // returns an opaque task handle
+        virtual void JoinTask(void* Task) = 0; // takes a handle of the kind returned by StartTask
+    };
+
+    /**
+    * A simple class that represents a convex hull as a triangle mesh with 
+    * double precision vertices. Polygons are not currently provided.
+    */
+    class ConvexHull
+    {
+    public:
+        std::vector<VHACD::Vertex>      m_points;
+        std::vector<VHACD::Triangle>    m_triangles;
+
+        double                          m_volume{ 0 };          // The volume of the convex hull
+        VHACD::Vect3                    m_center{ 0, 0, 0 };    // The centroid of the convex hull
+        uint32_t                        m_meshId{ 0 };          // A unique id for this convex hull
+        VHACD::Vect3            mBmin;                  // Bounding box minimum of the AABB
+        VHACD::Vect3            mBmax;                  // Bounding box maximum of the AABB
+    };
+
+    /**
+    * This class provides the parameters controlling the convex decomposition operation
+    */
+    class Parameters
+    {
+    public:
+        IUserCallback*      m_callback{nullptr};            // Optional user provided callback interface for progress
+        IUserLogger*        m_logger{nullptr};              // Optional user provided callback interface for log messages
+        IUserTaskRunner*    m_taskRunner{nullptr};          // Optional user provided interface for creating tasks
+        uint32_t            m_maxConvexHulls{ 64 };         // The maximum number of convex hulls to produce
+        uint32_t            m_resolution{ 400000 };         // The voxel resolution to use
+        double              m_minimumVolumePercentErrorAllowed{ 1 }; // if the voxels are within 1% of the volume of the hull, we consider this a close enough approximation
+        uint32_t            m_maxRecursionDepth{ 10 };        // The maximum recursion depth
+        bool                m_shrinkWrap{true};             // Whether or not to shrinkwrap the voxel positions to the source mesh on output
+        FillMode            m_fillMode{ FillMode::FLOOD_FILL }; // How to fill the interior of the voxelized mesh
+        uint32_t            m_maxNumVerticesPerCH{ 64 };    // The maximum number of vertices allowed in any output convex hull
+        bool                m_asyncACD{ true };             // Whether or not to run asynchronously, taking advantage of additional cores
+        uint32_t            m_minEdgeLength{ 2 };           // Once a voxel patch has an edge length of less than this value on all 3 sides, we don't keep recursing. NOTE(review): this comment previously said "less than 4" while the default is 2 - confirm against the implementation
+        bool                m_findBestPlane{ false };       // Whether or not to attempt to split planes along the best location. Experimental feature. False by default.
+    };
+
+    /**
+    * Will cause the convex decomposition operation to be canceled early. No results will be produced but the background operation will end as soon as it can.
+    */
+    virtual void Cancel() = 0;
+
+    /**
+    * Compute a convex decomposition of a triangle mesh using float vertices and the provided user parameters.
+    * 
+    * @param points : The vertices of the source mesh as floats in the form of X1,Y1,Z1,  X2,Y2,Z2,.. etc.
+    * @param countPoints : The number of vertices in the source mesh.
+    * @param triangles : The indices of triangles in the source mesh in the form of I1,I2,I3, .... 
+    * @param countTriangles : The number of triangles in the source mesh
+    * @param params : The convex decomposition parameters to apply
+    * @return : Returns true if the convex decomposition operation can be started
+    */
+    virtual bool Compute(const float* const points,
+                         const uint32_t countPoints,
+                         const uint32_t* const triangles,
+                         const uint32_t countTriangles,
+                         const Parameters& params) = 0;
+
+    /**
+    * Compute a convex decomposition of a triangle mesh using double vertices and the provided user parameters.
+    * 
+    * @param points : The vertices of the source mesh as doubles in the form of X1,Y1,Z1,  X2,Y2,Z2,.. etc.
+    * @param countPoints : The number of vertices in the source mesh.
+    * @param triangles : The indices of triangles in the source mesh in the form of I1,I2,I3, .... 
+    * @param countTriangles : The number of triangles in the source mesh
+    * @param params : The convex decomposition parameters to apply
+    * @return : Returns true if the convex decomposition operation can be started
+    */
+    virtual bool Compute(const double* const points,
+                         const uint32_t countPoints,
+                         const uint32_t* const triangles,
+                         const uint32_t countTriangles,
+                         const Parameters& params) = 0;
+
+    /**
+    * Returns the number of convex hulls that were produced.
+    * 
+    * @return : Returns the number of convex hulls produced, or zero if it failed or was canceled
+    */
+    virtual uint32_t GetNConvexHulls() const = 0;
+
+    /**
+    * Retrieves one of the convex hulls in the solution set
+    * 
+    * @param index : Which convex hull to retrieve
+    * @param ch : The convex hull descriptor to return
+    * @return : Returns true if the convex hull exists and could be retrieved
+    */
+    virtual bool GetConvexHull(const uint32_t index,
+                               ConvexHull& ch) const = 0;
+
+    /**
+    * Releases any memory allocated by the V-HACD class
+    */
+    virtual void Clean() = 0; // release internally allocated memory
+
+    /**
+    * Releases this instance of the V-HACD class
+    */
+    virtual void Release() = 0; // release IVHACD
+
+    // Will compute the center of mass of the convex hull decomposition results and return it
+    // in 'centerOfMass'.  Returns false if the center of mass could not be computed.
+    virtual bool ComputeCenterOfMass(double centerOfMass[3]) const = 0;
+
+    // In synchronous mode (non-multi-threaded) the state is always 'ready'
+    // In asynchronous mode, this returns true if the background thread is not still actively computing
+    // a new solution.  In an asynchronous config the 'IsReady' call will report any update or log
+    // messages in the caller's current thread.
+    // This base-class default always returns true; it is virtual so an implementation can override it.
+    virtual bool IsReady() const
+    {
+        return true;
+    }
+
+    /**
+    * At the request of LegionFu : [email protected]
+    * This method will return which convex hull is closest to the source position.
+    * You can use this method to figure out, for example, which vertices in the original
+    * source mesh are best associated with which convex hull.
+    * 
+    * @param pos : The input 3d position to test against
+    * @param distanceToHull : Output argument (non-const reference); presumably receives the distance to the returned hull - confirm against the implementation
+    * 
+    * @return : Returns which convex hull this position is closest to.
+    */
+    virtual uint32_t findNearestConvexHull(const double pos[3],
+                                           double& distanceToHull) = 0;
+
+protected:
+    virtual ~IVHACD() // protected: callers cannot delete through an IVHACD pointer and must use Release()
+    {
+    }
+};
+/*
+ * Out of line definitions
+ */
+
+    template <typename T>
+    T clamp(const T& v, const T& lo, const T& hi) // Returns v limited to the range [lo, hi]; the result is returned by value.
+    {
+        if (v < lo) // below the range: snap to the lower bound
+        {
+            return lo;
+        }
+        if (v > hi) // above the range: snap to the upper bound
+        {
+            return hi;
+        }
+        return v ; // already inside the range
+    }
+
+/*
+ * Getters: component access into m_data. Index 0 is X, 1 is Y, 2 is Z.
+ * operator[] forwards to std::array::operator[], which performs no bounds checking.
+ */
+    template <typename T>
+    inline T& Vector3<T>::operator[](size_t i)
+    {
+        return m_data[i];
+    }
+
+    template <typename T>
+    inline const T& Vector3<T>::operator[](size_t i) const
+    {
+        return m_data[i];
+    }
+
+    template <typename T>
+    inline T& Vector3<T>::GetX()
+    {
+        return m_data[0];
+    }
+
+    template <typename T>
+    inline T& Vector3<T>::GetY()
+    {
+        return m_data[1];
+    }
+
+    template <typename T>
+    inline T& Vector3<T>::GetZ()
+    {
+        return m_data[2];
+    }
+
+    template <typename T>
+    inline const T& Vector3<T>::GetX() const
+    {
+        return m_data[0];
+    }
+
+    template <typename T>
+    inline const T& Vector3<T>::GetY() const
+    {
+        return m_data[1];
+    }
+
+    template <typename T>
+    inline const T& Vector3<T>::GetZ() const
+    {
+        return m_data[2];
+    }
+
+/*
+ * Normalize and norming: Euclidean length helpers built on Dot().
+ * A zero-length vector is left unchanged by both normalizing functions.
+ */
+    template <typename T>
+    inline T Vector3<T>::Normalize() // Scales *this to unit length in place and returns the length it had before scaling.
+    {
+        T n = GetNorm();
+        if (n != T(0.0)) (*this) /= n; // skip the division for a zero vector
+        return n;
+    }
+
+    template <typename T>
+    inline Vector3<T> Vector3<T>::Normalized() // Returns a unit-length copy; *this is not modified.
+    {
+        Vector3<T> ret = *this;
+        T n = GetNorm();
+        if (n != T(0.0)) ret /= n; // skip the division for a zero vector
+        return ret;
+    }
+
+    template <typename T>
+    inline T Vector3<T>::GetNorm() const // Euclidean length: square root of GetNormSquared().
+    {
+        return std::sqrt(GetNormSquared());
+    }
+
+    template <typename T>
+    inline T Vector3<T>::GetNormSquared() const // Squared length, computed as the dot product of the vector with itself.
+    {
+        return this->Dot(*this);
+    }
+
+    template <typename T>
+    inline int Vector3<T>::LongestAxis() const // Index (0, 1 or 2) of the largest component; compares signed values, not magnitudes.
+    {
+        auto it = std::max_element(m_data.begin(), m_data.end()); // on ties, std::max_element yields the first largest element
+        return int(std::distance(m_data.begin(), it));
+    }
+
+/*
+ * Vector-vector operations
+ */
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator=(const Vector3<T>& rhs)
+    {
+        GetX() = rhs.GetX();
+        GetY() = rhs.GetY();
+        GetZ() = rhs.GetZ();
+        return *this;
+    }
+
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator+=(const Vector3<T>& rhs)
+    {
+        GetX() += rhs.GetX();
+        GetY() += rhs.GetY();
+        GetZ() += rhs.GetZ();
+        return *this;
+    }
+
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator-=(const Vector3<T>& rhs)
+    {
+        GetX() -= rhs.GetX();
+        GetY() -= rhs.GetY();
+        GetZ() -= rhs.GetZ();
+        return *this;
+    }
+
+    template <typename T>
+    inline Vector3<T> Vector3<T>::CWiseMul(const Vector3<T>& rhs) const
+    {
+        return Vector3<T>(GetX() * rhs.GetX(),
+                          GetY() * rhs.GetY(),
+                          GetZ() * rhs.GetZ());
+    }
+
+    template <typename T>
+    inline Vector3<T> Vector3<T>::Cross(const Vector3<T>& rhs) const
+    {
+        return Vector3<T>(GetY() * rhs.GetZ() - GetZ() * rhs.GetY(),
+                          GetZ() * rhs.GetX() - GetX() * rhs.GetZ(),
+                          GetX() * rhs.GetY() - GetY() * rhs.GetX());
+    }
+
+    template <typename T>
+    inline T Vector3<T>::Dot(const Vector3<T>& rhs) const
+    {
+        return   GetX() * rhs.GetX()
+                 + GetY() * rhs.GetY()
+                 + GetZ() * rhs.GetZ();
+    }
+
+    template <typename T>
+    inline Vector3<T> Vector3<T>::operator+(const Vector3<T>& rhs) const
+    {
+        return Vector3<T>(GetX() + rhs.GetX(),
+                          GetY() + rhs.GetY(),
+                          GetZ() + rhs.GetZ());
+    }
+
+    // Component-wise difference of two vectors, returned as a new vector.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::operator-(const Vector3<T>& rhs) const
+    {
+        const T x = GetX() - rhs.GetX();
+        const T y = GetY() - rhs.GetY();
+        const T z = GetZ() - rhs.GetZ();
+        return Vector3<T>(x, y, z);
+    }
+
+    // Scalar-on-the-left multiplication: scales every component of rhs by lhs.
+    template <typename T>
+    inline Vector3<T> operator*(T lhs, const Vector3<T>& rhs)
+    {
+        const T x = lhs * rhs.GetX();
+        const T y = lhs * rhs.GetY();
+        const T z = lhs * rhs.GetZ();
+        return Vector3<T>(x, y, z);
+    }
+
+/*
+ * Vector-scalar operations
+ */
+    // Subtracts the scalar a from every component in place; returns *this.
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator-=(T a)
+    {
+        Vector3<T>& self = *this;
+        self.GetX() -= a;
+        self.GetY() -= a;
+        self.GetZ() -= a;
+        return self;
+    }
+
+    // Adds the scalar a to every component in place; returns *this.
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator+=(T a)
+    {
+        Vector3<T>& self = *this;
+        self.GetX() += a;
+        self.GetY() += a;
+        self.GetZ() += a;
+        return self;
+    }
+
+    // Divides every component by the scalar a in place; returns *this.
+    // No check is made for a == 0.
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator/=(T a)
+    {
+        Vector3<T>& self = *this;
+        self.GetX() /= a;
+        self.GetY() /= a;
+        self.GetZ() /= a;
+        return self;
+    }
+
+    // Multiplies every component by the scalar a in place; returns *this.
+    template <typename T>
+    inline Vector3<T>& Vector3<T>::operator*=(T a)
+    {
+        Vector3<T>& self = *this;
+        self.GetX() *= a;
+        self.GetY() *= a;
+        self.GetZ() *= a;
+        return self;
+    }
+
+    // Returns a copy of this vector with every component multiplied by rhs.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::operator*(T rhs) const
+    {
+        const T x = GetX() * rhs;
+        const T y = GetY() * rhs;
+        const T z = GetZ() * rhs;
+        return Vector3<T>(x, y, z);
+    }
+
+    // Returns a copy of this vector with every component divided by rhs.
+    // No check is made for rhs == 0.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::operator/(T rhs) const
+    {
+        const T x = GetX() / rhs;
+        const T y = GetY() / rhs;
+        const T z = GetZ() / rhs;
+        return Vector3<T>(x, y, z);
+    }
+
+/*
+ * Unary operations
+ */
+    // Unary minus: returns a vector with every component negated.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::operator-() const
+    {
+        const T x = -GetX();
+        const T y = -GetY();
+        const T z = -GetZ();
+        return Vector3<T>(x, y, z);
+    }
+
+/*
+ * Comparison operators
+ */
+    // Lexicographic "less than": X decides first, then Y, then Z.
+    template <typename T>
+    inline bool Vector3<T>::operator<(const Vector3<T>& rhs) const
+    {
+        if (!(GetX() == rhs.GetX()))
+        {
+            return (GetX() < rhs.GetX());
+        }
+        if (!(GetY() == rhs.GetY()))
+        {
+            return (GetY() < rhs.GetY());
+        }
+        return (GetZ() < rhs.GetZ());
+    }
+
+    // Lexicographic "greater than": X decides first, then Y, then Z.
+    // Mirror image of operator<.
+    template <typename T>
+    inline bool Vector3<T>::operator>(const Vector3<T>& rhs) const
+    {
+        if (GetX() == rhs.GetX())
+        {
+            if (GetY() == rhs.GetY())
+            {
+                return (GetZ() > rhs.GetZ());
+            }
+            return (GetY() > rhs.GetY());
+        }
+        // X components differ: compare X against X (this previously compared
+        // against rhs.GetZ(), which gave a wrong ordering).
+        return (GetX() > rhs.GetX());
+    }
+
+    // True only when every component of this vector is >= the matching component of rhs.
+    template <typename T>
+    inline bool Vector3<T>::CWiseAllGE(const Vector3<T>& rhs) const
+    {
+        if (!(GetX() >= rhs.GetX()))
+        {
+            return false;
+        }
+        if (!(GetY() >= rhs.GetY()))
+        {
+            return false;
+        }
+        return GetZ() >= rhs.GetZ();
+    }
+
+    // True only when every component of this vector is <= the matching component of rhs.
+    template <typename T>
+    inline bool Vector3<T>::CWiseAllLE(const Vector3<T>& rhs) const
+    {
+        if (!(GetX() <= rhs.GetX()))
+        {
+            return false;
+        }
+        if (!(GetY() <= rhs.GetY()))
+        {
+            return false;
+        }
+        return GetZ() <= rhs.GetZ();
+    }
+
+    // Component-wise minimum of this vector and rhs.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::CWiseMin(const Vector3<T>& rhs) const
+    {
+        const T x = std::min(GetX(), rhs.GetX());
+        const T y = std::min(GetY(), rhs.GetY());
+        const T z = std::min(GetZ(), rhs.GetZ());
+        return Vector3<T>(x, y, z);
+    }
+
+    // Component-wise maximum of this vector and rhs.
+    template <typename T>
+    inline Vector3<T> Vector3<T>::CWiseMax(const Vector3<T>& rhs) const
+    {
+        const T x = std::max(GetX(), rhs.GetX());
+        const T y = std::max(GetY(), rhs.GetY());
+        const T z = std::max(GetZ(), rhs.GetZ());
+        return Vector3<T>(x, y, z);
+    }
+
+    // Value of the smallest component.
+    template <typename T>
+    inline T Vector3<T>::MinCoeff() const
+    {
+        const auto smallest = std::min_element(m_data.begin(), m_data.end());
+        return *smallest;
+    }
+
+    // Value of the largest component.
+    template <typename T>
+    inline T Vector3<T>::MaxCoeff() const
+    {
+        const auto largest = std::max_element(m_data.begin(), m_data.end());
+        return *largest;
+    }
+
+    // Value of the smallest component; idx receives its position within m_data.
+    template <typename T>
+    inline T Vector3<T>::MinCoeff(uint32_t& idx) const
+    {
+        const auto first = m_data.begin();
+        const auto smallest = std::min_element(first, m_data.end());
+        idx = uint32_t(std::distance(first, smallest));
+        return *smallest;
+    }
+
+    // Value of the largest component; idx receives its position within m_data.
+    template <typename T>
+    inline T Vector3<T>::MaxCoeff(uint32_t& idx) const
+    {
+        const auto first = m_data.begin();
+        const auto largest = std::max_element(first, m_data.end());
+        idx = uint32_t(std::distance(first, largest));
+        return *largest;
+    }
+
+/*
+ * Constructors
+ */
+    // Broadcast constructor: all three components are set to a.
+    template <typename T>
+    inline Vector3<T>::Vector3(T a)
+            : m_data{a, a, a}
+    {
+    }
+
+    // Component constructor: stores x, y, z in that order.
+    template <typename T>
+    inline Vector3<T>::Vector3(T x, T y, T z)
+            : m_data{x, y, z}
+    {
+    }
+
+    // Copy constructor: copies the component storage of rhs.
+    template <typename T>
+    inline Vector3<T>::Vector3(const Vector3& rhs)
+            : m_data{rhs.m_data}
+    {
+    }
+
+    // Converting constructor: casts each component of a Vector3<U> to T.
+    template <typename T>
+    template <typename U>
+    inline Vector3<T>::Vector3(const Vector3<U>& rhs)
+            : m_data{T(rhs.GetX()), T(rhs.GetY()), T(rhs.GetZ())}
+    {
+    }
+
+    // Builds a vector from a VHACD::Vertex (mX, mY, mZ).
+    // The static_assert rejects instantiation for any T other than double.
+    template <typename T>
+    inline Vector3<T>::Vector3(const VHACD::Vertex& rhs)
+            : Vector3<T>(rhs.mX, rhs.mY, rhs.mZ)
+    {
+        static_assert(std::is_same<T, double>::value, "Vertex to Vector3 constructor only enabled for double");
+    }
+
+    // Builds a vector from the three indices of a VHACD::Triangle (mI0, mI1, mI2).
+    // The static_assert rejects instantiation for any T other than uint32_t.
+    template <typename T>
+    inline Vector3<T>::Vector3(const VHACD::Triangle& rhs)
+            : Vector3<T>(rhs.mI0, rhs.mI1, rhs.mI2)
+    {
+        static_assert(std::is_same<T, uint32_t>::value, "Triangle to Vector3 constructor only enabled for uint32_t");
+    }
+
+    // Conversion back to VHACD::Vertex; the static_assert restricts it to T == double.
+    template <typename T>
+    inline Vector3<T>::operator VHACD::Vertex() const
+    {
+        static_assert(std::is_same<T, double>::value, "Vector3 to Vertex conversion only enable for double");
+        return ::VHACD::Vertex( GetX(), GetY(), GetZ());
+    }
+
+IVHACD* CreateVHACD();      // Create a synchronous (blocking) implementation of V-HACD
+IVHACD* CreateVHACD_ASYNC();    // Create an asynchronous (non-blocking) implementation of V-HACD
+
+} // namespace VHACD
+
+#if ENABLE_VHACD_IMPLEMENTATION
+
+#include <assert.h>
+
+#ifndef _INC_MATH
+#include <math.h>
+#endif
+
+#ifndef _INC_STDLIB
+#include <stdlib.h>
+#endif
+
+#ifndef _INC_STRING
+#include <string.h>
+#endif
+
+#ifndef _INC_FLOAT
+#include <float.h>
+#endif
+
+#ifndef _INC_LIMITS
+#include <limits.h>
+#endif
+
+#ifndef _ARRAY_
+#include <array>
+#endif
+
+#ifndef _ATOMIC_
+#include <atomic>
+#endif
+
+#ifndef _CHRONO_
+#include <chrono>
+#endif
+
+#ifndef _CONDITION_VARIABLE_
+#include <condition_variable>
+#endif
+
+#ifndef _DEQUE_
+#include <deque>
+#endif
+
+#ifndef _FUTURE_
+#include <future>
+#endif
+
+#ifndef _IOSTREAM_
+#include <iostream>
+#endif
+
+#ifndef _LIST_
+#include <list>
+#endif
+
+#ifndef _MEMORY_
+#include <memory>
+#endif
+
+#ifndef _MUTEX_
+#include <mutex>
+#endif
+
+#ifndef _QUEUE_
+#include <queue>
+#endif
+
+#ifndef _THREAD_
+#include <thread>
+#endif
+
+#ifndef _UNORDERED_MAP_
+#include <unordered_map>
+#endif
+
+#ifndef _UNORDERED_SET_
+#include <unordered_set>
+#endif
+
+#ifndef _UTILITY_
+#include <utility>
+#endif
+
+#ifndef _VECTOR_
+#include <vector>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4100 4127 4189 4244 4456 4701 4702 4996)
+#endif // _MSC_VER
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+// Minimum set of warnings used for cleanup
+// #pragma GCC diagnostic warning "-Wall"
+// #pragma GCC diagnostic warning "-Wextra"
+// #pragma GCC diagnostic warning "-Wpedantic"
+// #pragma GCC diagnostic warning "-Wold-style-cast"
+// #pragma GCC diagnostic warning "-Wnon-virtual-dtor"
+// #pragma GCC diagnostic warning "-Wshadow"
+#endif // __GNUC__
+
+// Scoped Timer
+namespace VHACD {
+
+/*
+ * Simple stopwatch built on std::chrono::high_resolution_clock.
+ * Starts running on construction.
+ */
+class Timer
+{
+    using Clock = std::chrono::high_resolution_clock;
+
+public:
+    Timer()
+        : m_startTime(Clock::now())
+    {
+    }
+
+    // Restarts the stopwatch from the current instant.
+    void Reset()
+    {
+        m_startTime = Clock::now();
+    }
+
+    // Returns the seconds elapsed since the last start, then restarts the stopwatch.
+    double GetElapsedSeconds()
+    {
+        const double elapsed = PeekElapsedSeconds();
+        Reset();
+        return elapsed;
+    }
+
+    // Returns the seconds elapsed since the last start without restarting.
+    double PeekElapsedSeconds()
+    {
+        const std::chrono::duration<double> elapsed = Clock::now() - m_startTime;
+        return elapsed.count();
+    }
+
+private:
+    std::chrono::time_point<Clock> m_startTime;
+};
+
+/*
+ * Scope guard that measures its own lifetime.
+ * On destruction, if a logger was supplied, it logs "<action> took <t> seconds".
+ */
+class ScopedTime
+{
+public:
+    ScopedTime(const char* action,
+               VHACD::IVHACD::IUserLogger* logger)
+        : m_action(action)
+        , m_logger(logger)
+    {
+        m_timer.Reset();
+    }
+
+    ~ScopedTime()
+    {
+        const double elapsed = m_timer.GetElapsedSeconds();
+        if( m_logger == nullptr )
+        {
+            return;
+        }
+
+        // snprintf truncates the message if it does not fit the buffer.
+        char message[512];
+        snprintf(message, sizeof(message), "%s took %0.5f seconds", m_action, elapsed);
+        m_logger->Log(message);
+    }
+
+    const char* m_action{ nullptr };
+    Timer       m_timer;
+    VHACD::IVHACD::IUserLogger* m_logger{ nullptr };
+};
+// Builds the tightest axis-aligned box around a point cloud.
+// The first point seeds both corners, so points must not be empty.
+BoundsAABB::BoundsAABB(const std::vector<VHACD::Vertex>& points)
+        : m_min(points[0])
+        , m_max(points[0])
+{
+    // Grow the box with every point after the first.
+    for (auto it = points.begin() + 1; it != points.end(); ++it)
+    {
+        const VHACD::Vertex& p = *it;
+        m_min = m_min.CWiseMin(p);
+        m_max = m_max.CWiseMax(p);
+    }
+}
+
+// Builds a box directly from its two corners; the values are stored as given.
+BoundsAABB::BoundsAABB(const VHACD::Vect3& min,
+                       const VHACD::Vect3& max)
+        : m_min(min),
+          m_max(max)
+{
+}
+
+// Smallest box containing both this box and b.
+BoundsAABB BoundsAABB::Union(const BoundsAABB& b)
+{
+    const VHACD::Vect3 lower = GetMin().CWiseMin(b.GetMin());
+    const VHACD::Vect3 upper = GetMax().CWiseMax(b.GetMax());
+    return BoundsAABB(lower, upper);
+}
+
+// True when the two boxes overlap or touch: they are disjoint only if they
+// are separated along at least one axis.
+bool VHACD::BoundsAABB::Intersects(const VHACD::BoundsAABB& b) const
+{
+    const bool separatedX =    (  GetMin().GetX() > b.GetMax().GetX())
+                            || (b.GetMin().GetX() >   GetMax().GetX());
+    const bool separatedY =    (  GetMin().GetY() > b.GetMax().GetY())
+                            || (b.GetMin().GetY() >   GetMax().GetY());
+    const bool separatedZ =    (  GetMin().GetZ() > b.GetMax().GetZ())
+                            || (b.GetMin().GetZ() >   GetMax().GetZ());
+    return !(separatedX || separatedY || separatedZ);
+}
+
+// Total area of the six faces of the box.
+double BoundsAABB::SurfaceArea() const
+{
+    const VHACD::Vect3 extent = GetMax() - GetMin();
+    const double faceSum = extent.GetX() * extent.GetY() + extent.GetX() * extent.GetZ() + extent.GetY() * extent.GetZ();
+    return double(2.0) * faceSum;
+}
+
+// Volume of the box: product of its extents along X, Y and Z.
+double VHACD::BoundsAABB::Volume() const
+{
+    const VHACD::Vect3 extent = GetMax() - GetMin();
+    return extent.GetX() * extent.GetY() * extent.GetZ();
+}
+
+// Returns a copy of the box grown on every side by
+// (half the diagonal length) * ratio.
+BoundsAABB VHACD::BoundsAABB::Inflate(double ratio) const
+{
+    const double diagonal = (GetMin() - GetMax()).GetNorm();
+    double margin = diagonal * double(0.5) * ratio;
+    return BoundsAABB(GetMin() - margin,
+                      GetMax() + margin);
+}
+
+// Clamps p to the box: raise it to the lower corner, then cap it at the upper corner.
+VHACD::Vect3 VHACD::BoundsAABB::ClosestPoint(const VHACD::Vect3& p) const
+{
+    const VHACD::Vect3 raised = p.CWiseMax(GetMin());
+    return raised.CWiseMin(GetMax());
+}
+
+// Mutable access to the lower corner.
+VHACD::Vect3& VHACD::BoundsAABB::GetMin()
+{
+    return this->m_min;
+}
+
+// Mutable access to the upper corner.
+VHACD::Vect3& VHACD::BoundsAABB::GetMax()
+{
+    return this->m_max;
+}
+
+// Read-only access to the lower corner.
+inline const VHACD::Vect3& VHACD::BoundsAABB::GetMin() const
+{
+    return this->m_min;
+}
+
+// Read-only access to the upper corner.
+const VHACD::Vect3& VHACD::BoundsAABB::GetMax() const
+{
+    return this->m_max;
+}
+
+// Extent of the box along each axis (upper corner minus lower corner).
+VHACD::Vect3 VHACD::BoundsAABB::GetSize() const
+{
+    const VHACD::Vect3 extent = GetMax() - GetMin();
+    return extent;
+}
+
+// Midpoint between the lower and upper corners.
+VHACD::Vect3 VHACD::BoundsAABB::GetCenter() const
+{
+    const VHACD::Vect3 cornerSum = GetMin() + GetMax();
+    return cornerSum * double(0.5);
+}
+
+/*
+ * In-place sort of array[0 .. elements-1].
+ * Relies on three way comparison, which std::sort doesn't use:
+ * dCompareKey must be default constructible and provide
+ * Compare(a, b) returning <0, 0 or >0.
+ *
+ * Two phases:
+ *  1. An iterative quicksort (explicit stack, median-of-three pivot) that stops
+ *     partitioning once a range spans batchSize or fewer positions.
+ *  2. A single insertion sort pass over the whole, now nearly sorted, array.
+ */
+template <class T, class dCompareKey>
+void Sort(T* const array, int elements)
+{
+    const int batchSize = 8;
+    // Explicit stack of pending [lo, hi] ranges, used instead of recursion
+    int stack[1024][2];
+
+    stack[0][0] = 0;
+    stack[0][1] = elements - 1;
+    int stackIndex = 1;
+    const dCompareKey comparator;
+    while (stackIndex)
+    {
+        stackIndex--;
+        int lo = stack[stackIndex][0];
+        int hi = stack[stackIndex][1];
+        // Small ranges are left for the insertion sort below
+        if ((hi - lo) > batchSize)
+        {
+            // Median of three: order array[lo], array[mid], array[hi]
+            int mid = (lo + hi) >> 1;
+            if (comparator.Compare(array[lo], array[mid]) > 0)
+            {
+                std::swap(array[lo],
+                          array[mid]);
+            }
+            if (comparator.Compare(array[mid], array[hi]) > 0)
+            {
+                std::swap(array[mid],
+                          array[hi]);
+            }
+            if (comparator.Compare(array[lo], array[mid]) > 0)
+            {
+                std::swap(array[lo],
+                          array[mid]);
+            }
+            int i = lo + 1;
+            int j = hi - 1;
+            // Pivot is copied because array[mid] may be moved by the swaps below
+            const T pivot(array[mid]);
+            do
+            {
+                while (comparator.Compare(array[i], pivot) < 0)
+                {
+                    i++;
+                }
+                while (comparator.Compare(array[j], pivot) > 0)
+                {
+                    j--;
+                }
+
+                if (i <= j)
+                {
+                    std::swap(array[i],
+                              array[j]);
+                    i++;
+                    j--;
+                }
+            } while (i <= j);
+
+            // Queue both partitions for further processing
+            if (i < hi)
+            {
+                stack[stackIndex][0] = i;
+                stack[stackIndex][1] = hi;
+                stackIndex++;
+            }
+            if (lo < j)
+            {
+                stack[stackIndex][0] = lo;
+                stack[stackIndex][1] = j;
+                stackIndex++;
+            }
+            // Debug-only check that the fixed-size stack (1024 entries) did not overflow
+            assert(stackIndex < int(sizeof(stack) / (2 * sizeof(stack[0][0]))));
+        }
+    }
+
+    // Move the smallest of the first (batchSize + 1) elements to array[0].
+    // It acts as a sentinel so the insertion sort below can stop without
+    // testing j against 0.
+    int stride = batchSize + 1;
+    if (elements < stride)
+    {
+        stride = elements;
+    }
+    for (int i = 1; i < stride; ++i)
+    {
+        if (comparator.Compare(array[0], array[i]) > 0)
+        {
+            std::swap(array[0],
+                      array[i]);
+        }
+    }
+
+    // Final insertion sort pass over the whole array
+    for (int i = 1; i < elements; ++i)
+    {
+        int j = i;
+        const T tmp(array[i]);
+        for (; comparator.Compare(array[j - 1], tmp) > 0; --j)
+        {
+            assert(j > 0);
+            array[j] = array[j - 1];
+        }
+        array[j] = tmp;
+    }
+}
+
+/*
+Maintaining comment due to attribution
+Purpose:
+
+TRIANGLE_AREA_3D computes the area of a triangle in 3D.
+
+Modified:
+
+22 April 1999
+
+Author:
+
+John Burkardt
+
+Parameters:
+
+Input, double X1, Y1, Z1, X2, Y2, Z2, X3, Y3, Z3, the (getX,getY,getZ)
+coordinates of the corners of the triangle.
+
+Output, double TRIANGLE_AREA_3D, the area of the triangle.
+*/
+// Area of the 3D triangle (p1, p2, p3), computed as 0.5 * base * height where
+// the base is the edge p1->p2. A zero-length base yields an area of 0.
+double ComputeArea(const VHACD::Vect3& p1,
+                   const VHACD::Vect3& p2,
+                   const VHACD::Vect3& p3)
+{
+    const VHACD::Vect3 edge = p2 - p1;
+    const double base = edge.GetNorm();
+
+    double height = double(0.0);
+    if (!(base == double(0.0)))
+    {
+        // Remove from (P3-P1) its projection onto (P2-P1); what is left is
+        // perpendicular to the base and its length is the height.
+        const VHACD::Vect3 toApex = p3 - p1;
+        const double alpha = toApex.Dot(edge) / (base * base);
+
+        const VHACD::Vect3 perpendicular = toApex - alpha * edge;
+        height = perpendicular.GetNorm();
+    }
+
+    return double(0.5) * base * height;
+}
+
+// Computes the area-weighted centroid of a triangle mesh surface.
+//   points  - vertex positions
+//   indices - triangles, each holding three indices into points
+//   center  - receives the centroid on success
+// Returns false, leaving center untouched, when points is empty.
+// Returns false with center set to (0,0,0) when the total triangle area is
+// not positive (no triangles, or only degenerate ones); the weighted average
+// would otherwise divide by zero and produce a NaN/inf centre.
+bool ComputeCentroid(const std::vector<VHACD::Vertex>& points,
+                     const std::vector<VHACD::Triangle>& indices,
+                     VHACD::Vect3& center)
+
+{
+    bool ret = false;
+    if (points.size())
+    {
+        center = VHACD::Vect3(0);
+
+        VHACD::Vect3 numerator(0);
+        double denominator = 0;
+
+        for (uint32_t i = 0; i < indices.size(); i++)
+        {
+            uint32_t i1 = indices[i].mI0;
+            uint32_t i2 = indices[i].mI1;
+            uint32_t i3 = indices[i].mI2;
+
+            const VHACD::Vect3& p1 = points[i1];
+            const VHACD::Vect3& p2 = points[i2];
+            const VHACD::Vect3& p3 = points[i3];
+
+            // Centroid of this triangle: average of its three corners
+            VHACD::Vect3 sum = (p1 + p2 + p3) / 3;
+
+            // Compute the area of this triangle
+            double area = ComputeArea(p1,
+                                      p2,
+                                      p3);
+
+            // Weight each triangle centroid by its area
+            numerator += (sum * area);
+
+            denominator += area;
+        }
+
+        // Only divide when there is some area to average over
+        if (denominator > double(0.0))
+        {
+            double recip = 1 / denominator;
+            center = numerator * recip;
+            ret = true;
+        }
+    }
+    return ret;
+}
+
+// Determinant of the 3x3 matrix whose rows are matrix[0..2], expanded along
+// the third row. error receives the same expansion evaluated with absolute
+// values, i.e. an upper bound on the magnitude of the summed terms.
+double Determinant3x3(const std::array<VHACD::Vect3, 3>& matrix,
+                      double& error)
+{
+    const VHACD::Vect3& r0 = matrix[0];
+    const VHACD::Vect3& r1 = matrix[1];
+    const VHACD::Vect3& r2 = matrix[2];
+
+    double det = double(0.0);
+    error = double(0.0);
+
+    // Cofactor of r2.x
+    const double yz = r0.GetY() * r1.GetZ();
+    const double zy = r0.GetZ() * r1.GetY();
+    error += (std::abs(yz) + std::abs(zy)) * std::abs(r2.GetX());
+    det += (yz - zy) * r2.GetX();
+
+    // Cofactor of r2.y (negative sign)
+    const double xz = r0.GetX() * r1.GetZ();
+    const double zx = r0.GetZ() * r1.GetX();
+    error += (std::abs(xz) + std::abs(zx)) * std::abs(r2.GetY());
+    det -= (xz - zx) * r2.GetY();
+
+    // Cofactor of r2.z
+    const double xy = r0.GetX() * r1.GetY();
+    const double yx = r0.GetY() * r1.GetX();
+    error += (std::abs(xy) + std::abs(yx)) * std::abs(r2.GetZ());
+    det += (xy - yx) * r2.GetZ();
+
+    return det;
+}
+
+// Absolute volume of a triangle mesh: one sixth of the sum, over all
+// triangles, of the determinant formed by the triangle's three vertices.
+double ComputeMeshVolume(const std::vector<VHACD::Vertex>& vertices,
+                         const std::vector<VHACD::Triangle>& indices)
+{
+    double volume = 0;
+    for (const VHACD::Triangle& tri : indices)
+    {
+        const std::array<VHACD::Vect3, 3> corners = {
+            vertices[tri.mI0],
+            vertices[tri.mI1],
+            vertices[tri.mI2]
+        };
+        double unusedError;
+        volume += Determinant3x3(corners,
+                                 unusedError);
+    }
+
+    volume *= (double(1.0) / double(6.0));
+    // The sign depends on triangle winding; report the magnitude
+    if (volume < 0)
+    {
+        volume = -volume;
+    }
+    return volume;
+}
+
+/*
+ * To minimize memory allocations while maintaining pointer stability.
+ * Used in KdTreeNode and ConvexHull, as both use tree data structures that rely on pointer stability
+ * Neither rely on random access or iteration
+ * They just dump elements into a memory pool, then refer to pointers to the elements
+ * All elements are default constructed in NodeStorage's m_nodes array
+ */
+template <typename T, std::size_t MaxBundleSize = 1024>
+class NodeBundle
+{
+    // One fixed-size chunk of nodes, handed out in order
+    struct NodeStorage {
+        // True once every slot of m_nodes has been handed out
+        bool IsFull() const;
+
+        // Returns the next unused slot and advances m_index
+        T& GetNextNode();
+
+        // Index of the next unused slot in m_nodes. No initializer here;
+        // NodeStorage objects are created through std::list::emplace with no
+        // arguments, which value-initializes them.
+        std::size_t m_index;
+        std::array<T, MaxBundleSize> m_nodes;
+    };
+
+    // std::list keeps element addresses stable as further chunks are appended
+    std::list<NodeStorage> m_list;
+    // Chunk currently being filled; m_list.end() while no chunk exists
+    typename std::list<NodeStorage>::iterator m_head{ m_list.end() };
+
+public:
+    // Returns a reference to the next unused node, adding a chunk when needed
+    T& GetNextNode();
+
+    // Returns the first node of the first chunk
+    T& GetFirstNode();
+
+    // Destroys all chunks
+    void Clear();
+};
+
+// True once all MaxBundleSize slots of this chunk have been handed out.
+template <typename T, std::size_t MaxBundleSize>
+bool NodeBundle<T, MaxBundleSize>::NodeStorage::IsFull() const
+{
+    const bool full = (m_index == MaxBundleSize);
+    return full;
+}
+
+// Hands out the next unused slot of this chunk and advances the cursor.
+// The chunk must not be full (checked by assert only).
+template <typename T, std::size_t MaxBundleSize>
+T& NodeBundle<T, MaxBundleSize>::NodeStorage::GetNextNode()
+{
+    assert(m_index < MaxBundleSize);
+    const std::size_t slot = m_index;
+    ++m_index;
+    return m_nodes[slot];
+}
+
+// Returns the next unused node, appending a fresh chunk when there is no
+// chunk yet or the current one is full.
+template <typename T, std::size_t MaxBundleSize>
+T& NodeBundle<T, MaxBundleSize>::GetNextNode()
+{
+    /*
+     * || short circuits, so m_head is not dereferenced when m_head == m_list.end()
+     */
+    const bool needsChunk =    m_head == m_list.end()
+                            || m_head->IsFull();
+    if (needsChunk)
+    {
+        m_head = m_list.emplace(m_list.end());
+    }
+
+    return m_head->GetNextNode();
+}
+
+// Returns the first node of the first chunk.
+// At least one chunk must exist (checked by assert only).
+template <typename T, std::size_t MaxBundleSize>
+T& NodeBundle<T, MaxBundleSize>::GetFirstNode()
+{
+    assert(m_head != m_list.end());
+    NodeStorage& firstChunk = m_list.front();
+    return firstChunk.m_nodes[0];
+}
+
+// Destroys every chunk and returns the bundle to its initial, empty state.
+template <typename T, std::size_t MaxBundleSize>
+void NodeBundle<T, MaxBundleSize>::Clear()
+{
+    m_list.clear();
+    // clear() invalidates iterators to the erased chunks; without this reset
+    // m_head would dangle and the next GetNextNode() would dereference it.
+    m_head = m_list.end();
+}
+
+/*
+ * Returns the index of the highest set bit in x, or -1 when x is zero
+ */
+inline int dExp2(int x)
+{
+    int highest = -1;
+    while (x)
+    {
+        x >>= 1;
+        ++highest;
+    }
+    return highest;
+}
+
+/*
+ * Reverses the order of the bits in v and returns the result.
+ * The lowest bit of v lands at position dExp2(base) - 1; bits above the
+ * highest set bit of v are not filled in.
+ * Only used to calculate index of ndNormalMap::m_normal when tessellating a triangle
+ */
+inline int dBitReversal(int v,
+                        int base)
+{
+    int reversed = 0;
+    int shift = dExp2(base) - 1;
+    do
+    {
+        reversed += (v & 1) << shift;
+        --shift;
+        v >>= 1;
+    } while (v);
+    return reversed;
+}
+
+/*
+ * Extended-precision floating point number stored as a sign flag, a binary
+ * exponent and a multi-word mantissa (VHACD_GOOGOL_SIZE 64-bit words,
+ * most significant word first).
+ */
+class Googol
+{
+    // Number of 64-bit words in the mantissa
+    #define VHACD_GOOGOL_SIZE 4
+public:
+    Googol() = default;
+    Googol(double value);
+
+    operator double() const;
+    Googol operator+(const Googol &A) const;
+    Googol operator-(const Googol &A) const;
+    Googol operator*(const Googol &A) const;
+    Googol operator/ (const Googol &A) const;
+
+    Googol& operator+= (const Googol &A);
+    Googol& operator-= (const Googol &A);
+
+    bool operator>(const Googol &A) const;
+    bool operator>=(const Googol &A) const;
+    bool operator<(const Googol &A) const;
+    bool operator<=(const Googol &A) const;
+    bool operator==(const Googol &A) const;
+    bool operator!=(const Googol &A) const;
+
+    Googol Abs() const;
+    Googol Floor() const;
+    Googol InvSqrt() const;
+    Googol Sqrt() const;
+
+    // Writes decimal digits into string; the caller supplies the buffer
+    void ToString(char* const string) const;
+
+private:
+    // Helpers operating on raw mantissa words
+    void NegateMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const;
+    void CopySignedMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const;
+    int NormalizeMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const;
+    void ShiftRightMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa,
+                            int bits) const;
+    uint64_t CheckCarrier(uint64_t a, uint64_t b) const;
+
+    int LeadingZeros(uint64_t a) const;
+    void ExtendedMultiply(uint64_t a,
+                          uint64_t b,
+                          uint64_t& high,
+                          uint64_t& low) const;
+    void ScaleMantissa(uint64_t* out,
+                       uint64_t scale) const;
+
+    // 0 for non-negative values, 1 for negative values
+    int m_sign{ 0 };
+    // Binary exponent, as produced by frexp in the double constructor
+    int m_exponent{ 0 };
+    // Magnitude; word 0 is the most significant
+    std::array<uint64_t, VHACD_GOOGOL_SIZE> m_mantissa{ 0 };
+
+public:
+    // Commonly used constants
+    static Googol m_zero;
+    static Googol m_one;
+    static Googol m_two;
+    static Googol m_three;
+    static Googol m_half;
+};
+
+// Definitions of the shared Googol constants (0, 1, 2, 3 and 0.5)
+Googol Googol::m_zero(double(0.0));
+Googol Googol::m_one(double(1.0));
+Googol Googol::m_two(double(2.0));
+Googol Googol::m_three(double(3.0));
+Googol Googol::m_half(double(0.5));
+
+// Builds a Googol from a double: frexp splits the value into a fraction and a
+// binary exponent; the fraction's magnitude, scaled by 2^62, becomes the top
+// mantissa word. The remaining words keep their default value.
+Googol::Googol(double value)
+{
+    int binaryExponent;
+    const double fraction = fabs(frexp(value, &binaryExponent));
+    const double scale = double(uint64_t(1) << 62);
+
+    m_sign = (value >= 0) ? 0 : 1;
+    m_exponent = binaryExponent;
+    m_mantissa[0] = uint64_t(scale * fraction);
+}
+
+// Converts back to double using only the top mantissa word; the lower words
+// are ignored.
+Googol::operator double() const
+{
+    const double fraction = (double(1.0) / double(uint64_t(1) << 62)) * double(m_mantissa[0]);
+    const double signFactor = m_sign ? double(-1.0) : double(1.0);
+    return ldexp(fraction, m_exponent) * signFactor;
+}
+
+// Extended-precision addition.
+// If either operand has a zero top mantissa word it is treated as zero and
+// the other operand is returned unchanged.
+Googol Googol::operator+(const Googol &A) const
+{
+    Googol tmp;
+    if (m_mantissa[0] && A.m_mantissa[0])
+    {
+        std::array<uint64_t, VHACD_GOOGOL_SIZE> mantissa0;
+        std::array<uint64_t, VHACD_GOOGOL_SIZE> mantissa1;
+        std::array<uint64_t, VHACD_GOOGOL_SIZE> mantissa;
+
+        // Work on two's complement copies so one add handles both signs
+        CopySignedMantissa(mantissa0);
+        A.CopySignedMantissa(mantissa1);
+
+        // Align the operand with the smaller exponent to the larger one
+        int exponentDiff = m_exponent - A.m_exponent;
+        int exponent = m_exponent;
+        if (exponentDiff > 0)
+        {
+            ShiftRightMantissa(mantissa1,
+                               exponentDiff);
+        }
+        else if (exponentDiff < 0)
+        {
+            exponent = A.m_exponent;
+            ShiftRightMantissa(mantissa0,
+                               -exponentDiff);
+        }
+
+        // Multi-word add, least significant word (highest index) first
+        uint64_t carrier = 0;
+        for (int i = VHACD_GOOGOL_SIZE - 1; i >= 0; i--)
+        {
+            uint64_t m0 = mantissa0[i];
+            uint64_t m1 = mantissa1[i];
+            mantissa[i] = m0 + m1 + carrier;
+            carrier = CheckCarrier(m0, m1) | CheckCarrier(m0 + m1, carrier);
+        }
+
+        // A negative two's complement result is converted back to sign + magnitude
+        int sign = 0;
+        if (int64_t(mantissa[0]) < 0)
+        {
+            sign = 1;
+            NegateMantissa(mantissa);
+        }
+
+        // NormalizeMantissa returns the exponent adjustment; a value of
+        // -64 * VHACD_GOOGOL_SIZE or less means every word was zero
+        int bits = NormalizeMantissa(mantissa);
+        if (bits <= (-64 * VHACD_GOOGOL_SIZE))
+        {
+            tmp.m_sign = 0;
+            tmp.m_exponent = 0;
+        }
+        else
+        {
+            tmp.m_sign = sign;
+            tmp.m_exponent = int(exponent + bits);
+        }
+
+        tmp.m_mantissa = mantissa;
+    }
+    else if (A.m_mantissa[0])
+    {
+        tmp = A;
+    }
+    else
+    {
+        tmp = *this;
+    }
+
+    return tmp;
+}
+
+// Subtraction, implemented as addition of A with its sign flag flipped.
+Googol Googol::operator-(const Googol &A) const
+{
+    Googol negatedA(A);
+    negatedA.m_sign = !negatedA.m_sign;
+    return *this + negatedA;
+}
+
+// Extended-precision multiplication.
+// Returns zero if either operand has a zero top mantissa word.
+Googol Googol::operator*(const Googol &A) const
+{
+    if (m_mantissa[0] && A.m_mantissa[0])
+    {
+        // Double-width accumulator for the full product
+        std::array<uint64_t, VHACD_GOOGOL_SIZE * 2> mantissaAcc{ 0 };
+        for (int i = VHACD_GOOGOL_SIZE - 1; i >= 0; i--)
+        {
+            uint64_t a = m_mantissa[i];
+            if (a)
+            {
+                // Partial product: A's mantissa scaled by word i of this
+                // mantissa, written starting at offset i
+                uint64_t mantissaScale[2 * VHACD_GOOGOL_SIZE] = { 0 };
+                A.ScaleMantissa(&mantissaScale[i], a);
+
+                // Add the partial product into the accumulator,
+                // least significant word first
+                uint64_t carrier = 0;
+                for (int j = 0; j < 2 * VHACD_GOOGOL_SIZE; j++)
+                {
+                    const int k = 2 * VHACD_GOOGOL_SIZE - 1 - j;
+                    uint64_t m0 = mantissaAcc[k];
+                    uint64_t m1 = mantissaScale[k];
+                    mantissaAcc[k] = m0 + m1 + carrier;
+                    carrier = CheckCarrier(m0, m1) | CheckCarrier(m0 + m1, carrier);
+                }
+            }
+        }
+
+        // Shift the product left so that two leading zero bits remain in the
+        // top word.
+        // NOTE(review): assumes the top accumulator word has more than two
+        // leading zeros (bits in 1..63) - confirm for non-normalized operands
+        uint64_t carrier = 0;
+        int bits = LeadingZeros(mantissaAcc[0]) - 2;
+        for (int i = 0; i < 2 * VHACD_GOOGOL_SIZE; i++)
+        {
+            const int k = 2 * VHACD_GOOGOL_SIZE - 1 - i;
+            uint64_t a = mantissaAcc[k];
+            mantissaAcc[k] = (a << uint64_t(bits)) | carrier;
+            carrier = a >> uint64_t(64 - bits);
+        }
+
+        int exp = m_exponent + A.m_exponent - (bits - 2);
+
+        // Keep only the upper half of the double-width product
+        Googol tmp;
+        tmp.m_sign = m_sign ^ A.m_sign;
+        tmp.m_exponent = exp;
+        for (std::size_t i = 0; i < tmp.m_mantissa.size(); ++i)
+        {
+            tmp.m_mantissa[i] = mantissaAcc[i];
+        }
+
+        return tmp;
+    }
+    return Googol(double(0.0));
+}
+
+// Extended-precision division: computes 1/A by Newton-Raphson refinement
+// (x <- x * (2 - A * x)), seeded with the double-precision reciprocal, then
+// multiplies by *this.
+Googol Googol::operator/(const Googol &A) const
+{
+    Googol tmp(double(1.0) / A);
+    tmp = tmp * (m_two - A * tmp);
+    tmp = tmp * (m_two - A * tmp);
+    bool test = false;
+    int passes = 0;
+    do
+    {
+        passes++;
+        Googol tmp0(tmp);
+        tmp = tmp * (m_two - A * tmp);
+        // Keep refining while the estimate still changes. The previous test
+        // (tmp0 == tmp) was inverted and stopped after a single pass whenever
+        // the estimate had NOT yet converged. Matches the loop in InvSqrt().
+        test = tmp0 != tmp;
+    } while (test && (passes < (2 * VHACD_GOOGOL_SIZE)));
+    return (*this) * tmp;
+}
+
+// In-place addition, expressed through operator+.
+Googol& Googol::operator+=(const Googol &A)
+{
+    Googol& self = *this;
+    self = self + A;
+    return self;
+}
+
+// In-place subtraction, expressed through operator-.
+Googol& Googol::operator-=(const Googol &A)
+{
+    Googol& self = *this;
+    self = self - A;
+    return self;
+}
+
+// True when (*this - A), converted to double, is positive.
+bool Googol::operator>(const Googol &A) const
+{
+    const double difference = double(*this - A);
+    return difference > double(0.0);
+}
+
+// True when (*this - A), converted to double, is zero or positive.
+bool Googol::operator>=(const Googol &A) const
+{
+    const double difference = double(*this - A);
+    return difference >= double(0.0);
+}
+
+// True when (*this - A), converted to double, is negative.
+bool Googol::operator<(const Googol &A) const
+{
+    const double difference = double(*this - A);
+    return difference < double(0.0);
+}
+
+// True when (*this - A), converted to double, is zero or negative.
+bool Googol::operator<=(const Googol &A) const
+{
+    const double difference = double(*this - A);
+    return difference <= double(0.0);
+}
+
+// Exact equality: sign, exponent and every mantissa word must match.
+bool Googol::operator==(const Googol &A) const
+{
+    if (m_sign != A.m_sign)
+    {
+        return false;
+    }
+    if (m_exponent != A.m_exponent)
+    {
+        return false;
+    }
+    return m_mantissa == A.m_mantissa;
+}
+
+// Negation of operator==.
+bool Googol::operator!=(const Googol &A) const
+{
+    const bool same = (*this == A);
+    return !same;
+}
+
+// Absolute value: a copy of this number with the sign flag cleared.
+Googol Googol::Abs() const
+{
+    Googol magnitude(*this);
+    magnitude.m_sign = 0;
+    return magnitude;
+}
+
+// Drops the fractional bits of the mantissa (the sign flag is kept as is, so
+// the magnitude is truncated). Values with m_exponent < 1 yield zero.
+Googol Googol::Floor() const
+{
+    if (m_exponent < 1)
+    {
+        return Googol(double(0.0));
+    }
+    // The top (m_exponent + 2) bits of the mantissa hold the integer part;
+    // locate the word (start) and the bit count inside it (bits) where the
+    // integer part ends.
+    int bits = m_exponent + 2;
+    int start = 0;
+    while (bits >= 64)
+    {
+        bits -= 64;
+        start++;
+    }
+
+    Googol tmp(*this);
+    if (start >= VHACD_GOOGOL_SIZE)
+    {
+        // Every stored bit belongs to the integer part: nothing to clear.
+        // (Previously this indexed m_mantissa out of bounds.)
+        return tmp;
+    }
+    for (int i = VHACD_GOOGOL_SIZE - 1; i > start; i--)
+    {
+        tmp.m_mantissa[i] = 0;
+    }
+    // Keep the top `bits` bits of word `start`. When bits == 0 the whole word
+    // is fractional; shifting a 64-bit value by 64 is undefined behaviour, so
+    // that case is handled explicitly.
+    uint64_t mask = 0;
+    if (bits > 0)
+    {
+        mask = ~0ULL;
+        mask <<= (64 - bits);
+    }
+    tmp.m_mantissa[start] &= mask;
+    return tmp;
+}
+
+// Inverse square root by Newton-Raphson refinement
+// (x <- 0.5 * x * (3 - me * x * x)), seeded with the double-precision result.
+// Iterates until the estimate stops changing or 2 * VHACD_GOOGOL_SIZE passes
+// have run.
+Googol Googol::InvSqrt() const
+{
+    const Googol& me = *this;
+    Googol estimate(double(1.0) / sqrt(me));
+
+    bool changed = false;
+    int passes = 0;
+    do
+    {
+        passes++;
+        const Googol previous(estimate);
+        estimate = m_half * estimate * (m_three - me * estimate * estimate);
+        changed = (estimate != previous);
+    } while (changed && (passes < (2 * VHACD_GOOGOL_SIZE)));
+    return estimate;
+}
+
+// Square root, computed as x * (1 / sqrt(x)).
+Googol Googol::Sqrt() const
+{
+    const Googol inverseRoot = InvSqrt();
+    return *this * inverseRoot;
+}
+
+// Writes the decimal digits of this number into string, followed by a
+// terminating 0. No sign character or decimal point is written.
+// The buffer size is not passed in, so the caller must supply a buffer large
+// enough for every digit produced.
+void Googol::ToString(char* const string) const
+{
+    Googol tmp(*this);
+    Googol base(double(10.0));
+    // Scale down by powers of ten until the value is no greater than 1
+    while (double(tmp) > double(1.0))
+    {
+        tmp = tmp / base;
+    }
+
+    // Peel off one decimal digit per pass until the top mantissa word is zero
+    int index = 0;
+    while (tmp.m_mantissa[0])
+    {
+        tmp = tmp * base;
+        Googol digit(tmp.Floor());
+        tmp -= digit;
+        double val = digit;
+        string[index] = char(val) + '0';
+        index++;
+    }
+    string[index] = 0;
+}
+
+// Two's complement negation of a multi-word mantissa in place: every word is
+// inverted and 1 is added at the least significant word (highest index),
+// with the carry propagating towards word 0.
+void Googol::NegateMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const
+{
+    uint64_t carry = 1;
+    for (int i = VHACD_GOOGOL_SIZE - 1; i >= 0; i--)
+    {
+        const uint64_t negated = ~mantissa[i] + carry;
+        mantissa[i] = negated;
+        // The carry only continues while the sum wraps around to zero
+        if (negated)
+        {
+            carry = 0;
+        }
+    }
+}
+
+// Copies this number's mantissa into the output; when the sign flag is set
+// the copy is converted to two's complement form.
+void Googol::CopySignedMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const
+{
+    mantissa = m_mantissa;
+    if (!m_sign)
+    {
+        return;
+    }
+    NegateMantissa(mantissa);
+}
+
+// Shifts the mantissa in place so that the top word has exactly two leading
+// zero bits, and returns the matching exponent adjustment (positive for a
+// right shift, negative for a left shift).
+// A return value of -64 * VHACD_GOOGOL_SIZE means every word was zero.
+int Googol::NormalizeMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa) const
+{
+    int bits = 0;
+    // Bit 62 of the top word is set: shift right by one to restore the
+    // two leading zero bits
+    if (int64_t(mantissa[0] * 2) < 0)
+    {
+        bits = 1;
+        ShiftRightMantissa(mantissa, 1);
+    }
+    else
+    {
+        // Drop whole leading zero words, 64 bits at a time
+        while (!mantissa[0] && bits > (-64 * VHACD_GOOGOL_SIZE))
+        {
+            bits -= 64;
+            for (int i = 1; i < VHACD_GOOGOL_SIZE; i++) {
+                mantissa[i - 1] = mantissa[i];
+            }
+            mantissa[VHACD_GOOGOL_SIZE - 1] = 0;
+        }
+
+        if (bits > (-64 * VHACD_GOOGOL_SIZE))
+        {
+            int n = LeadingZeros(mantissa[0]) - 2;
+            if (n > 0)
+            {
+                // Shift left by n bits, least significant word first
+                uint64_t carrier = 0;
+                for (int i = VHACD_GOOGOL_SIZE - 1; i >= 0; i--)
+                {
+                    uint64_t a = mantissa[i];
+                    mantissa[i] = (a << n) | carrier;
+                    carrier = a >> (64 - n);
+                }
+                bits -= n;
+            }
+            else if (n < 0)
+            {
+                // This is very rare but it does happen, when the leading zeros of the mantissa is an exact multiple of 64
+                // Shift right by -n bits, most significant word first
+                uint64_t carrier = 0;
+                int shift = -n;
+                for (int i = 0; i < VHACD_GOOGOL_SIZE; i++)
+                {
+                    uint64_t a = mantissa[i];
+                    mantissa[i] = (a >> shift) | carrier;
+                    carrier = a << (64 - shift);
+                }
+                bits -= n;
+            }
+        }
+    }
+    return bits;
+}
+
+// Arithmetic right shift of a multi-word two's complement mantissa by `bits`
+// bits, in place. The vacated high bits are filled with the sign of the top
+// word.
+void Googol::ShiftRightMantissa(std::array<uint64_t, VHACD_GOOGOL_SIZE>& mantissa,
+                                int bits) const
+{
+    // Fill pattern: all ones for a negative mantissa, zero otherwise
+    uint64_t carrier = 0;
+    if (int64_t(mantissa[0]) < int64_t(0))
+    {
+        carrier = uint64_t(-1);
+    }
+
+    // Shift by whole words first
+    while (bits >= 64)
+    {
+        for (int i = VHACD_GOOGOL_SIZE - 2; i >= 0; i--)
+        {
+            mantissa[i + 1] = mantissa[i];
+        }
+        mantissa[0] = carrier;
+        bits -= 64;
+    }
+
+    // Then by the remaining 1..63 bits, carrying bits from each word into the next
+    if (bits > 0)
+    {
+        carrier <<= (64 - bits);
+        for (int i = 0; i < VHACD_GOOGOL_SIZE; i++)
+        {
+            uint64_t a = mantissa[i];
+            mantissa[i] = (a >> bits) | carrier;
+            carrier = a << (64 - bits);
+        }
+    }
+}
+
+/*
+ * Return 1 when the unsigned 64-bit sum a + b would wrap around, 0 otherwise.
+ */
+uint64_t Googol::CheckCarrier(uint64_t a, uint64_t b) const
+{
+    // Largest value that can still be added to b without wrapping.
+    const uint64_t headroom = uint64_t(-1) - b;
+    if (headroom < a)
+    {
+        return uint64_t(1);
+    }
+    return uint64_t(0);
+}
+
+/*
+ * Count the zero bits above the most significant set bit of `a` using a
+ * six-step binary search over progressively finer bit masks.
+ *
+ * Note: for a == 0 every step adds its increment, so the result is
+ * 32 + 16 + 8 + 4 + 2 + 1 = 63 (not 64).
+ *
+ * The search step is a local lambda rather than a preprocessor macro so that
+ * no helper name leaks out of this function.
+ */
+int Googol::LeadingZeros(uint64_t a) const
+{
+    int n = 0;
+
+    // If none of the bits selected by `mask` are set, the answer grows by `add`
+    // and the search continues in the unselected bits; otherwise it continues
+    // in the selected bits.
+    const auto countBit = [&a, &n](uint64_t mask, int add)
+    {
+        const uint64_t test = a & mask;
+        n += test ? 0 : add;
+        a = test ? test : (a & ~mask);
+    };
+
+    countBit(0xffffffff00000000ULL, 32);
+    countBit(0xffff0000ffff0000ULL, 16);
+    countBit(0xff00ff00ff00ff00ULL, 8);
+    countBit(0xf0f0f0f0f0f0f0f0ULL, 4);
+    countBit(0xccccccccccccccccULL, 2);
+    countBit(0xaaaaaaaaaaaaaaaaULL, 1);
+
+    return n;
+}
+
+/*
+ * Full 64 x 64 -> 128 bit unsigned multiply built from four 32 x 32 partial
+ * products. The upper 64 bits of a * b are written to `high`, the lower 64
+ * bits to `low`.
+ */
+void Googol::ExtendedMultiply(uint64_t a,
+                              uint64_t b,
+                              uint64_t& high,
+                              uint64_t& low) const
+{
+    const uint64_t aLo = a & 0xffffffff;
+    const uint64_t aHi = a >> 32;
+    const uint64_t bLo = b & 0xffffffff;
+    const uint64_t bHi = b >> 32;
+
+    const uint64_t lowProduct = bLo * aLo;
+
+    // The two cross terms; their sum may wrap, in which case the lost carry
+    // (worth 2^64 in the middle column) is re-added to the high part as 1 << 32.
+    const uint64_t cross1 = bHi * aLo;
+    const uint64_t cross2 = bLo * aHi;
+    const uint64_t middle = cross1 + cross2;
+    const uint64_t middleCarry = CheckCarrier(cross1, cross2) << 32;
+
+    const uint64_t highProduct = bHi * aHi + middleCarry;
+
+    // Fold the middle column into the low and high words.
+    const uint64_t middleLow = middle << 32;
+    const uint64_t middleHigh = (middle >> 32) + CheckCarrier(lowProduct, middleLow);
+
+    low = lowProduct + middleLow;
+    high = highProduct + middleHigh;
+}
+
+/*
+ * Multiply this number's mantissa by the 64-bit value `scale` and store the
+ * product in `dst`. The loop writes dst[1] .. dst[VHACD_GOOGOL_SIZE] and the
+ * final carry goes to dst[0], so `dst` must provide VHACD_GOOGOL_SIZE + 1 words.
+ */
+void Googol::ScaleMantissa(uint64_t* dst,
+                           uint64_t scale) const
+{
+    uint64_t carrier = 0;
+    for (int i = VHACD_GOOGOL_SIZE - 1; i >= 0; i--)
+    {
+        const uint64_t word = m_mantissa[i];
+        if (!word)
+        {
+            // A zero word contributes nothing: just flush the pending carry.
+            dst[i + 1] = carrier;
+            carrier = 0;
+            continue;
+        }
+
+        uint64_t productLow;
+        uint64_t productHigh;
+        ExtendedMultiply(scale,
+                         word,
+                         productHigh,
+                         productLow);
+
+        dst[i + 1] = productLow + carrier;
+        // Next carry: overflow of the addition above plus the high product word.
+        carrier = CheckCarrier(productLow,
+                               carrier) + productHigh;
+    }
+    dst[0] = carrier;
+}
+
+/*
+ * Determinant of a 3x3 matrix of Googol values, expanded along the third row
+ * (matrix[2]) with cofactors taken from the first two rows.
+ */
+Googol Determinant3x3(const std::array<VHACD::Vector3<Googol>, 3>& matrix)
+{
+    const VHACD::Vector3<Googol>& row0 = matrix[0];
+    const VHACD::Vector3<Googol>& row1 = matrix[1];
+    const VHACD::Vector3<Googol>& row2 = matrix[2];
+
+    Googol det = double(0.0);
+
+    // + (y0*z1 - z0*y1) * x2
+    Googol y0z1 = row0.GetY() * row1.GetZ();
+    Googol z0y1 = row0.GetZ() * row1.GetY();
+    det += (y0z1 - z0y1) * row2.GetX();
+
+    // - (x0*z1 - z0*x1) * y2
+    Googol x0z1 = row0.GetX() * row1.GetZ();
+    Googol z0x1 = row0.GetZ() * row1.GetX();
+    det -= (x0z1 - z0x1) * row2.GetY();
+
+    // + (x0*y1 - y0*x1) * z2
+    Googol x0y1 = row0.GetX() * row1.GetY();
+    Googol y0x1 = row0.GetY() * row1.GetX();
+    det += (x0y1 - y0x1) * row2.GetZ();
+
+    return det;
+}
+
+/*
+ * Plane with four coefficients: x, y, z are held by the inherited Vect3 and
+ * the fourth coefficient is m_w. Evalue(point) returns Dot(point) + m_w.
+ */
+class HullPlane : public VHACD::Vect3
+{
+public:
+    HullPlane(const HullPlane&) = default;
+
+    // Build from four explicit coefficients.
+    HullPlane(double x, double y, double z, double w);
+
+    // Build from a vector part and the fourth coefficient.
+    HullPlane(const VHACD::Vect3& p, double w);
+
+    // Build the plane through three points (vector part is not normalized).
+    HullPlane(const VHACD::Vect3& p0, const VHACD::Vect3& p1, const VHACD::Vect3& p2);
+
+    // Return a copy with all four coefficients multiplied by s.
+    HullPlane Scale(double s) const;
+
+    HullPlane& operator=(const HullPlane& rhs);
+
+    // Evaluate the plane equation at `point`.
+    double Evalue(const VHACD::Vect3 &point) const;
+
+    // Access to the fourth coefficient.
+    double& GetW();
+    const double& GetW() const;
+
+private:
+    double m_w;
+};
+
+// Construct from the four coefficients: (x, y, z) go to the Vect3 base, w to m_w.
+HullPlane::HullPlane(double x, double y, double z, double w)
+    : VHACD::Vect3(x, y, z),
+      m_w(w)
+{
+}
+
+// Construct from a vector part `p` (copied into the Vect3 base) and the fourth coefficient `w`.
+HullPlane::HullPlane(const VHACD::Vect3& p, double w)
+    : VHACD::Vect3(p),
+      m_w(w)
+{
+}
+
+/*
+ * Construct the plane through p0, p1 and p2. The vector part is the
+ * (unnormalized) cross product (p1 - p0) x (p2 - p0); m_w is then set to
+ * minus the dot product of that vector with p0, so Evalue(p0) is zero up to
+ * rounding. The base is initialized before m_w, which is what makes the
+ * Dot(p0) call in the initializer valid.
+ */
+HullPlane::HullPlane(const VHACD::Vect3& p0, const VHACD::Vect3& p1, const VHACD::Vect3& p2)
+    : VHACD::Vect3((p1 - p0).Cross(p2 - p0)),
+      m_w(-Dot(p0))
+{
+}
+
+// Return a new plane whose vector part and fourth coefficient are both multiplied by s.
+HullPlane HullPlane::Scale(double s) const
+{
+    const VHACD::Vect3 scaledVector = *this * s;
+    const double scaledW = m_w * s;
+    return HullPlane(scaledVector, scaledW);
+}
+
+// Copy all four coefficients from rhs, component by component.
+HullPlane& HullPlane::operator=(const HullPlane& rhs)
+{
+    m_w = rhs.m_w;
+    GetZ() = rhs.GetZ();
+    GetY() = rhs.GetY();
+    GetX() = rhs.GetX();
+    return *this;
+}
+
+// Evaluate the plane equation at `point`: dot(vector part, point) + m_w.
+double HullPlane::Evalue(const VHACD::Vect3& point) const
+{
+    const double projection = Dot(point);
+    return projection + m_w;
+}
+
+// Mutable access to the fourth plane coefficient.
+double& HullPlane::GetW() { return m_w; }
+
+// Read-only access to the fourth plane coefficient.
+const double& HullPlane::GetW() const { return m_w; }
+
+/*
+ * Triangle of the hull, described by three indices into an external point
+ * array. m_mark and m_twin are private and reachable only by ConvexHull
+ * (declared friend).
+ */
+class ConvexHullFace
+{
+public:
+    ConvexHullFace() = default;
+
+    // Signed determinant test of `point` against this face (see definition).
+    double Evalue(const std::vector<VHACD::Vect3>& pointArray, const VHACD::Vect3& point) const;
+
+    // Plane through the face's three vertices; isValid reports whether it could be normalized.
+    HullPlane GetPlaneEquation(const std::vector<VHACD::Vect3>& pointArray, bool& isValid) const;
+
+    // Indices of the three vertices in the point array.
+    std::array<int, 3> m_index;
+
+private:
+    int m_mark{ 0 };
+    // One list iterator per edge, pointing at another face in the hull's face list.
+    std::array<std::list<ConvexHullFace>::iterator, 3> m_twin;
+
+    friend class ConvexHull;
+};
+
+/*
+ * Return the determinant of the rows (p2 - p0, p1 - p0, point - p0), where
+ * p0..p2 are this face's vertices taken from pointArray.
+ *
+ * A double-precision determinant is tried first; it is accepted when its
+ * magnitude exceeds the reported error estimate scaled by 2^-24. Otherwise
+ * the determinant is recomputed with Googol arithmetic and that value is
+ * returned.
+ */
+double ConvexHullFace::Evalue(const std::vector<VHACD::Vect3>& pointArray,
+                              const VHACD::Vect3& point) const
+{
+    const VHACD::Vect3& v0 = pointArray[m_index[0]];
+    const VHACD::Vect3& v1 = pointArray[m_index[1]];
+    const VHACD::Vect3& v2 = pointArray[m_index[2]];
+
+    // Fast path: plain doubles plus an error estimate from Determinant3x3.
+    std::array<VHACD::Vect3, 3> rows = { v2 - v0, v1 - v0, point - v0 };
+    double error;
+    const double det = Determinant3x3(rows,
+                                      error);
+
+    // The arithmetic is done in double, but the acceptance threshold is the
+    // float-sized 2^-24. A smaller power of two (e.g. 2^-30) would send fewer
+    // cases to the exact path and therefore run faster.
+    const double precision = double(1.0) / double(1 << 24);
+    const double errbound = error * precision;
+    if (fabs(det) > errbound)
+    {
+        return det;
+    }
+
+    // Slow path: redo the same determinant with Googol values.
+    const VHACD::Vector3<Googol> g0 = pointArray[m_index[0]];
+    const VHACD::Vector3<Googol> g1 = pointArray[m_index[1]];
+    const VHACD::Vector3<Googol> g2 = pointArray[m_index[2]];
+    const VHACD::Vector3<Googol> gPoint = point;
+    std::array<VHACD::Vector3<Googol>, 3> exactRows = { g2 - g0, g1 - g0, gPoint - g0 };
+    return Determinant3x3(exactRows);
+}
+
+/*
+ * Build the plane through this face's three vertices. When the squared length
+ * of the plane's vector part exceeds 1e-16 the plane is scaled to unit length
+ * and isvalid is set to true; otherwise the unscaled plane is returned and
+ * isvalid is false.
+ */
+HullPlane ConvexHullFace::GetPlaneEquation(const std::vector<VHACD::Vect3>& pointArray,
+                                           bool& isvalid) const
+{
+    const VHACD::Vect3& v0 = pointArray[m_index[0]];
+    const VHACD::Vect3& v1 = pointArray[m_index[1]];
+    const VHACD::Vect3& v2 = pointArray[m_index[2]];
+    HullPlane plane(v0, v1, v2);
+
+    const double lengthSquared = plane.Dot(plane);
+    isvalid = lengthSquared > double(1.0e-16);
+    if (isvalid)
+    {
+        plane = plane.Scale(double(1.0) / sqrt(lengthSquared));
+    }
+    return plane;
+}
+
+/*
+ * A Vect3 with an extra integer mark (zero by default). The base class
+ * assignment operators are re-exported so a plain Vect3 can be assigned to it.
+ */
+class ConvexHullVertex : public VHACD::Vect3
+{
+public:
+    ConvexHullVertex() = default;
+    ConvexHullVertex(const ConvexHullVertex&) = default;
+
+    ConvexHullVertex& operator=(const ConvexHullVertex& rhs) = default;
+    using VHACD::Vect3::operator=;
+
+    int m_mark{ 0 };
+};
+
+
+/*
+ * Node of the AABB tree built over the hull's input points.
+ *
+ * m_box[0] / m_box[1] hold the minimum / maximum corner of the node's bounding
+ * box. Leaf nodes (m_left and m_right both null) list up to
+ * VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE point indices in m_indices, with
+ * m_count giving how many are in use.
+ *
+ * m_count and m_indices are value-initialized so that a default-constructed
+ * node, or an internal node whose count is never assigned, does not carry
+ * indeterminate values.
+ */
+class ConvexHullAABBTreeNode
+{
+    #define VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE 8
+public:
+    ConvexHullAABBTreeNode() = default;
+    ConvexHullAABBTreeNode(ConvexHullAABBTreeNode* parent);
+
+    VHACD::Vect3 m_box[2];
+    ConvexHullAABBTreeNode* m_left{ nullptr };
+    ConvexHullAABBTreeNode* m_right{ nullptr };
+    ConvexHullAABBTreeNode* m_parent{ nullptr };
+
+    size_t m_count{ 0 };
+    std::array<size_t, VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE> m_indices{};
+};
+
+// Create a node that records `parent` as its parent; every other member keeps its default.
+ConvexHullAABBTreeNode::ConvexHullAABBTreeNode(ConvexHullAABBTreeNode* parent)
+    : m_parent{ parent }
+{
+}
+
+/*
+ * Convex hull of a point cloud. The result is exposed as a list of triangular
+ * faces (GetList) whose indices refer to the vertex pool (GetVertexPool).
+ */
+class ConvexHull
+{
+    class ndNormalMap;
+
+public:
+    ConvexHull(const ConvexHull& source);
+
+    // Builds the hull when vertexCloud has at least four entries; otherwise the hull stays empty.
+    ConvexHull(const std::vector<::VHACD::Vertex>& vertexCloud,
+               double distTol,
+               int maxVertexCount = 0x7fffffff);
+
+    ~ConvexHull() = default;
+
+    const std::vector<VHACD::Vect3>& GetVertexPool() const;
+
+    const std::list<ConvexHullFace>& GetList() const { return m_list; }
+
+private:
+    // Top-level driver used by the constructor.
+    void BuildHull(const std::vector<::VHACD::Vertex>& vertexCloud, double distTol, int maxVertexCount);
+
+    // Sorts `points` and removes exact duplicates in place.
+    void GetUniquePoints(std::vector<ConvexHullVertex>& points);
+
+    int InitVertexArray(std::vector<ConvexHullVertex>& points, NodeBundle<ConvexHullAABBTreeNode>& memoryPool);
+
+    // AABB tree construction (two variants plus the recursive worker of the "old" one).
+    ConvexHullAABBTreeNode* BuildTreeNew(std::vector<ConvexHullVertex>& points,
+                                         std::vector<ConvexHullAABBTreeNode>& memoryPool) const;
+    ConvexHullAABBTreeNode* BuildTreeOld(std::vector<ConvexHullVertex>& points,
+                                         NodeBundle<ConvexHullAABBTreeNode>& memoryPool);
+    ConvexHullAABBTreeNode* BuildTreeRecurse(ConvexHullAABBTreeNode* const parent,
+                                             ConvexHullVertex* const points,
+                                             int count,
+                                             int baseIndex,
+                                             NodeBundle<ConvexHullAABBTreeNode>& memoryPool) const;
+
+    std::list<ConvexHullFace>::iterator AddFace(int i0, int i1, int i2);
+
+    void CalculateConvexHull3D(ConvexHullAABBTreeNode* vertexTree,
+                               std::vector<ConvexHullVertex>& points,
+                               int count,
+                               double distTol,
+                               int maxVertexCount);
+
+    int SupportVertex(ConvexHullAABBTreeNode** const tree,
+                      const std::vector<ConvexHullVertex>& points,
+                      const VHACD::Vect3& dir,
+                      const bool removeEntry = true) const;
+
+    double TetrahedrumVolume(const VHACD::Vect3& p0,
+                             const VHACD::Vect3& p1,
+                             const VHACD::Vect3& p2,
+                             const VHACD::Vect3& p3) const;
+
+    std::list<ConvexHullFace> m_list;       // faces of the hull
+    VHACD::Vect3 m_aabbP0{ 0 };
+    VHACD::Vect3 m_aabbP1{ 0 };
+    double m_diag{ 0.0 };
+    std::vector<VHACD::Vect3> m_points;     // vertex pool returned by GetVertexPool
+};
+
+/*
+ * Table of 128 directions produced by subdividing the eight faces of an
+ * octahedron (see the constructor). A single shared instance is available
+ * through GetNormalMap().
+ */
+class ConvexHull::ndNormalMap
+{
+public:
+    ndNormalMap();
+
+    static const ndNormalMap& GetNormalMap();
+
+    // Recursively subdivides triangle (p0, p1, p2); at level 0 stores its normal and bumps `count`.
+    void TessellateTriangle(int level,
+                            const VHACD::Vect3& p0,
+                            const VHACD::Vect3& p1,
+                            const VHACD::Vect3& p2,
+                            int& count);
+
+    std::array<VHACD::Vect3, 128> m_normal;
+    int m_count{ 128 };
+};
+
+// Return the shared normal table; it is a function-local static, built on the first call.
+const ConvexHull::ndNormalMap& ConvexHull::ndNormalMap::GetNormalMap()
+{
+    static ndNormalMap sharedMap;
+    return sharedMap;
+}
+
+/*
+ * Recursively split the unit-sphere triangle (p0, p1, p2) into four by
+ * normalizing its edge midpoints, `level` times. At level 0 the triangle's
+ * unit normal is written to m_normal at the bit-reversed position of `count`,
+ * and `count` is incremented.
+ */
+void ConvexHull::ndNormalMap::TessellateTriangle(int level,
+                                                 const VHACD::Vect3& p0,
+                                                 const VHACD::Vect3& p1,
+                                                 const VHACD::Vect3& p2,
+                                                 int& count)
+{
+    if (!level)
+    {
+        /*
+         * Equivalent to m_normal[index] = n.Normalized(), but that form produces
+         * slightly different floating point results, so the plane-based
+         * computation is kept as is.
+         */
+        HullPlane n(p0, p1, p2);
+        n = n.Scale(double(1.0) / n.GetNorm());
+        n.GetW() = double(0.0);
+        int index = dBitReversal(count,
+                                 int(m_normal.size()));
+        m_normal[index] = n;
+        count++;
+        assert(count <= int(m_normal.size()));
+        return;
+    }
+
+    // The corners are expected to lie on the unit sphere.
+    assert(fabs(p0.Dot(p0) - double(1.0)) < double(1.0e-4));
+    assert(fabs(p1.Dot(p1) - double(1.0)) < double(1.0e-4));
+    assert(fabs(p2.Dot(p2) - double(1.0)) < double(1.0e-4));
+
+    // Edge midpoints, pushed back onto the unit sphere.
+    VHACD::Vect3 mid01(p0 + p1);
+    VHACD::Vect3 mid12(p1 + p2);
+    VHACD::Vect3 mid20(p2 + p0);
+
+    mid01 = mid01 * (double(1.0) / mid01.GetNorm());
+    mid12 = mid12 * (double(1.0) / mid12.GetNorm());
+    mid20 = mid20 * (double(1.0) / mid20.GetNorm());
+
+    assert(fabs(mid01.GetNormSquared() - double(1.0)) < double(1.0e-4));
+    assert(fabs(mid12.GetNormSquared() - double(1.0)) < double(1.0e-4));
+    assert(fabs(mid20.GetNormSquared() - double(1.0)) < double(1.0e-4));
+
+    // Three corner triangles followed by the central one.
+    const int next = level - 1;
+    TessellateTriangle(next, p0,    mid01, mid20, count);
+    TessellateTriangle(next, p1,    mid12, mid01, count);
+    TessellateTriangle(next, p2,    mid20, mid12, count);
+    TessellateTriangle(next, mid01, mid12, mid20, count);
+}
+
+/*
+ * Fill m_normal by tessellating the eight faces of the octahedron whose
+ * vertices are the +/- unit axes, two subdivision levels each
+ * (8 faces * 4^2 triangles = 128 normals).
+ */
+ConvexHull::ndNormalMap::ndNormalMap()
+{
+    const VHACD::Vect3 posX(double( 1.0), double( 0.0), double( 0.0));
+    const VHACD::Vect3 negX(double(-1.0), double( 0.0), double( 0.0));
+    const VHACD::Vect3 posY(double( 0.0), double( 1.0), double( 0.0));
+    const VHACD::Vect3 negY(double( 0.0), double(-1.0), double( 0.0));
+    const VHACD::Vect3 posZ(double( 0.0), double( 0.0), double( 1.0));
+    const VHACD::Vect3 negZ(double( 0.0), double( 0.0), double(-1.0));
+
+    const int subdivisions = 2;
+    int count = 0;
+
+    // Four faces around +Y ...
+    TessellateTriangle(subdivisions, posZ, posX, posY, count);
+    TessellateTriangle(subdivisions, posX, negZ, posY, count);
+    TessellateTriangle(subdivisions, negZ, negX, posY, count);
+    TessellateTriangle(subdivisions, negX, posZ, posY, count);
+    // ... then four faces around -Y.
+    TessellateTriangle(subdivisions, posX, posZ, negY, count);
+    TessellateTriangle(subdivisions, negZ, posX, negY, count);
+    TessellateTriangle(subdivisions, negX, negZ, negY, count);
+    TessellateTriangle(subdivisions, posZ, negX, negY, count);
+}
+
+/*
+ * Build the hull of vertexCloud. Clouds with fewer than four vertices are
+ * ignored and leave the hull empty.
+ */
+ConvexHull::ConvexHull(const std::vector<::VHACD::Vertex>& vertexCloud,
+                       double distTol,
+                       int maxVertexCount)
+{
+    if (vertexCloud.size() < 4)
+    {
+        return;
+    }
+    BuildHull(vertexCloud, distTol, maxVertexCount);
+}
+
+// Read-only access to the hull's vertex pool (m_points).
+const std::vector<VHACD::Vect3>& ConvexHull::GetVertexPool() const { return m_points; }
+
+/*
+ * Convert the input cloud into ConvexHullVertex points, hand them to
+ * InitVertexArray together with the tree node pool, and, when m_points ends up
+ * with at least four entries, run CalculateConvexHull3D on the tree's first
+ * node.
+ *
+ * NOTE(review): m_points is presumably filled by InitVertexArray, which is not
+ * visible here.
+ */
+void ConvexHull::BuildHull(const std::vector<::VHACD::Vertex>& vertexCloud,
+                           double distTol,
+                           int maxVertexCount)
+{
+    std::vector<ConvexHullVertex> points(vertexCloud.size());
+    /*
+     * treePool provides a memory pool for the AABB tree
+     * Each node is either a leaf or non-leaf node
+     * Leaf nodes hold up to VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE (8) vertices
+     * Vertices are specified by the m_indices array and are accessed via the points array
+     *
+     * Leaf and non-leaf nodes are told apart by whether the m_left and m_right
+     * pointers are null or not
+     *
+     * Pointers have to be stable
+     */
+    NodeBundle<ConvexHullAABBTreeNode> treePool;
+    for (size_t i = 0; i < vertexCloud.size(); ++i)
+    {
+        points[i] = VHACD::Vect3(vertexCloud[i]);
+    }
+    int count = InitVertexArray(points,
+                                treePool);
+
+    if (m_points.size() >= 4)
+    {
+        CalculateConvexHull3D(&treePool.GetFirstNode(),
+                              points,
+                              count,
+                              distTol,
+                              maxVertexCount);
+    }
+}
+
+/*
+ * Sort `points` lexicographically by (x, y, z) and remove exact duplicates in
+ * place, keeping the first element of every run of equal coordinates. The
+ * vector is shrunk to the number of distinct points.
+ *
+ * An empty input is left empty (the trailing resize would otherwise grow it to
+ * a single default-constructed vertex).
+ */
+void ConvexHull::GetUniquePoints(std::vector<ConvexHullVertex>& points)
+{
+    // Three-way lexicographic comparison on the coordinates, as required by Sort.
+    class CompareVertex
+    {
+        public:
+        int Compare(const ConvexHullVertex& elementA, const ConvexHullVertex& elementB) const
+        {
+            for (int i = 0; i < 3; i++)
+            {
+                if (elementA[i] < elementB[i])
+                {
+                    return -1;
+                }
+                else if (elementA[i] > elementB[i])
+                {
+                    return 1;
+                }
+            }
+            return 0;
+        }
+    };
+
+    if (points.empty())
+    {
+        return;
+    }
+
+    int count = int(points.size());
+    Sort<ConvexHullVertex, CompareVertex>(points.data(),
+                                          count);
+
+    // indexCount is the position of the last unique point kept so far.
+    int indexCount = 0;
+    CompareVertex compareVertex;
+    for (int i = 1; i < count; ++i)
+    {
+        if (compareVertex.Compare(points[indexCount], points[i]))
+        {
+            indexCount++;
+            points[indexCount] = points[i];
+        }
+    }
+    points.resize(indexCount + 1);
+}
+
+/*
+ * Recursively build the AABB tree over points[0 .. count).
+ *
+ * Ranges of at most VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE points become a
+ * leaf whose m_indices are baseIndex, baseIndex + 1, ... Larger ranges are
+ * partitioned in place around the mean along the axis of greatest spread and
+ * each half is built recursively. `baseIndex` is the position of points[0] in
+ * the full point array. Nodes come from memoryPool; the returned node has its
+ * parent and a slightly inflated bounding box set.
+ */
+ConvexHullAABBTreeNode* ConvexHull::BuildTreeRecurse(ConvexHullAABBTreeNode* const parent,
+                                                     ConvexHullVertex* const points,
+                                                     int count,
+                                                     int baseIndex,
+                                                     NodeBundle<ConvexHullAABBTreeNode>& memoryPool) const
+{
+    assert(count);
+
+    ConvexHullAABBTreeNode* tree = nullptr;
+    VHACD::Vect3 boxMin( double(1.0e15));
+    VHACD::Vect3 boxMax(-double(1.0e15));
+
+    if (count > VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE)
+    {
+        // Internal node: gather bounds plus first and second moments of the points.
+        VHACD::Vect3 sum(0);
+        VHACD::Vect3 sumSquares(0);
+        for (int i = 0; i < count; ++i)
+        {
+            const VHACD::Vect3& p = points[i];
+            boxMin = boxMin.CWiseMin(p);
+            boxMax = boxMax.CWiseMax(p);
+            sum += p;
+            sumSquares += p.CWiseMul(p);
+        }
+
+        // Per-axis spread (variance scaled by count^2); split along the largest.
+        sumSquares = sumSquares * double(count) - sum.CWiseMul(sum);
+        int axis = 0;
+        double largestSpread = double(-1.0e10);
+        for (int i = 0; i < 3; ++i)
+        {
+            if (sumSquares[i] > largestSpread)
+            {
+                axis = i;
+                largestSpread = sumSquares[i];
+            }
+        }
+        VHACD::Vect3 center(sum * (double(1.0) / double(count)));
+
+        const double pivot = center[axis];
+
+        // In-place partition around the pivot value.
+        int lo = 0;
+        int hi = count - 1;
+        do
+        {
+            // Advance lo to the first element strictly above the pivot.
+            while (lo <= hi && !(points[lo][axis] > pivot))
+            {
+                lo++;
+            }
+
+            // Retreat hi to the last element strictly below the pivot.
+            while (hi >= lo && !(points[hi][axis] < pivot))
+            {
+                hi--;
+            }
+
+            if (lo < hi)
+            {
+                std::swap(points[lo],
+                          points[hi]);
+                lo++;
+                hi--;
+            }
+        } while (lo <= hi);
+
+        // Degenerate split (one side empty or a single trailing point): cut in the middle.
+        if (lo == 0 || lo >= (count - 1))
+        {
+            lo = count / 2;
+        }
+
+        tree = &memoryPool.GetNextNode();
+
+        assert(lo);
+        assert(count - lo);
+
+        tree->m_left = BuildTreeRecurse(tree,
+                                        points,
+                                        lo,
+                                        baseIndex,
+                                        memoryPool);
+        tree->m_right = BuildTreeRecurse(tree,
+                                         &points[lo],
+                                         count - lo,
+                                         lo + baseIndex,
+                                         memoryPool);
+    }
+    else
+    {
+        // Leaf node: record the indices of its points and accumulate their bounds.
+        ConvexHullAABBTreeNode& leaf = memoryPool.GetNextNode();
+
+        leaf.m_count = count;
+        for (int i = 0; i < count; ++i)
+        {
+            leaf.m_indices[i] = i + baseIndex;
+
+            const VHACD::Vect3& p = points[i];
+            boxMin = boxMin.CWiseMin(p);
+            boxMax = boxMax.CWiseMax(p);
+        }
+
+        leaf.m_left = nullptr;
+        leaf.m_right = nullptr;
+        tree = &leaf;
+    }
+
+    assert(tree);
+    tree->m_parent = parent;
+    /*
+     * WARNING: Changing the compiler conversion of 1.0e-3f changes the results of the convex decomposition
+     * Inflate the tree's bounding box slightly
+     */
+    tree->m_box[0] = boxMin - VHACD::Vect3(double(1.0e-3f));
+    tree->m_box[1] = boxMax + VHACD::Vect3(double(1.0e-3f));
+    return tree;
+}
+
+/*
+ * Deduplicate `points` in place and build the AABB tree over what remains.
+ * Returns nullptr when fewer than four distinct points are left.
+ */
+ConvexHullAABBTreeNode* ConvexHull::BuildTreeOld(std::vector<ConvexHullVertex>& points,
+                                                 NodeBundle<ConvexHullAABBTreeNode>& memoryPool)
+{
+    GetUniquePoints(points);
+
+    const int uniqueCount = int(points.size());
+    if (uniqueCount >= 4)
+    {
+        return BuildTreeRecurse(nullptr, points.data(), uniqueCount, 0, memoryPool);
+    }
+    return nullptr;
+}
+
+ConvexHullAABBTreeNode* ConvexHull::BuildTreeNew(std::vector<ConvexHullVertex>& points,
+                                                 std::vector<ConvexHullAABBTreeNode>& memoryPool) const
+{
+    class dCluster
+    {
+        public:
+        VHACD::Vect3 m_sum{ double(0.0) };
+        VHACD::Vect3 m_sum2{ double(0.0) };
+        int m_start{ 0 };
+        int m_count{ 0 };
+    };
+
+    dCluster firstCluster;
+    firstCluster.m_count = int(points.size());
+
+    for (int i = 0; i < firstCluster.m_count; ++i)
+    {
+        const VHACD::Vect3& p = points[i];
+        firstCluster.m_sum += p;
+        firstCluster.m_sum2 += p.CWiseMul(p);
+    }
+
+    int baseCount = 0;
+    const int clusterSize = 16;
+
+    if (firstCluster.m_count > clusterSize)
+    {
+        dCluster spliteStack[128];
+        spliteStack[0] = firstCluster;
+        size_t stack = 1;
+
+        while (stack)
+        {
+            stack--;
+            dCluster cluster (spliteStack[stack]);
+
+            const VHACD::Vect3 origin(cluster.m_sum * (double(1.0) / cluster.m_count));
+            const VHACD::Vect3 variance2(cluster.m_sum2 * (double(1.0) / cluster.m_count) - origin.CWiseMul(origin));
+            double maxVariance2 = variance2.MaxCoeff();
+
+            if (   (cluster.m_count <= clusterSize)
+                || (stack > (sizeof(spliteStack) / sizeof(spliteStack[0]) - 4))
+                || (maxVariance2 < 1.e-4f))
+            {
+                // no sure if this is beneficial,
+                // the array is so small that seem too much overhead
+                //int maxIndex = 0;
+                //double min_x = 1.0e20f;
+                //for (int i = 0; i < cluster.m_count; ++i)
+                //{
+                //	if (points[cluster.m_start + i].getX() < min_x)
+                //	{
+                //		maxIndex = i;
+                //		min_x = points[cluster.m_start + i].getX();
+                //	}
+                //}
+                //Swap(points[cluster.m_start], points[cluster.m_start + maxIndex]);
+                //
+                //for (int i = 2; i < cluster.m_count; ++i)
+                //{
+                //	int j = i;
+                //	ConvexHullVertex tmp(points[cluster.m_start + i]);
+                //	for (; points[cluster.m_start + j - 1].getX() > tmp.getX(); --j)
+                //	{
+                //		assert(j > 0);
+                //		points[cluster.m_start + j] = points[cluster.m_start + j - 1];
+                //	}
+                //	points[cluster.m_start + j] = tmp;
+                //}
+
+                int count = cluster.m_count;
+                for (int i = cluster.m_count - 1; i > 0; --i)
+                {
+                    for (int j = i - 1; j >= 0; --j)
+                    {
+                        VHACD::Vect3 error(points[cluster.m_start + j] - points[cluster.m_start + i]);
+                        double mag2 = error.Dot(error);
+                        if (mag2 < double(1.0e-6))
+                        {
+                            points[cluster.m_start + j] = points[cluster.m_start + i];
+                            count--;
+                            break;
+                        }
+                    }
+                }
+
+                assert(baseCount <= cluster.m_start);
+                for (int i = 0; i < count; ++i)
+                {
+                    points[baseCount] = points[cluster.m_start + i];
+                    baseCount++;
+                }
+            }
+            else
+            {
+                const int firstSortAxis = variance2.LongestAxis();
+                double axisVal = origin[firstSortAxis];
+
+                int i0 = 0;
+                int i1 = cluster.m_count - 1;
+
+                const int start = cluster.m_start;
+                while (i0 < i1)
+                {
+                    while (   (points[start + i0][firstSortAxis] <= axisVal)
+                           && (i0 < i1))
+                    {
+                        ++i0;
+                    };
+
+                    while (   (points[start + i1][firstSortAxis] > axisVal)
+                           && (i0 < i1))
+                    {
+                        --i1;
+                    }
+
+                    assert(i0 <= i1);
+                    if (i0 < i1)
+                    {
+                        std::swap(points[start + i0],
+                                  points[start + i1]);
+                        ++i0;
+                        --i1;
+                    }
+                }
+
+                while (   (points[start + i0][firstSortAxis] <= axisVal)
+                       && (i0 < cluster.m_count))
+                {
+                    ++i0;
+                };
+
+                #ifdef _DEBUG
+                for (int i = 0; i < i0; ++i)
+                {
+                    assert(points[start + i][firstSortAxis] <= axisVal);
+                }
+
+                for (int i = i0; i < cluster.m_count; ++i)
+                {
+                    assert(points[start + i][firstSortAxis] > axisVal);
+                }
+                #endif
+
+                VHACD::Vect3 xc(0);
+                VHACD::Vect3 x2c(0);
+                for (int i = 0; i < i0; ++i)
+                {
+                    const VHACD::Vect3& x = points[start + i];
+                    xc += x;
+                    x2c += x.CWiseMul(x);
+                }
+
+                dCluster cluster_i1(cluster);
+                cluster_i1.m_start = start + i0;
+                cluster_i1.m_count = cluster.m_count - i0;
+                cluster_i1.m_sum -= xc;
+                cluster_i1.m_sum2 -= x2c;
+                spliteStack[stack] = cluster_i1;
+                assert(cluster_i1.m_count > 0);
+                stack++;
+
+                dCluster cluster_i0(cluster);
+                cluster_i0.m_start = start;
+                cluster_i0.m_count = i0;
+                cluster_i0.m_sum = xc;
+                cluster_i0.m_sum2 = x2c;
+                assert(cluster_i0.m_count > 0);
+                spliteStack[stack] = cluster_i0;
+                stack++;
+            }
+        }
+    }
+
+    points.resize(baseCount);
+    if (baseCount < 4)
+    {
+        return nullptr;
+    }
+
+    VHACD::Vect3 sum(0);
+    VHACD::Vect3 sum2(0);
+    VHACD::Vect3 minP(double( 1.0e15));
+    VHACD::Vect3 maxP(double(-1.0e15));
+    class dTreeBox
+    {
+        public:
+        VHACD::Vect3 m_min;
+        VHACD::Vect3 m_max;
+        VHACD::Vect3 m_sum;
+        VHACD::Vect3 m_sum2;
+        ConvexHullAABBTreeNode* m_parent;
+        ConvexHullAABBTreeNode** m_child;
+        int m_start;
+        int m_count;
+    };
+
+    for (int i = 0; i < baseCount; ++i)
+    {
+        const VHACD::Vect3& p = points[i];
+        sum += p;
+        sum2 += p.CWiseMul(p);
+        minP = minP.CWiseMin(p);
+        maxP = maxP.CWiseMax(p);
+    }
+
+    dTreeBox treeBoxStack[128];
+    treeBoxStack[0].m_start = 0;
+    treeBoxStack[0].m_count = baseCount;
+    treeBoxStack[0].m_sum = sum;
+    treeBoxStack[0].m_sum2 = sum2;
+    treeBoxStack[0].m_min = minP;
+    treeBoxStack[0].m_max = maxP;
+    treeBoxStack[0].m_child = nullptr;
+    treeBoxStack[0].m_parent = nullptr;
+
+    int stack = 1;
+    ConvexHullAABBTreeNode* root = nullptr;
+    while (stack)
+    {
+        stack--;
+        dTreeBox box(treeBoxStack[stack]);
+        if (box.m_count <= VHACD_CONVEXHULL_3D_VERTEX_CLUSTER_SIZE)
+        {
+            assert(memoryPool.size() != memoryPool.capacity()
+                   && "memoryPool is going to be reallocated, pointers will be invalid");
+            memoryPool.emplace_back();
+            ConvexHullAABBTreeNode& clump = memoryPool.back();
+
+            clump.m_count = box.m_count;
+            for (int i = 0; i < box.m_count; ++i)
+            {
+                clump.m_indices[i] = i + box.m_start;
+            }
+            clump.m_box[0] = box.m_min;
+            clump.m_box[1] = box.m_max;
+
+            if (box.m_child)
+            {
+                *box.m_child = &clump;
+            }
+
+            if (!root)
+            {
+                root = &clump;
+            }
+        }
+        else
+        {
+            const VHACD::Vect3 origin(box.m_sum * (double(1.0) / box.m_count));
+            const VHACD::Vect3 variance2(box.m_sum2 * (double(1.0) / box.m_count) - origin.CWiseMul(origin));
+
+            int firstSortAxis = 0;
+            if ((variance2.GetY() >= variance2.GetX()) && (variance2.GetY() >= variance2.GetZ()))
+            {
+                firstSortAxis = 1;
+            }
+            else if ((variance2.GetZ() >= variance2.GetX()) && (variance2.GetZ() >= variance2.GetY()))
+            {
+                firstSortAxis = 2;
+            }
+            double axisVal = origin[firstSortAxis];
+
+            int i0 = 0;
+            int i1 = box.m_count - 1;
+
+            const int start = box.m_start;
+            while (i0 < i1)
+            {
+                while ((points[start + i0][firstSortAxis] <= axisVal) && (i0 < i1))
+                {
+                    ++i0;
+                };
+
+                while ((points[start + i1][firstSortAxis] > axisVal) && (i0 < i1))
+                {
+                    --i1;
+                }
+
+                assert(i0 <= i1);
+                if (i0 < i1)
+                {
+                    std::swap(points[start + i0],
+                              points[start + i1]);
+                    ++i0;
+                    --i1;
+                }
+            }
+
+            while ((points[start + i0][firstSortAxis] <= axisVal) && (i0 < box.m_count))
+            {
+                ++i0;
+            };
+
+            #ifdef _DEBUG
+            for (int i = 0; i < i0; ++i)
+            {
+                assert(points[start + i][firstSortAxis] <= axisVal);
+            }
+
+            for (int i = i0; i < box.m_count; ++i)
+            {
+                assert(points[start + i][firstSortAxis] > axisVal);
+            }
+            #endif
+
+            assert(memoryPool.size() != memoryPool.capacity()
+                   && "memoryPool is going to be reallocated, pointers will be invalid");
+            memoryPool.emplace_back();
+            ConvexHullAABBTreeNode& node = memoryPool.back();
+
+            node.m_box[0] = box.m_min;
+            node.m_box[1] = box.m_max;
+            if (box.m_child)
+            {
+                *box.m_child = &node;
+            }
+
+            if (!root)
+            {
+                root = &node;
+            }
+
+            {
+                VHACD::Vect3 xc(0);
+                VHACD::Vect3 x2c(0);
+                VHACD::Vect3 p0(double( 1.0e15));
+                VHACD::Vect3 p1(double(-1.0e15));
+                for (int i = i0; i < box.m_count; ++i)
+                {
+                    const VHACD::Vect3& p = points[start + i];
+                    xc += p;
+                    x2c += p.CWiseMul(p);
+                    p0 = p0.CWiseMin(p);
+                    p1 = p1.CWiseMax(p);
+                }
+
+                dTreeBox cluster_i1(box);
+                cluster_i1.m_start = start + i0;
+                cluster_i1.m_count = box.m_count - i0;
+                cluster_i1.m_sum = xc;
+                cluster_i1.m_sum2 = x2c;
+                cluster_i1.m_min = p0;
+                cluster_i1.m_max = p1;
+                cluster_i1.m_parent = &node;
+                cluster_i1.m_child = &node.m_right;
+                treeBoxStack[stack] = cluster_i1;
+                assert(cluster_i1.m_count > 0);
+                stack++;
+            }
+
+            {
+                VHACD::Vect3 xc(0);
+                VHACD::Vect3 x2c(0);
+                VHACD::Vect3 p0(double( 1.0e15));
+                VHACD::Vect3 p1(double(-1.0e15));
+                for (int i = 0; i < i0; ++i)
+                {
+                    const VHACD::Vect3& p = points[start + i];
+                    xc += p;
+                    x2c += p.CWiseMul(p);
+                    p0 = p0.CWiseMin(p);
+                    p1 = p1.CWiseMax(p);
+                }
+
+                dTreeBox cluster_i0(box);
+                cluster_i0.m_start = start;
+                cluster_i0.m_count = i0;
+                cluster_i0.m_min = p0;
+                cluster_i0.m_max = p1;
+                cluster_i0.m_sum = xc;
+                cluster_i0.m_sum2 = x2c;
+                cluster_i0.m_parent = &node;
+                cluster_i0.m_child = &node.m_left;
+                assert(cluster_i0.m_count > 0);
+                treeBoxStack[stack] = cluster_i0;
+                stack++;
+            }
+        }
+    }
+
+    return root;
+}
+
+/*
+ * Find the unmarked point with the largest projection onto dirPlane.
+ *
+ * The AABB tree rooted at *treePointer is walked with an explicit, fixed-size
+ * stack. A subtree is skipped when the projection of its bounding box corner
+ * (the corner furthest along the direction) does not exceed the best
+ * projection found so far.
+ *
+ * treePointer  in/out: root of the tree; rewritten when an emptied leaf that
+ *              hangs directly under the root is unlinked
+ * points       vertex array referenced by the leaf index lists
+ * dirPlane     search direction
+ * removeEntry  when true, marked points are dropped from the leaf index lists
+ *              as they are encountered
+ *
+ * Returns the index into points of the best unmarked vertex, or -1 if no
+ * unmarked vertex was visited (asserted against in debug builds).
+ */
+int ConvexHull::SupportVertex(ConvexHullAABBTreeNode** const treePointer,
+                              const std::vector<ConvexHullVertex>& points,
+                              const VHACD::Vect3& dirPlane,
+                              const bool removeEntry) const
+{
+#define VHACD_STACK_DEPTH_3D 64
+    // Parallel arrays: stackPool[k] is a pending node, aabbProjection[k] is
+    // the upper bound of the projection any point inside it can reach.
+    double aabbProjection[VHACD_STACK_DEPTH_3D];
+    ConvexHullAABBTreeNode* stackPool[VHACD_STACK_DEPTH_3D];
+
+    VHACD::Vect3 dir(dirPlane);
+
+    int index = -1;
+    int stack = 1;
+    stackPool[0] = *treePointer;
+    aabbProjection[0] = double(1.0e20);
+    double maxProj = double(-1.0e20);
+    // Per axis, select m_box[1] when the direction component is positive and
+    // m_box[0] otherwise, i.e. the box corner furthest along dir.
+    int ix = (dir[0] > double(0.0)) ? 1 : 0;
+    int iy = (dir[1] > double(0.0)) ? 1 : 0;
+    int iz = (dir[2] > double(0.0)) ? 1 : 0;
+    while (stack)
+    {
+        stack--;
+        double boxSupportValue = aabbProjection[stack];
+        // Only descend if this box could still contain a better point.
+        if (boxSupportValue > maxProj)
+        {
+            ConvexHullAABBTreeNode* me = stackPool[stack];
+
+            /*
+             * If the node is not a leaf node...
+             */
+            if (me->m_left && me->m_right)
+            {
+                const VHACD::Vect3 leftSupportPoint(me->m_left->m_box[ix].GetX(),
+                                                    me->m_left->m_box[iy].GetY(),
+                                                    me->m_left->m_box[iz].GetZ());
+                double leftSupportDist = leftSupportPoint.Dot(dir);
+
+                const VHACD::Vect3 rightSupportPoint(me->m_right->m_box[ix].GetX(),
+                                                     me->m_right->m_box[iy].GetY(),
+                                                     me->m_right->m_box[iz].GetZ());
+                double rightSupportDist = rightSupportPoint.Dot(dir);
+
+                /*
+                 * ...push the shorter side first
+                 * So we can explore the tree in the larger side first
+                 */
+                if (rightSupportDist >= leftSupportDist)
+                {
+                    aabbProjection[stack] = leftSupportDist;
+                    stackPool[stack] = me->m_left;
+                    stack++;
+                    assert(stack < VHACD_STACK_DEPTH_3D);
+                    aabbProjection[stack] = rightSupportDist;
+                    stackPool[stack] = me->m_right;
+                    stack++;
+                    assert(stack < VHACD_STACK_DEPTH_3D);
+                }
+                else
+                {
+                    aabbProjection[stack] = rightSupportDist;
+                    stackPool[stack] = me->m_right;
+                    stack++;
+                    assert(stack < VHACD_STACK_DEPTH_3D);
+                    aabbProjection[stack] = leftSupportDist;
+                    stackPool[stack] = me->m_left;
+                    stack++;
+                    assert(stack < VHACD_STACK_DEPTH_3D);
+                }
+            }
+            /*
+             * If it is a leaf node...
+             */
+            else
+            {
+                ConvexHullAABBTreeNode* cluster = me;
+                for (size_t i = 0; i < cluster->m_count; ++i)
+                {
+                    const ConvexHullVertex& p = points[cluster->m_indices[i]];
+                    assert(p.GetX() >= cluster->m_box[0].GetX());
+                    assert(p.GetX() <= cluster->m_box[1].GetX());
+                    assert(p.GetY() >= cluster->m_box[0].GetY());
+                    assert(p.GetY() <= cluster->m_box[1].GetY());
+                    assert(p.GetZ() >= cluster->m_box[0].GetZ());
+                    assert(p.GetZ() <= cluster->m_box[1].GetZ());
+                    if (!p.m_mark)
+                    {
+                        //assert(p.m_w == double(0.0f));
+                        double dist = p.Dot(dir);
+                        if (dist > maxProj)
+                        {
+                            maxProj = dist;
+                            index = cluster->m_indices[i];
+                        }
+                    }
+                    else if (removeEntry)
+                    {
+                        // Swap-remove: overwrite slot i with the last entry and
+                        // shrink the list. i is unsigned, so at i == 0 the
+                        // decrement wraps and the loop's ++i brings it back to
+                        // 0; either way the swapped-in entry is re-examined.
+                        cluster->m_indices[i] = cluster->m_indices[cluster->m_count - 1];
+                        cluster->m_count = cluster->m_count - 1;
+                        i--;
+                    }
+                }
+
+                // An empty leaf is unlinked: its sibling takes the place of
+                // their common parent (or becomes the new root).
+                if (cluster->m_count == 0)
+                {
+                    ConvexHullAABBTreeNode* const parent = cluster->m_parent;
+                    if (parent)
+                    {
+                        ConvexHullAABBTreeNode* const sibling = (parent->m_left != cluster) ? parent->m_left : parent->m_right;
+                        assert(sibling != cluster);
+                        ConvexHullAABBTreeNode* const grandParent = parent->m_parent;
+                        if (grandParent)
+                        {
+                            sibling->m_parent = grandParent;
+                            if (grandParent->m_right == parent)
+                            {
+                                grandParent->m_right = sibling;
+                            }
+                            else
+                            {
+                                grandParent->m_left = sibling;
+                            }
+                        }
+                        else
+                        {
+                            sibling->m_parent = nullptr;
+                            *treePointer = sibling;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    assert(index != -1);
+    return index;
+}
+
+/*
+ * Scalar triple product (p3 - p0) . ((p1 - p0) x (p2 - p0)).
+ * This is six times the signed volume of the tetrahedron p0, p1, p2, p3;
+ * callers in this file only inspect its sign.
+ */
+double ConvexHull::TetrahedrumVolume(const VHACD::Vect3& p0,
+                                     const VHACD::Vect3& p1,
+                                     const VHACD::Vect3& p2,
+                                     const VHACD::Vect3& p3) const
+{
+    // Edges leaving the apex p0.
+    const VHACD::Vect3 edge1(p1 - p0);
+    const VHACD::Vect3 edge2(p2 - p0);
+    const VHACD::Vect3 edge3(p3 - p0);
+
+    // Normal of the (p0, p1, p2) face, then project the remaining edge on it.
+    const VHACD::Vect3 baseNormal(edge1.Cross(edge2));
+    return edge3.Dot(baseNormal);
+}
+
+/*
+ * Build the AABB tree over points and pick the four vertices of the initial
+ * tetrahedron, storing them in m_points[0..3].
+ *
+ * Also sets m_aabbP0 / m_aabbP1 from the root box and m_diag to the length of
+ * the root box diagonal.
+ *
+ * Returns 0 when fewer than four points are supplied, otherwise the number of
+ * input points. On every failure path m_points is resized to 0; on success it
+ * holds exactly four points ordered so that TetrahedrumVolume() is negative
+ * (asserted). Each selected point gets m_mark = 1 in points.
+ */
+int ConvexHull::InitVertexArray(std::vector<ConvexHullVertex>& points,
+                                NodeBundle<ConvexHullAABBTreeNode>& memoryPool)
+//                                 std::vector<ConvexHullAABBTreeNode>& memoryPool)
+{
+#if 1
+    ConvexHullAABBTreeNode* tree = BuildTreeOld(points,
+                                                memoryPool);
+#else
+    ConvexHullAABBTreeNode* tree = BuildTreeNew(points, (char**)&memoryPool, maxMemSize);
+#endif
+    int count = int(points.size());
+    if (count < 4)
+    {
+        m_points.resize(0);
+        return 0;
+    }
+
+    m_points.resize(count);
+    m_aabbP0 = tree->m_box[0];
+    m_aabbP1 = tree->m_box[1];
+
+    VHACD::Vect3 boxSize(tree->m_box[1] - tree->m_box[0]);
+    m_diag = boxSize.GetNorm();
+    const ndNormalMap& normalMap = ndNormalMap::GetNormalMap();
+
+    // First vertex: the support point along the first direction of the map.
+    int index0 = SupportVertex(&tree,
+                               points,
+                               normalMap.m_normal[0]);
+    m_points[0] = points[index0];
+    points[index0].m_mark = 1;
+
+    // Second vertex: first support point far enough from m_points[0].
+    bool validTetrahedrum = false;
+    VHACD::Vect3 e1(double(0.0));
+    for (int i = 1; i < normalMap.m_count; ++i)
+    {
+        int index = SupportVertex(&tree,
+                                  points,
+                                  normalMap.m_normal[i]);
+        assert(index >= 0);
+
+        e1 = points[index] - m_points[0];
+        double error2 = e1.GetNormSquared();
+        if (error2 > (double(1.0e-4) * m_diag * m_diag))
+        {
+            m_points[1] = points[index];
+            points[index].m_mark = 1;
+            validTetrahedrum = true;
+            break;
+        }
+    }
+    if (!validTetrahedrum)
+    {
+        m_points.resize(0);
+        assert(0);
+        return count;
+    }
+
+    // Third vertex: first support point that is not collinear with the first
+    // two, judged by the length of e1 x e2.
+    validTetrahedrum = false;
+    VHACD::Vect3 e2(double(0.0));
+    VHACD::Vect3 normal(double(0.0));
+    for (int i = 2; i < normalMap.m_count; ++i)
+    {
+        int index = SupportVertex(&tree,
+                                  points,
+                                  normalMap.m_normal[i]);
+        assert(index >= 0);
+        e2 = points[index] - m_points[0];
+        normal = e1.Cross(e2);
+        double error2 = normal.GetNorm();
+        if (error2 > (double(1.0e-4) * m_diag * m_diag))
+        {
+            m_points[2] = points[index];
+            points[index].m_mark = 1;
+            validTetrahedrum = true;
+            break;
+        }
+    }
+
+    if (!validTetrahedrum)
+    {
+        m_points.resize(0);
+        assert(0);
+        return count;
+    }
+
+    // find the largest possible tetrahedron
+    // Fourth vertex: try the support point along the triangle normal first.
+    validTetrahedrum = false;
+    VHACD::Vect3 e3(double(0.0));
+
+    index0 = SupportVertex(&tree,
+                           points,
+                           normal);
+    e3 = points[index0] - m_points[0];
+    double err2 = normal.Dot(e3);
+    if (fabs(err2) > (double(1.0e-6) * m_diag * m_diag))
+    {
+        // we found a valid tetrahedron; stop searching and start building the hull by adding the rest of the points
+        m_points[3] = points[index0];
+        points[index0].m_mark = 1;
+        validTetrahedrum = true;
+    }
+    if (!validTetrahedrum)
+    {
+        // Then try the opposite side of the triangle.
+        VHACD::Vect3 n(-normal);
+        int index = SupportVertex(&tree,
+                                  points,
+                                  n);
+        e3 = points[index] - m_points[0];
+        double error2 = normal.Dot(e3);
+        if (fabs(error2) > (double(1.0e-6) * m_diag * m_diag))
+        {
+            // we found a valid tetrahedron; stop searching and start building the hull by adding the rest of the points
+            m_points[3] = points[index];
+            points[index].m_mark = 1;
+            validTetrahedrum = true;
+        }
+    }
+    if (!validTetrahedrum)
+    {
+        // Last resort: scan the remaining directions of the normal map.
+        for (int i = 3; i < normalMap.m_count; ++i)
+        {
+            int index = SupportVertex(&tree,
+                                      points,
+                                      normalMap.m_normal[i]);
+            assert(index >= 0);
+
+            // make sure the first tetrahedron is not degenerate (non-zero volume)
+            e3 = points[index] - m_points[0];
+            double error2 = normal.Dot(e3);
+            if (fabs(error2) > (double(1.0e-6) * m_diag * m_diag))
+            {
+                // we found a valid tetrahedron; stop searching and start building the hull by adding the rest of the points
+                m_points[3] = points[index];
+                points[index].m_mark = 1;
+                validTetrahedrum = true;
+                break;
+            }
+        }
+    }
+    if (!validTetrahedrum)
+    {
+        // the points do not form a convex hull
+        m_points.resize(0);
+        return count;
+    }
+
+    // Keep only the tetrahedron and fix its winding so the volume is negative.
+    m_points.resize(4);
+    double volume = TetrahedrumVolume(m_points[0],
+                                      m_points[1],
+                                      m_points[2],
+                                      m_points[3]);
+    if (volume > double(0.0))
+    {
+        std::swap(m_points[2],
+                  m_points[3]);
+    }
+    assert(TetrahedrumVolume(m_points[0], m_points[1], m_points[2], m_points[3]) < double(0.0));
+    return count;
+}
+
+/*
+ * Append a new triangle with vertex indices (i0, i1, i2) to m_list and
+ * return an iterator to it. All other face fields keep their
+ * default-constructed values.
+ */
+std::list<ConvexHullFace>::iterator ConvexHull::AddFace(int i0,
+                                                        int i1,
+                                                        int i2)
+{
+    const int corners[3] = { i0, i1, i2 };
+
+    ConvexHullFace newFace;
+    for (int k = 0; k < 3; ++k)
+    {
+        newFace.m_index[k] = corners[k];
+    }
+
+    // Inserting before end() appends a copy of newFace.
+    return m_list.insert(m_list.end(), newFace);
+}
+
+/*
+ * Grow the convex hull from the initial tetrahedron by repeatedly adding the
+ * support vertex of a boundary face.
+ *
+ * vertexTree      AABB tree searched by SupportVertex()
+ * points          input vertices; every vertex added to the hull gets
+ *                 m_mark = 1
+ * count           number of input vertices
+ * distTol         relative tolerance; scaled by m_diag before use. A face is
+ *                 only expanded when its support vertex is at least this far
+ *                 in front of the face plane.
+ * maxVertexCount  upper bound on the number of hull vertices
+ *
+ * Expects m_points[0..3] to hold the initial tetrahedron (the first four
+ * faces are built from indices 0..3) -- presumably set up by
+ * InitVertexArray(). On return m_points holds the hull vertices and m_list
+ * the hull faces.
+ */
+void ConvexHull::CalculateConvexHull3D(ConvexHullAABBTreeNode* vertexTree,
+                                       std::vector<ConvexHullVertex>& points,
+                                       int count,
+                                       double distTol,
+                                       int maxVertexCount)
+{
+    distTol = fabs(distTol) * m_diag;
+    std::list<ConvexHullFace>::iterator f0Node = AddFace(0, 1, 2);
+    std::list<ConvexHullFace>::iterator f1Node = AddFace(0, 2, 3);
+    std::list<ConvexHullFace>::iterator f2Node = AddFace(2, 1, 3);
+    std::list<ConvexHullFace>::iterator f3Node = AddFace(1, 0, 3);
+
+    ConvexHullFace& f0 = *f0Node;
+    ConvexHullFace& f1 = *f1Node;
+    ConvexHullFace& f2 = *f2Node;
+    ConvexHullFace& f3 = *f3Node;
+
+    // Wire up the adjacency of the four tetrahedron faces.
+    f0.m_twin[0] = f3Node;
+    f0.m_twin[1] = f2Node;
+    f0.m_twin[2] = f1Node;
+
+    f1.m_twin[0] = f0Node;
+    f1.m_twin[1] = f2Node;
+    f1.m_twin[2] = f3Node;
+
+    f2.m_twin[0] = f0Node;
+    f2.m_twin[1] = f3Node;
+    f2.m_twin[2] = f1Node;
+
+    f3.m_twin[0] = f0Node;
+    f3.m_twin[1] = f1Node;
+    f3.m_twin[2] = f2Node;
+
+    std::list<std::list<ConvexHullFace>::iterator> boundaryFaces;
+    boundaryFaces.push_back(f0Node);
+    boundaryFaces.push_back(f1Node);
+    boundaryFaces.push_back(f2Node);
+    boundaryFaces.push_back(f3Node);
+
+    m_points.resize(count);
+
+    count -= 4;
+    maxVertexCount -= 4;
+    int currentIndex = 4;
+
+    /*
+     * Scratch containers of face handles. Every element is an iterator into
+     * m_list (boundaryFaces itself stores the same kind of iterator).
+     */
+    std::vector<std::list<ConvexHullFace>::iterator> stack;
+    std::vector<std::list<ConvexHullFace>::iterator> coneList;
+    std::vector<std::list<ConvexHullFace>::iterator> deleteList;
+
+    stack.reserve(1024 + count);
+    coneList.reserve(1024 + count);
+    deleteList.reserve(1024 + count);
+
+    while (boundaryFaces.size() && count && (maxVertexCount > 0))
+    {
+        // my definition of the optimal convex hull of a given vertex count,
+        // is the convex hull formed by a subset of the input vertex that minimizes the volume difference
+        // between the perfect hull formed from all input vertex and the hull of the sub set of vertex.
+        // When using a priority heap this algorithms will generate the an optimal of a fix vertex count.
+        // Since all Newton's tools do not have a limit on the point count of a convex hull, I can use either a stack or a queue.
+        // a stack maximize construction speed, a Queue tend to maximize the volume of the generated Hull approaching a perfect Hull.
+        // For now we use a queue.
+        // For general hulls it does not make a difference if we use a stack, queue, or a priority heap.
+        // perfect optimal hull only apply for when build hull of a limited vertex count.
+        //
+        // Also when building Hulls of a limited vertex count, this function runs in constant time.
+        // yes that is correct, it does not makes a difference if you build a N point hull from 100 vertex
+        // or from 100000 vertex input array.
+
+        // using a queue (some what slower by better hull when reduced vertex count is desired)
+        // New faces are pushed at the front and work is taken from the back.
+        bool isvalid;
+        std::list<ConvexHullFace>::iterator faceNode = boundaryFaces.back();
+        ConvexHullFace& face = *faceNode;
+        HullPlane planeEquation(face.GetPlaneEquation(m_points, isvalid));
+
+        int index = 0;
+        double dist = 0;
+        VHACD::Vect3 p;
+        if (isvalid)
+        {
+            index = SupportVertex(&vertexTree,
+                                  points,
+                                  planeEquation);
+            p = points[index];
+            dist = planeEquation.Evalue(p);
+        }
+
+        if (   isvalid
+            && (dist >= distTol)
+            && (face.Evalue(m_points, p) < double(0.0)))
+        {
+            // Flood-fill across twins to collect every face that p is in
+            // front of; those faces are replaced by a cone of new faces.
+            stack.push_back(faceNode);
+
+            deleteList.clear();
+            while (stack.size())
+            {
+                std::list<ConvexHullFace>::iterator node1 = stack.back();
+                ConvexHullFace& face1 = *node1;
+
+                stack.pop_back();
+
+                if (!face1.m_mark && (face1.Evalue(m_points, p) < double(0.0)))
+                {
+                    #ifdef _DEBUG
+                    for (const auto node : deleteList)
+                    {
+                        assert(node != node1);
+                    }
+                    #endif
+
+                    deleteList.push_back(node1);
+                    face1.m_mark = 1;
+                    for (std::list<ConvexHullFace>::iterator& twinNode : face1.m_twin)
+                    {
+                        ConvexHullFace& twinFace = *twinNode;
+                        if (!twinFace.m_mark)
+                        {
+                            stack.push_back(twinNode);
+                        }
+                    }
+                }
+            }
+
+            m_points[currentIndex] = points[index];
+            points[index].m_mark = 1;
+
+            // For every edge between a deleted face and a surviving face,
+            // create a new face joining that edge to the new vertex.
+            coneList.clear();
+            for (std::list<ConvexHullFace>::iterator node1 : deleteList)
+            {
+                ConvexHullFace& face1 = *node1;
+                assert(face1.m_mark == 1);
+                for (std::size_t j0 = 0; j0 < face1.m_twin.size(); ++j0)
+                {
+                    std::list<ConvexHullFace>::iterator twinNode = face1.m_twin[j0];
+                    ConvexHullFace& twinFace = *twinNode;
+                    if (!twinFace.m_mark)
+                    {
+                        std::size_t j1 = (j0 == 2) ? 0 : j0 + 1;
+                        std::list<ConvexHullFace>::iterator newNode = AddFace(currentIndex,
+                                                                              face1.m_index[j0],
+                                                                              face1.m_index[j1]);
+                        boundaryFaces.push_front(newNode);
+                        ConvexHullFace& newFace = *newNode;
+
+                        newFace.m_twin[1] = twinNode;
+                        for (std::size_t k = 0; k < twinFace.m_twin.size(); ++k)
+                        {
+                            if (twinFace.m_twin[k] == node1)
+                            {
+                                twinFace.m_twin[k] = newNode;
+                            }
+                        }
+                        coneList.push_back(newNode);
+                    }
+                }
+            }
+
+            // Stitch neighbouring cone faces together.
+            // The bound is written as `i + 1 < size()` because size() is
+            // unsigned: `size() - 1` would wrap around for an empty coneList
+            // (every face visible from p) and index out of bounds.
+            for (std::size_t i = 0; i + 1 < coneList.size(); ++i)
+            {
+                std::list<ConvexHullFace>::iterator nodeA = coneList[i];
+                ConvexHullFace& faceA = *nodeA;
+                assert(faceA.m_mark == 0);
+                for (std::size_t j = i + 1; j < coneList.size(); j++)
+                {
+                    std::list<ConvexHullFace>::iterator nodeB = coneList[j];
+                    ConvexHullFace& faceB = *nodeB;
+                    assert(faceB.m_mark == 0);
+                    if (faceA.m_index[2] == faceB.m_index[1])
+                    {
+                        faceA.m_twin[2] = nodeB;
+                        faceB.m_twin[0] = nodeA;
+                        break;
+                    }
+                }
+
+                for (std::size_t j = i + 1; j < coneList.size(); j++)
+                {
+                    std::list<ConvexHullFace>::iterator nodeB = coneList[j];
+                    ConvexHullFace& faceB = *nodeB;
+                    assert(faceB.m_mark == 0);
+                    if (faceA.m_index[1] == faceB.m_index[2])
+                    {
+                        faceA.m_twin[0] = nodeB;
+                        faceB.m_twin[2] = nodeA;
+                        break;
+                    }
+                }
+            }
+
+            // Drop the replaced faces from the work list and from the hull.
+            for (std::list<ConvexHullFace>::iterator node : deleteList)
+            {
+                auto it = std::find(boundaryFaces.begin(),
+                                    boundaryFaces.end(),
+                                    node);
+                if (it != boundaryFaces.end())
+                {
+                    boundaryFaces.erase(it);
+                }
+                m_list.erase(node);
+            }
+
+            maxVertexCount--;
+            currentIndex++;
+            count--;
+        }
+        else
+        {
+            // Nothing lies far enough in front of this face: it is final, so
+            // just retire it from the work list.
+            auto it = std::find(boundaryFaces.begin(),
+                                boundaryFaces.end(),
+                                faceNode);
+            if (it != boundaryFaces.end())
+            {
+                boundaryFaces.erase(it);
+            }
+        }
+    }
+    m_points.resize(currentIndex);
+}
+
+//***********************************************************************************************
+// End of ConvexHull generation code by Julio Jerez <[email protected]>
+//***********************************************************************************************
+
+class KdTreeNode;
+
+/*
+ * Splitting axis of a k-d tree level. KdTreeNode::Add and KdTreeNode::Search
+ * map X/Y/Z to coordinate index 0/1/2 and cycle X -> Y -> Z -> X when
+ * descending one level.
+ */
+enum Axes
+{
+    X_AXIS = 0,
+    Y_AXIS = 1,
+    Z_AXIS = 2
+};
+
+/*
+ * One result slot filled in by KdTreeNode::Search.
+ */
+class KdTreeFindNode
+{
+public:
+    KdTreeFindNode() = default;
+
+    // Node that matched the query, or nullptr while the slot is unused.
+    KdTreeNode* m_node{ nullptr };
+    // Squared distance from the query position to the node's vertex.
+    double m_distance{ 0.0 };
+};
+
+/*
+ * k-d tree over a growing array of vertices. The tree owns the vertex array;
+ * nodes refer to vertices by index and are allocated from m_bundle.
+ */
+class KdTree
+{
+public:
+    KdTree() = default;
+
+    // Vertex stored at index (asserts that index is in range).
+    const VHACD::Vertex& GetPosition(uint32_t index) const;
+
+    // Collect up to maxObjects nodes strictly closer than radius to pos into
+    // found; returns how many slots were filled.
+    uint32_t Search(const VHACD::Vect3& pos,
+                    double radius,
+                    uint32_t maxObjects,
+                    KdTreeFindNode* found) const;
+
+    // Append v to the vertex array, insert a node for it and return its index.
+    uint32_t Add(const VHACD::Vertex& v);
+
+    // Take the next node from m_bundle and initialise it for vertex `index`.
+    KdTreeNode& GetNewNode(uint32_t index);
+
+    uint32_t GetNearest(const VHACD::Vect3& pos,
+                        double radius,
+                        bool& _found) const; // returns the nearest possible neighbor's index.
+
+    // Read-only access to the vertex array.
+    const std::vector<VHACD::Vertex>& GetVertices() const;
+    // Hand the vertex array over to the caller as an rvalue.
+    std::vector<VHACD::Vertex>&& TakeVertices();
+
+    // Number of vertices stored so far.
+    uint32_t GetVCount() const;
+
+private:
+    // Root of the tree; nullptr until the first vertex is added.
+    KdTreeNode* m_root{ nullptr };
+    // Allocator the nodes are taken from.
+    NodeBundle<KdTreeNode> m_bundle;
+
+    // Vertex positions, indexed by KdTreeNode::GetIndex().
+    std::vector<VHACD::Vertex> m_vertices;
+};
+
+/*
+ * A single k-d tree node. It stores only the index of its vertex; positions
+ * are looked up through the owning KdTree.
+ */
+class KdTreeNode
+{
+public:
+    KdTreeNode() = default;
+    KdTreeNode(uint32_t index);
+
+    // Insert node below this one; dim is the splitting axis at this level.
+    void Add(KdTreeNode& node,
+             Axes dim,
+             const KdTree& iface);
+
+    // Index of this node's vertex in the owning tree.
+    uint32_t GetIndex() const;
+
+    // Recursive radius search; count is the number of slots of found that
+    // are in use and is updated in place (never exceeding maxObjects).
+    void Search(Axes axis,
+                const VHACD::Vect3& pos,
+                double radius,
+                uint32_t& count,
+                uint32_t maxObjects,
+                KdTreeFindNode* found,
+                const KdTree& iface);
+
+private:
+    uint32_t m_index = 0;
+    // Child for coordinates <= this node's coordinate on the split axis.
+    KdTreeNode* m_left = nullptr;
+    // Child for coordinates > this node's coordinate on the split axis.
+    KdTreeNode* m_right = nullptr;
+};
+
+// Return the vertex stored at index. The range check is an assert only, so
+// an out-of-range index is not caught in release builds.
+const VHACD::Vertex& KdTree::GetPosition(uint32_t index) const
+{
+    assert(index < m_vertices.size());
+    return m_vertices[index];
+}
+
+// Radius query starting at the root, splitting on X first.
+// Returns the number of entries written to found (0 for an empty tree).
+uint32_t KdTree::Search(const VHACD::Vect3& pos,
+                        double radius,
+                        uint32_t maxObjects,
+                        KdTreeFindNode* found) const
+{
+    uint32_t hits = 0;
+    if (m_root)
+    {
+        m_root->Search(X_AXIS, pos, radius, hits, maxObjects, found, *this);
+    }
+    return hits;
+}
+
+// Store v, create a node for it and link that node into the tree.
+// Returns the index of the new vertex.
+uint32_t KdTree::Add(const VHACD::Vertex& v)
+{
+    const uint32_t newIndex = uint32_t(m_vertices.size());
+    m_vertices.emplace_back(v);
+
+    KdTreeNode& fresh = GetNewNode(newIndex);
+    if (!m_root)
+    {
+        // First vertex: it becomes the root.
+        m_root = &fresh;
+        return newIndex;
+    }
+
+    m_root->Add(fresh, X_AXIS, *this);
+    return newIndex;
+}
+
+// Fetch the next node from the bundle and reset it to a childless node that
+// refers to vertex `index`.
+KdTreeNode& KdTree::GetNewNode(uint32_t index)
+{
+    KdTreeNode& slot = m_bundle.GetNextNode();
+    slot = KdTreeNode(index);
+    return slot;
+}
+
+// Look up the closest stored vertex within radius of pos.
+// _found reports whether one exists; the return value is its index, or 0
+// when nothing was found.
+uint32_t KdTree::GetNearest(const VHACD::Vect3& pos,
+                            double radius,
+                            bool& _found) const // returns the nearest possible neighbor's index.
+{
+    KdTreeFindNode nearest;
+    // A single result slot makes Search keep only the closest match.
+    _found = (Search(pos, radius, 1, &nearest) != 0);
+    return _found ? nearest.m_node->GetIndex() : 0;
+}
+
+// Read-only view of every vertex added so far, in insertion order.
+const std::vector<VHACD::Vertex>& KdTree::GetVertices() const
+{
+    return m_vertices;
+}
+
+// Expose the vertex array as an rvalue so the caller can move from it.
+// The tree nodes are left untouched, so after the caller moves the data out
+// they refer to indices of an array in a moved-from state.
+std::vector<VHACD::Vertex>&& KdTree::TakeVertices()
+{
+    return std::move(m_vertices);
+}
+
+// Number of vertices currently stored, narrowed to 32 bits.
+uint32_t KdTree::GetVCount() const
+{
+    const std::size_t stored = m_vertices.size();
+    return uint32_t(stored);
+}
+
+// Create a childless node that refers to vertex `index` of the owning tree.
+KdTreeNode::KdTreeNode(uint32_t index)
+    : m_index(index)
+{
+}
+
+// Insert node into the subtree rooted at this node.
+// dim is the axis this node splits on; each level below uses the next axis
+// in the cycle X -> Y -> Z -> X. A vertex whose coordinate is <= the current
+// node's coordinate goes left, otherwise right.
+void KdTreeNode::Add(KdTreeNode& node,
+                     Axes dim,
+                     const KdTree& tree)
+{
+    KdTreeNode* current = this;
+    Axes splitAxis = dim;
+
+    // Walk down until a free child slot is found.
+    for (;;)
+    {
+        uint32_t component = 0;
+        Axes nextAxis = X_AXIS;
+        switch (splitAxis)
+        {
+        case X_AXIS:
+            component = 0;
+            nextAxis = Y_AXIS;
+            break;
+        case Y_AXIS:
+            component = 1;
+            nextAxis = Z_AXIS;
+            break;
+        case Z_AXIS:
+            component = 2;
+            nextAxis = X_AXIS;
+            break;
+        }
+
+        const VHACD::Vertex& incoming = tree.GetPosition(node.m_index);
+        const VHACD::Vertex& here = tree.GetPosition(current->m_index);
+
+        const bool goLeft = incoming[component] <= here[component];
+        KdTreeNode*& childSlot = goLeft ? current->m_left : current->m_right;
+        if (!childSlot)
+        {
+            childSlot = &node;
+            return;
+        }
+
+        current = childSlot;
+        splitAxis = nextAxis;
+    }
+}
+
+// Index of this node's vertex in the owning tree's vertex array.
+uint32_t KdTreeNode::GetIndex() const
+{
+    return m_index;
+}
+
+/*
+ * Recursive radius search below (and including) this node.
+ *
+ * axis        splitting axis of this node; children use the next axis
+ * pos         query position
+ * radius      search radius; a node matches when its squared distance to
+ *             pos is strictly less than radius * radius
+ * count       in/out: number of valid entries in found, capped at maxObjects
+ * maxObjects  capacity of found
+ * found       result array, kept sorted by ascending squared distance
+ * iface       tree that owns the vertex positions
+ *
+ * NOTE(review): when count == 0 the first match is written to found[0]
+ * without checking maxObjects, so maxObjects == 0 is not safe.
+ */
+void KdTreeNode::Search(Axes axis,
+                        const VHACD::Vect3& pos,
+                        double radius,
+                        uint32_t& count,
+                        uint32_t maxObjects,
+                        KdTreeFindNode* found,
+                        const KdTree& iface)
+{
+    const VHACD::Vect3 position = iface.GetPosition(m_index);
+
+    const VHACD::Vect3 d = pos - position;
+
+    // search1 is the child on the query's side of the split; search2 is the
+    // other child, visited only when the split is closer than radius.
+    KdTreeNode* search1 = 0;
+    KdTreeNode* search2 = 0;
+
+    // Map the axis to a coordinate index and advance axis for the children.
+    uint32_t idx = 0;
+    switch (axis)
+    {
+    case X_AXIS:
+        idx = 0;
+        axis = Y_AXIS;
+        break;
+    case Y_AXIS:
+        idx = 1;
+        axis = Z_AXIS;
+        break;
+    case Z_AXIS:
+        idx = 2;
+        axis = X_AXIS;
+        break;
+    }
+
+    if (d[idx] <= 0) // JWR  if we are to the left
+    {
+        search1 = m_left; // JWR  then search to the left
+        if (-d[idx] < radius) // JWR  if distance to the right is less than our search radius, continue on the right
+                            // as well.
+            search2 = m_right;
+    }
+    else
+    {
+        search1 = m_right; // JWR  otherwise we are to the right, so search the right subtree
+        if (d[idx] < radius) // JWR  if the distance to the left is less than our search radius, search the left as well
+            search2 = m_left;
+    }
+
+    double r2 = radius * radius;
+    double m = d.GetNormSquared();
+
+    // This node itself is a hit: merge it into the sorted result array.
+    if (m < r2)
+    {
+        switch (count)
+        {
+        case 0:
+        {
+            found[count].m_node = this;
+            found[count].m_distance = m;
+            break;
+        }
+        case 1:
+        {
+            if (m < found[0].m_distance)
+            {
+                if (maxObjects == 1)
+                {
+                    // Only one slot: replace the farther hit.
+                    found[0].m_node = this;
+                    found[0].m_distance = m;
+                }
+                else
+                {
+                    // Shift the existing hit back and put this one first.
+                    found[1] = found[0];
+                    found[0].m_node = this;
+                    found[0].m_distance = m;
+                }
+            }
+            else if (maxObjects > 1)
+            {
+                found[1].m_node = this;
+                found[1].m_distance = m;
+            }
+            break;
+        }
+        default:
+        {
+            bool inserted = false;
+
+            for (uint32_t i = 0; i < count; i++)
+            {
+                if (m < found[i].m_distance) // if this one is closer than a pre-existing one...
+                {
+                    // insertion sort...
+                    // When the array is full, start the shift at the last
+                    // slot so the farthest hit is dropped.
+                    uint32_t scan = count;
+                    if (scan >= maxObjects)
+                        scan = maxObjects - 1;
+                    for (uint32_t j = scan; j > i; j--)
+                    {
+                        found[j] = found[j - 1];
+                    }
+                    found[i].m_node = this;
+                    found[i].m_distance = m;
+                    inserted = true;
+                    break;
+                }
+            }
+
+            // Farther than every stored hit: append if there is room.
+            if (!inserted && count < maxObjects)
+            {
+                found[count].m_node = this;
+                found[count].m_distance = m;
+            }
+        }
+        break;
+        }
+
+        count++;
+
+        // count is bumped unconditionally above; clamp it to the capacity.
+        if (count > maxObjects)
+        {
+            count = maxObjects;
+        }
+    }
+
+
+    if (search1)
+        search1->Search(axis, pos, radius, count, maxObjects, found, iface);
+
+    if (search2)
+        search2->Search(axis, pos, radius, count, maxObjects, found, iface);
+}
+
+/*
+ * Vertex de-duplication helper. Positions are looked up in a k-d tree and
+ * merged with an existing vertex when one lies within `granularity`;
+ * optionally positions are first snapped to a grid of that spacing.
+ */
+class VertexIndex
+{
+public:
+    VertexIndex(double granularity,
+                bool snapToGrid);
+
+    // Round each coordinate of p towards zero to a multiple of the
+    // granularity.
+    VHACD::Vect3 SnapToGrid(VHACD::Vect3 p);
+
+    // Index of the vertex matching p; newPos is set to true when p had to be
+    // added as a new vertex.
+    uint32_t GetIndex(VHACD::Vect3 p,
+                      bool& newPos);
+
+    const std::vector<VHACD::Vertex>& GetVertices() const;
+
+    std::vector<VHACD::Vertex>&& TakeVertices();
+
+    uint32_t GetVCount() const;
+
+    /*
+     * Write the indexed vertices plus tcount triangles to fname in Wavefront
+     * OBJ format. indices holds 3 * tcount zero-based vertex indices; OBJ
+     * face indices are one-based, hence the + 1.
+     * Returns false when the file cannot be opened.
+     */
+    bool SaveAsObj(const char* fname,
+                   uint32_t tcount,
+                   uint32_t* indices)
+    {
+        bool ret = false;
+
+        FILE* fph = fopen(fname, "wb");
+        if (fph)
+        {
+            ret = true;
+
+            const std::vector<VHACD::Vertex>& v = GetVertices();
+            for (uint32_t i = 0; i < v.size(); ++i)
+            {
+                fprintf(fph, "v %0.9f %0.9f %0.9f\r\n",
+                        v[i].mX,
+                        v[i].mY,
+                        v[i].mZ);
+            }
+
+            for (uint32_t i = 0; i < tcount; i++)
+            {
+                uint32_t i1 = *indices++;
+                uint32_t i2 = *indices++;
+                uint32_t i3 = *indices++;
+                // The indices are unsigned, so print them with %u; %d would
+                // show values above INT_MAX as negative numbers.
+                fprintf(fph, "f %u %u %u\r\n",
+                        static_cast<unsigned int>(i1 + 1),
+                        static_cast<unsigned int>(i2 + 1),
+                        static_cast<unsigned int>(i3 + 1));
+            }
+            fclose(fph);
+        }
+
+        return ret;
+    }
+
+private:
+    bool m_snapToGrid : 1;
+    // Merge radius passed to the k-d tree lookup and grid spacing for
+    // SnapToGrid.
+    double m_granularity;
+    KdTree m_KdTree;
+};
+
+// Stores the snapping flag and the granularity; the KdTree member is default-constructed.
+VertexIndex::VertexIndex(double granularity, bool snapToGrid)
+    : m_snapToGrid(snapToGrid),
+      m_granularity(granularity)
+{
+}
+
+// Removes, on every axis, the fmod remainder of the component with respect to
+// m_granularity and returns the adjusted copy of p.
+VHACD::Vect3 VertexIndex::SnapToGrid(VHACD::Vect3 p)
+{
+    for (int axis = 0; axis < 3; ++axis)
+    {
+        p[axis] -= fmod(p[axis], m_granularity);
+    }
+    return p;
+}
+
+// Looks p up in the KdTree (after optional grid snapping). When the tree
+// reports a match its index is returned and newPos stays false; otherwise p
+// is added, newPos becomes true and the index returned by Add() is used.
+uint32_t VertexIndex::GetIndex(VHACD::Vect3 p,
+                               bool& newPos)
+{
+    newPos = false;
+
+    if (m_snapToGrid)
+    {
+        p = SnapToGrid(p);
+    }
+
+    bool found;
+    const uint32_t existing = m_KdTree.GetNearest(p, m_granularity, found);
+    if (found)
+    {
+        return existing;
+    }
+
+    newPos = true;
+    return m_KdTree.Add(VHACD::Vertex(p.GetX(), p.GetY(), p.GetZ()));
+}
+
+// Read-only view of the vertices held by the KdTree.
+const std::vector<VHACD::Vertex>& VertexIndex::GetVertices() const
+{
+    const std::vector<VHACD::Vertex>& vertices = m_KdTree.GetVertices();
+    return vertices;
+}
+
+// Forwards KdTree::TakeVertices() as an rvalue reference so the caller can
+// move the vertex storage out.
+std::vector<VHACD::Vertex>&& VertexIndex::TakeVertices()
+{
+    return std::move(
+        m_KdTree.TakeVertices());
+}
+
+// Vertex count as reported by the KdTree.
+uint32_t VertexIndex::GetVCount() const
+{
+    const uint32_t vertexCount = m_KdTree.GetVCount();
+    return vertexCount;
+}
+
+/*
+ * Three 10-bit unsigned coordinates packed into one 32-bit word.
+ *
+ *   bits 31-30 : padding
+ *   bits 29-20 : X
+ *   bits 19-10 : Y
+ *   bits  9-0  : Z
+ */
+class Voxel
+{
+    // Shift of each field inside the packed word (all three spelled out for consistency)
+    static constexpr int VoxelBitsZStart =  0;
+    static constexpr int VoxelBitsYStart = 10;
+    static constexpr int VoxelBitsXStart = 20;
+    // Mask selecting one 10-bit field after shifting (bits 0 through 9 inclusive)
+    static constexpr int VoxelBitMask = 0x03FF;
+
+public:
+    Voxel() = default;
+
+    // Wraps an already packed value
+    Voxel(uint32_t index);
+
+    // Packs three coordinates; each is asserted to be below 1024
+    Voxel(uint32_t x, uint32_t y, uint32_t z);
+
+    // Compares the packed words
+    bool operator==(const Voxel &v) const;
+
+    // Unpacked (X, Y, Z)
+    VHACD::Vector3<uint32_t> GetVoxel() const;
+
+    uint32_t GetX() const;
+    uint32_t GetY() const;
+    uint32_t GetZ() const;
+
+    // The packed 32-bit word itself
+    uint32_t GetVoxelAddress() const;
+
+private:
+    uint32_t m_voxel{ 0 };
+};
+
+// Adopts index verbatim as the packed voxel word.
+Voxel::Voxel(uint32_t index) : m_voxel(index)
+{
+}
+
+// Packs x, y and z into their bit fields. The range checks are asserts only,
+// so out-of-range values are not masked in builds where assert is disabled.
+Voxel::Voxel(uint32_t x, uint32_t y, uint32_t z)
+    : m_voxel(  (x << VoxelBitsXStart)
+              | (y << VoxelBitsYStart)
+              | (z << VoxelBitsZStart))
+{
+    assert(x < 1024 && "Voxel constructed with X outside of range");
+    assert(y < 1024 && "Voxel constructed with Y outside of range");
+    assert(z < 1024 && "Voxel constructed with Z outside of range");
+}
+
+// Two voxels are equal when their packed words are equal.
+bool Voxel::operator==(const Voxel& v) const
+{
+    const bool samePackedWord = (m_voxel == v.m_voxel);
+    return samePackedWord;
+}
+
+// Unpacks the three coordinates into a vector (X, Y, Z).
+VHACD::Vector3<uint32_t> Voxel::GetVoxel() const
+{
+    return VHACD::Vector3<uint32_t>(GetX(),
+                                    GetY(),
+                                    GetZ());
+}
+
+// Extracts the X field (bits 29-20).
+uint32_t Voxel::GetX() const
+{
+    const uint32_t shifted = m_voxel >> VoxelBitsXStart;
+    return shifted & VoxelBitMask;
+}
+
+// Extracts the Y field (bits 19-10).
+uint32_t Voxel::GetY() const
+{
+    const uint32_t shifted = m_voxel >> VoxelBitsYStart;
+    return shifted & VoxelBitMask;
+}
+
+// Extracts the Z field (bits 9-0).
+uint32_t Voxel::GetZ() const
+{
+    const uint32_t shifted = m_voxel >> VoxelBitsZStart;
+    return shifted & VoxelBitMask;
+}
+
+// Returns the packed 32-bit word unchanged.
+uint32_t Voxel::GetVoxelAddress() const
+{
+    const uint32_t packed = m_voxel;
+    return packed;
+}
+
+// Plain indexed triangle mesh: a vertex array plus a triangle array.
+struct SimpleMesh
+{
+    std::vector<VHACD::Vertex>   m_vertices; // vertex positions
+    std::vector<VHACD::Triangle> m_indices;  // one Triangle (mI0, mI1, mI2) per face
+};
+
+/*======================== 0-tests ========================*/
+/*
+ * Ray versus axis-aligned box.
+ *
+ * start, dir: ray origin and direction
+ * bounds:     the box
+ * t:          written only on success; 0 when start is inside the box on all
+ *             three axes, otherwise the ray parameter of the entry plane
+ *
+ * Returns true when the ray hits the box.
+ */
+inline bool IntersectRayAABB(const VHACD::Vect3& start,
+                             const VHACD::Vect3& dir,
+                             const VHACD::BoundsAABB& bounds,
+                             double& t)
+{
+    // Candidate entry-plane parameter per axis; -1 marks "no candidate"
+    VHACD::Vect3 planeT(double(-1.0));
+    bool startsInside = true;
+
+    for (uint32_t axis = 0; axis < 3; ++axis)
+    {
+        const bool below = start[axis] < bounds.GetMin()[axis];
+        const bool above = !below && start[axis] > bounds.GetMax()[axis];
+        if (!below && !above)
+        {
+            continue;
+        }
+
+        startsInside = false;
+        if (dir[axis] != double(0.0))
+        {
+            const double plane = below ? bounds.GetMin()[axis]
+                                       : bounds.GetMax()[axis];
+            planeT[axis] = (plane - start[axis]) / dir[axis];
+        }
+    }
+
+    // Origin is within the slab of every axis: report an immediate hit
+    if (startsInside)
+    {
+        t = double(0.0);
+        return true;
+    }
+
+    // The largest candidate is the plane through which the ray would enter
+    uint32_t entryAxis;
+    const double entryT = planeT.MaxCoeff(entryAxis);
+    if (entryT < double(0.0))
+    {
+        return false;
+    }
+
+    // The point on that plane must lie within the box on the other two axes.
+    // The entry axis itself is skipped for precision reasons. No eps for now.
+    const double eps = double(0.0);
+    VHACD::Vect3 hit = start + dir * entryT;
+
+    if (   entryAxis != 0
+        && (   hit.GetX() < bounds.GetMin().GetX() - eps
+            || hit.GetX() > bounds.GetMax().GetX() + eps))
+    {
+        return false;
+    }
+    if (   entryAxis != 1
+        && (   hit.GetY() < bounds.GetMin().GetY() - eps
+            || hit.GetY() > bounds.GetMax().GetY() + eps))
+    {
+        return false;
+    }
+    if (   entryAxis != 2
+        && (   hit.GetZ() < bounds.GetMin().GetZ() - eps
+            || hit.GetZ() > bounds.GetMax().GetZ() + eps))
+    {
+        return false;
+    }
+
+    t = entryT;
+    return true;
+}
+
+// Moller and Trumbore's method
+/*
+ * Ray versus triangle (a, b, c), accepting hits on either face
+ * (Moller and Trumbore's method).
+ *
+ * p, dir:  ray origin and direction
+ * t:       ray parameter of the hit (written even when the test fails)
+ * u, v, w: barycentric weights of the hit for a, b and c; v and w may be
+ *          written on failing paths, u only on success
+ * sign:    -dir . (ab x ac), written on success only
+ * normal:  optional; receives the unnormalized ab x ac on success
+ *
+ * Returns true when the ray hits the triangle at t >= 0.
+ */
+inline bool IntersectRayTriTwoSided(const VHACD::Vect3& p,
+                                    const VHACD::Vect3& dir,
+                                    const VHACD::Vect3& a,
+                                    const VHACD::Vect3& b,
+                                    const VHACD::Vect3& c,
+                                    double& t,
+                                    double& u,
+                                    double& v,
+                                    double& w,
+                                    double& sign,
+                                    VHACD::Vect3* normal)
+{
+    const VHACD::Vect3 edgeAB = b - a;
+    const VHACD::Vect3 edgeAC = c - a;
+    const VHACD::Vect3 triNormal = edgeAB.Cross(edgeAC);
+    const VHACD::Vect3 originFromA = p - a;
+
+    const double denom = -dir.Dot(triNormal);
+    // Deliberately no zero check: with a zero denominator the infinities/NaNs
+    // produced below make the range tests reject the hit
+    const double invDenom = double(1.0) / denom;
+
+    t = originFromA.Dot(triNormal) * invDenom;
+    if (t < double(0.0))
+    {
+        return false;
+    }
+
+    const VHACD::Vect3 q = -dir.Cross(originFromA);
+
+    v = edgeAC.Dot(q) * invDenom;
+    if (v < double(0.0) || v > double(1.0))
+    {
+        return false;
+    }
+
+    w = -edgeAB.Dot(q) * invDenom;
+    if (w < double(0.0) || v + w > double(1.0))
+    {
+        return false;
+    }
+
+    u = double(1.0) - v - w;
+    sign = denom;
+    if (normal)
+    {
+        *normal = triNormal;
+    }
+
+    return true;
+}
+
+// RTCD 5.1.5, page 142
+/*
+ * Closest point to p on triangle (a, b, c) -- RTCD 5.1.5, page 142.
+ *
+ * v, w receive the barycentric weights of b and c for the returned point
+ * (the weight of a is 1 - v - w). The vertex, edge and interior regions are
+ * tested in turn.
+ */
+inline VHACD::Vect3 ClosestPointOnTriangle(const VHACD::Vect3& a,
+                                           const VHACD::Vect3& b,
+                                           const VHACD::Vect3& c,
+                                           const VHACD::Vect3& p,
+                                           double& v,
+                                           double& w)
+{
+    const VHACD::Vect3 ab = b - a;
+    const VHACD::Vect3 ac = c - a;
+
+    // Vertex region of a
+    const VHACD::Vect3 fromA = p - a;
+    const double d1 = ab.Dot(fromA);
+    const double d2 = ac.Dot(fromA);
+    if (d1 <= double(0.0) && d2 <= double(0.0))
+    {
+        v = double(0.0);
+        w = double(0.0);
+        return a;
+    }
+
+    // Vertex region of b
+    const VHACD::Vect3 fromB = p - b;
+    const double d3 = ab.Dot(fromB);
+    const double d4 = ac.Dot(fromB);
+    if (d3 >= double(0.0) && d4 <= d3)
+    {
+        v = double(1.0);
+        w = double(0.0);
+        return b;
+    }
+
+    // Edge region of ab
+    const double vc = d1 * d4 - d3 * d2;
+    if (vc <= double(0.0) && d1 >= double(0.0) && d3 <= double(0.0))
+    {
+        v = d1 / (d1 - d3);
+        w = double(0.0);
+        return a + v * ab;
+    }
+
+    // Vertex region of c
+    const VHACD::Vect3 fromC = p - c;
+    const double d5 = ab.Dot(fromC);
+    const double d6 = ac.Dot(fromC);
+    if (d6 >= double(0.0) && d5 <= d6)
+    {
+        v = double(0.0);
+        w = double(1.0);
+        return c;
+    }
+
+    // Edge region of ac
+    const double vb = d5 * d2 - d1 * d6;
+    if (vb <= double(0.0) && d2 >= double(0.0) && d6 <= double(0.0))
+    {
+        v = double(0.0);
+        w = d2 / (d2 - d6);
+        return a + w * ac;
+    }
+
+    // Edge region of bc
+    const double va = d3 * d6 - d5 * d4;
+    if (va <= double(0.0) && (d4 - d3) >= double(0.0) && (d5 - d6) >= double(0.0))
+    {
+        w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
+        v = double(1.0) - w;
+        return b + w * (c - b);
+    }
+
+    // Interior of the face
+    const double denom = double(1.0) / (va + vb + vc);
+    v = vb * denom;
+    w = vc * denom;
+    return a + ab * v + ac * w;
+}
+
+/*
+ * Bounding-volume hierarchy of axis-aligned boxes over a triangle mesh,
+ * used for ray casts and closest-point queries.
+ *
+ * The tree keeps pointers to the vertex and triangle vectors it was built
+ * from; it does not copy them. It is movable but not copyable.
+ */
+class AABBTree
+{
+public:
+    AABBTree() = default;
+    AABBTree(AABBTree&&) = default;
+    AABBTree& operator=(AABBTree&&) = default;
+
+    // Builds the tree over the given mesh
+    AABBTree(const std::vector<VHACD::Vertex>& vertices,
+             const std::vector<VHACD::Triangle>& indices);
+
+    // Segment query from start towards to; a hit beyond `to` is rejected
+    bool TraceRay(const VHACD::Vect3& start,
+                  const VHACD::Vect3& to,
+                  double& outT,
+                  double& faceSign,
+                  VHACD::Vect3& hitLocation) const;
+
+    // Ray query that increments insideCount or outsideCount based on the
+    // sign reported for the nearest hit
+    bool TraceRay(const VHACD::Vect3& start,
+                  const VHACD::Vect3& dir,
+                  uint32_t& insideCount,
+                  uint32_t& outsideCount) const;
+
+    // Ray query returning the nearest hit with barycentric weights and face index
+    bool TraceRay(const VHACD::Vect3& start,
+                  const VHACD::Vect3& dir,
+                  double& outT,
+                  double& u,
+                  double& v,
+                  double& w,
+                  double& faceSign,
+                  uint32_t& faceIndex) const;
+
+    // Centre / corners of the root node's bounds
+    VHACD::Vect3 GetCenter() const;
+    VHACD::Vect3 GetMinExtents() const;
+    VHACD::Vect3 GetMaxExtents() const;
+
+    // Closest mesh point to `point` that is nearer than maxDistance
+    bool GetClosestPointWithinDistance(const VHACD::Vect3& point,
+                                       double maxDistance,
+                                       VHACD::Vect3& closestPoint) const;
+
+private:
+    struct Node
+    {
+        // Inner node: index of the first of two adjacent children.
+        // Leaf node:  number of entries in m_faces.
+        union
+        {
+            uint32_t m_children;
+            uint32_t m_numFaces{ 0 };
+        };
+
+        // Non-null marks a leaf; the queries test this pointer to tell
+        // leaves from inner nodes
+        uint32_t* m_faces{ nullptr };
+        VHACD::BoundsAABB m_extents;
+    };
+
+    // Orders face indices by centroid coordinate along one axis
+    struct FaceSorter
+    {
+        FaceSorter(const std::vector<VHACD::Vertex>& positions,
+                   const std::vector<VHACD::Triangle>& indices,
+                   uint32_t axis);
+
+        bool operator()(uint32_t lhs, uint32_t rhs) const;
+
+        double GetCentroid(uint32_t face) const;
+
+        const std::vector<VHACD::Vertex>& m_vertices;
+        const std::vector<VHACD::Triangle>& m_indices;
+        uint32_t m_axis;
+    };
+
+    // partition the objects and return the number of objects in the lower partition
+    uint32_t PartitionMedian(Node& n,
+                             uint32_t* faces,
+                             uint32_t numFaces);
+    uint32_t PartitionSAH(Node& n,
+                          uint32_t* faces,
+                          uint32_t numFaces);
+
+    void Build();
+
+    void BuildRecursive(uint32_t nodeIndex,
+                        uint32_t* faces,
+                        uint32_t numFaces);
+
+    void TraceRecursive(uint32_t nodeIndex,
+                        const VHACD::Vect3& start,
+                        const VHACD::Vect3& dir,
+                        double& outT,
+                        double& u,
+                        double& v,
+                        double& w,
+                        double& faceSign,
+                        uint32_t& faceIndex) const;
+
+
+    bool GetClosestPointWithinDistance(const VHACD::Vect3& point,
+                                       const double maxDis,
+                                       double& dis,
+                                       double& v,
+                                       double& w,
+                                       uint32_t& faceIndex,
+                                       VHACD::Vect3& closest) const;
+
+    void GetClosestPointWithinDistanceSqRecursive(uint32_t nodeIndex,
+                                                  const VHACD::Vect3& point,
+                                                  double& outDisSq,
+                                                  double& outV,
+                                                  double& outW,
+                                                  uint32_t& outFaceIndex,
+                                                  VHACD::Vect3& closest) const;
+
+    VHACD::BoundsAABB CalculateFaceBounds(uint32_t* faces,
+                                          uint32_t numFaces);
+
+    // track the next free node; initialized so that a default-constructed
+    // tree never holds (or moves) an indeterminate value
+    uint32_t m_freeNode{ 0 };
+
+    // Mesh the tree was built over (not owned)
+    const std::vector<VHACD::Vertex>* m_vertices{ nullptr };
+    const std::vector<VHACD::Triangle>* m_indices{ nullptr };
+
+    std::vector<uint32_t> m_faces;                 // face indices, reordered by the build
+    std::vector<Node> m_nodes;                     // node storage, root at index 0
+    std::vector<VHACD::BoundsAABB> m_faceBounds;   // bounds of each individual face
+
+    // stats
+    uint32_t m_treeDepth{ 0 };
+    uint32_t m_innerNodes{ 0 };
+    uint32_t m_leafNodes{ 0 };
+
+    // current recursion depth while building
+    uint32_t s_depth{ 0 };
+};
+
+// Binds the sorter to a mesh (by reference) and to the axis to compare along.
+AABBTree::FaceSorter::FaceSorter(const std::vector<VHACD::Vertex>& positions,
+                                 const std::vector<VHACD::Triangle>& indices,
+                                 uint32_t axis)
+    : m_vertices(positions),
+      m_indices(indices),
+      m_axis(axis)
+{
+}
+
+// Strict ordering of two faces by centroid coordinate on m_axis; equal
+// centroids fall back to comparing the face indices.
+inline bool AABBTree::FaceSorter::operator()(uint32_t lhs,
+                                             uint32_t rhs) const
+{
+    const double lhsCentroid = GetCentroid(lhs);
+    const double rhsCentroid = GetCentroid(rhs);
+
+    return (lhsCentroid == rhsCentroid) ? (lhs < rhs)
+                                        : (lhsCentroid < rhsCentroid);
+}
+
+// Mean of the three corner coordinates of `face` along m_axis.
+inline double AABBTree::FaceSorter::GetCentroid(uint32_t face) const
+{
+    const VHACD::Triangle& tri = m_indices[face];
+
+    const VHACD::Vect3& p0 = m_vertices[tri.mI0];
+    const VHACD::Vect3& p1 = m_vertices[tri.mI1];
+    const VHACD::Vect3& p2 = m_vertices[tri.mI2];
+
+    const double sum = p0[m_axis] + p1[m_axis] + p2[m_axis];
+    return sum / double(3.0);
+}
+
+// Records the addresses of the mesh arrays (no copy is made) and builds the
+// tree right away.
+AABBTree::AABBTree(const std::vector<VHACD::Vertex>& vertices,
+                   const std::vector<VHACD::Triangle>& indices)
+    : m_vertices(&vertices),
+      m_indices(&indices)
+{
+    Build();
+}
+
+// Segment query: casts from start towards `to` and accepts only hits whose
+// parameter does not exceed the value returned by Normalize() on (to - start).
+// hitLocation is written for any ray hit, including ones that are then
+// rejected for lying beyond `to`.
+bool AABBTree::TraceRay(const VHACD::Vect3& start,
+                        const VHACD::Vect3& to,
+                        double& outT,
+                        double& faceSign,
+                        VHACD::Vect3& hitLocation) const
+{
+    VHACD::Vect3 dir = to - start;
+    const double distance = dir.Normalize();
+
+    // Barycentric weights and face index are not exposed by this overload
+    double u, v, w;
+    uint32_t faceIndex;
+
+    if (!TraceRay(start, dir, outT, u, v, w, faceSign, faceIndex))
+    {
+        return false;
+    }
+
+    hitLocation = start + dir * outT;
+    return !(outT > distance);
+}
+
+// Casts a ray and, on a hit, increments insideCount when the reported face
+// sign is >= 0 and outsideCount otherwise. Returns whether anything was hit.
+bool AABBTree::TraceRay(const VHACD::Vect3& start,
+                        const VHACD::Vect3& dir,
+                        uint32_t& insideCount,
+                        uint32_t& outsideCount) const
+{
+    double outT, u, v, w, faceSign;
+    uint32_t faceIndex;
+
+    const bool hit = TraceRay(start, dir, outT, u, v, w, faceSign, faceIndex);
+    if (!hit)
+    {
+        return false;
+    }
+
+    uint32_t& counter = (faceSign >= 0) ? insideCount : outsideCount;
+    counter++;
+    return true;
+}
+
+/*
+ * Nearest-hit ray query.
+ *
+ * outT is reset to FLT_MAX and then lowered by the traversal to the
+ * parameter of the nearest triangle hit; u, v, w, faceSign and faceIndex
+ * are written only when a hit is found.
+ *
+ * Returns true when some triangle was hit. A tree without nodes (for
+ * example a default-constructed one) reports no hit.
+ */
+bool AABBTree::TraceRay(const VHACD::Vect3& start,
+                        const VHACD::Vect3& dir,
+                        double& outT,
+                        double& u,
+                        double& v,
+                        double& w,
+                        double& faceSign,
+                        uint32_t& faceIndex) const
+{
+    outT = FLT_MAX;
+
+    // The traversal indexes m_nodes[0]; guard against an unbuilt tree
+    if (m_nodes.empty())
+    {
+        return false;
+    }
+
+    TraceRecursive(0,
+                   start,
+                   dir,
+                   outT,
+                   u,
+                   v,
+                   w,
+                   faceSign,
+                   faceIndex);
+    return (outT != FLT_MAX);
+}
+
+// Centre of the root node's bounding box.
+VHACD::Vect3 AABBTree::GetCenter() const
+{
+    const Node& root = m_nodes[0];
+    return root.m_extents.GetCenter();
+}
+
+// Minimum corner of the root node's bounding box.
+VHACD::Vect3 AABBTree::GetMinExtents() const
+{
+    const Node& root = m_nodes[0];
+    return root.m_extents.GetMin();
+}
+
+// Maximum corner of the root node's bounding box.
+VHACD::Vect3 AABBTree::GetMaxExtents() const
+{
+    const Node& root = m_nodes[0];
+    return root.m_extents.GetMax();
+}
+
+// Convenience overload: runs the detailed closest-point query and discards
+// the distance, barycentric weights and face index it produces.
+bool AABBTree::GetClosestPointWithinDistance(const VHACD::Vect3& point,
+                                             double maxDistance,
+                                             VHACD::Vect3& closestPoint) const
+{
+    double unusedDis, unusedV, unusedW;
+    uint32_t unusedFace;
+    return GetClosestPointWithinDistance(point,
+                                         maxDistance,
+                                         unusedDis,
+                                         unusedV,
+                                         unusedW,
+                                         unusedFace,
+                                         closestPoint);
+}
+
+// partition faces around the median face
+// Partitions `faces` around the median face along the longest axis of the
+// node's bounds (std::nth_element, so each half is not fully sorted).
+// Returns the size of the lower partition, numFaces / 2.
+uint32_t AABBTree::PartitionMedian(Node& n,
+                                   uint32_t* faces,
+                                   uint32_t numFaces)
+{
+    const uint32_t lowerCount = numFaces / 2;
+
+    FaceSorter byCentroid(*m_vertices,
+                          *m_indices,
+                          n.m_extents.GetSize().LongestAxis());
+
+    uint32_t* const first = faces;
+    uint32_t* const median = faces + lowerCount;
+    uint32_t* const last = faces + numFaces;
+    std::nth_element(first, median, last, byCentroid);
+
+    return lowerCount;
+}
+
+// partition faces based on the surface area heuristic
+/*
+ * Partitions `faces` using a surface-area heuristic.
+ *
+ * For each axis the faces are sorted by centroid, the surface area of the
+ * running bounds is accumulated from both ends, and every split position is
+ * scored. The faces are finally left sorted along the best axis.
+ *
+ * Returns the number of faces in the lower partition (0 when numFaces is 0).
+ */
+uint32_t AABBTree::PartitionSAH(Node&,
+                                uint32_t* faces,
+                                uint32_t numFaces)
+{
+    // Nothing to split; also avoids indexing the empty cumulative arrays
+    // and the unsigned wrap of numFaces - 1 below
+    if (numFaces == 0)
+    {
+        return 0;
+    }
+
+    uint32_t bestAxis = 0;
+    uint32_t bestIndex = 0;
+    double bestCost = FLT_MAX;
+
+    for (uint32_t a = 0; a < 3; ++a)
+    {
+        // sort faces by centroids
+        FaceSorter predicate(*m_vertices,
+                             *m_indices,
+                             a);
+        std::sort(faces,
+                  faces + numFaces,
+                  predicate);
+
+        // two passes over data to calculate upper and lower bounds
+        std::vector<double> cumulativeLower(numFaces);
+        std::vector<double> cumulativeUpper(numFaces);
+
+        VHACD::BoundsAABB lower;
+        VHACD::BoundsAABB upper;
+
+        for (uint32_t i = 0; i < numFaces; ++i)
+        {
+            lower.Union(m_faceBounds[faces[i]]);
+            upper.Union(m_faceBounds[faces[numFaces - i - 1]]);
+
+            cumulativeLower[i] = lower.SurfaceArea();
+            cumulativeUpper[numFaces - i - 1] = upper.SurfaceArea();
+        }
+
+        double invTotalSA = double(1.0) / cumulativeUpper[0];
+
+        // test all split positions (i + 1 < numFaces cannot wrap)
+        for (uint32_t i = 0; i + 1 < numFaces; ++i)
+        {
+            double pBelow = cumulativeLower[i] * invTotalSA;
+            double pAbove = cumulativeUpper[i] * invTotalSA;
+
+            double cost = double(0.125) + (pBelow * i + pAbove * (numFaces - i));
+            if (cost <= bestCost)
+            {
+                bestCost = cost;
+                bestIndex = i;
+                bestAxis = a;
+            }
+        }
+    }
+
+    // re-sort by best axis
+    FaceSorter predicate(*m_vertices,
+                         *m_indices,
+                         bestAxis);
+    std::sort(faces,
+              faces + numFaces,
+              predicate);
+
+    return bestIndex + 1;
+}
+
+/*
+ * Builds the tree for the mesh referenced by m_vertices / m_indices:
+ * fills m_faces with the identity face order, computes the bounds of every
+ * face, then recursively partitions the faces starting at node 0.
+ */
+void AABBTree::Build()
+{
+    const uint32_t numFaces = uint32_t(m_indices->size());
+
+    // build initial list of faces
+    m_faces.reserve(numFaces);
+
+    // calculate bounds of each face and store
+    m_faceBounds.reserve(numFaces);
+
+    for (uint32_t i = 0; i < numFaces; ++i)
+    {
+        // bounds of the single face i
+        VHACD::BoundsAABB top = CalculateFaceBounds(&i,
+                                                    1);
+
+        m_faces.push_back(i);
+        m_faceBounds.push_back(top);
+    }
+
+    m_nodes.reserve(uint32_t(numFaces * double(1.5)));
+
+    // node 0 is the root, so the first free node is 1
+    m_freeNode = 1;
+
+    // start building
+    BuildRecursive(0,
+                   m_faces.data(),
+                   numFaces);
+
+    // every recursion level must have been unwound
+    assert(s_depth == 0);
+}
+
+/*
+ * Fills node `nodeIndex` for the face range [faces, faces + numFaces).
+ *
+ * Ranges of at most kMaxFacesPerLeaf faces become leaves that point into the
+ * face array; larger ranges are split at the median and handed to two newly
+ * allocated, adjacent child nodes.
+ */
+void AABBTree::BuildRecursive(uint32_t nodeIndex,
+                              uint32_t* faces,
+                              uint32_t numFaces)
+{
+    const uint32_t kMaxFacesPerLeaf = 6;
+
+    // grow the node array when this index is not backed by storage yet
+    if (nodeIndex >= m_nodes.size())
+    {
+        const uint32_t grown = std::max(uint32_t(double(1.5) * m_nodes.size()), 512U);
+        m_nodes.resize(grown);
+    }
+
+    // track max tree depth
+    ++s_depth;
+    m_treeDepth = std::max(m_treeDepth, s_depth);
+
+    // Nodes are always addressed through m_nodes[nodeIndex]: a reference
+    // would dangle once a recursive call resizes the array
+    m_nodes[nodeIndex].m_extents = CalculateFaceBounds(faces,
+                                                       numFaces);
+
+    if (numFaces <= kMaxFacesPerLeaf)
+    {
+        // leaf: remember the face range
+        m_nodes[nodeIndex].m_faces = faces;
+        m_nodes[nodeIndex].m_numFaces = numFaces;
+
+        ++m_leafNodes;
+        --s_depth;
+        return;
+    }
+
+    ++m_innerNodes;
+
+    // face counts for each branch
+    const uint32_t leftCount = PartitionMedian(m_nodes[nodeIndex], faces, numFaces);
+    const uint32_t rightCount = numFaces - leftCount;
+
+    // claim two consecutive nodes for the children
+    const uint32_t firstChild = m_freeNode;
+    m_nodes[nodeIndex].m_children = firstChild;
+    m_freeNode += 2;
+
+    // build each side recursively
+    BuildRecursive(firstChild + 0, faces, leftCount);
+    BuildRecursive(firstChild + 1, faces + leftCount, rightCount);
+
+    --s_depth;
+}
+
+/*
+ * Ray traversal below node `nodeIndex`.
+ *
+ * Inner nodes (m_faces is null) visit the child whose box is entered first,
+ * then the other one; a child is skipped when its box entry distance is not
+ * below the best hit so far. Leaves test each of their triangles and keep
+ * the nearest hit in outT / outU / outV / outW / faceSign / faceIndex.
+ */
+void AABBTree::TraceRecursive(uint32_t nodeIndex,
+                              const VHACD::Vect3& start,
+                              const VHACD::Vect3& dir,
+                              double& outT,
+                              double& outU,
+                              double& outV,
+                              double& outW,
+                              double& faceSign,
+                              uint32_t& faceIndex) const
+{
+    const Node& node = m_nodes[nodeIndex];
+
+    if (node.m_faces != NULL)
+    {
+        // leaf: test every triangle referenced by this node
+        double t, u, v, w, s;
+
+        for (uint32_t i = 0; i < node.m_numFaces; ++i)
+        {
+            const uint32_t face = node.m_faces[i];
+            const VHACD::Triangle& tri = (*m_indices)[face];
+
+            const VHACD::Vect3& a = (*m_vertices)[tri.mI0];
+            const VHACD::Vect3& b = (*m_vertices)[tri.mI1];
+            const VHACD::Vect3& c = (*m_vertices)[tri.mI2];
+
+            const bool hit = IntersectRayTriTwoSided(start, dir, a, b, c, t, u, v, w, s, NULL);
+            if (hit && t < outT)
+            {
+                outT = t;
+                outU = u;
+                outV = v;
+                outW = w;
+                faceSign = s;
+                faceIndex = face;
+            }
+        }
+        return;
+    }
+
+    // inner node: entry distance of the ray into each child box
+    // (stays FLT_MAX when the box is missed)
+    const uint32_t firstChild = node.m_children;
+    double dist[2] = { FLT_MAX, FLT_MAX };
+
+    IntersectRayAABB(start, dir, m_nodes[firstChild + 0].m_extents, dist[0]);
+    IntersectRayAABB(start, dir, m_nodes[firstChild + 1].m_extents, dist[1]);
+
+    // nearer child first, so the second one can often be culled
+    const uint32_t nearSide = (dist[1] < dist[0]) ? 1 : 0;
+    const uint32_t visitOrder[2] = { nearSide, 1 - nearSide };
+
+    for (uint32_t k = 0; k < 2; ++k)
+    {
+        const uint32_t side = visitOrder[k];
+        if (dist[side] < outT)
+        {
+            TraceRecursive(firstChild + side,
+                           start,
+                           dir,
+                           outT,
+                           outU,
+                           outV,
+                           outW,
+                           faceSign,
+                           faceIndex);
+        }
+    }
+}
+
+/*
+ * Detailed closest-point query.
+ *
+ * point:     query position
+ * maxDis:    only points nearer than this are accepted
+ * dis:       distance to the closest point found (maxDis when none is found)
+ * v, w:      barycentric weights on the closest face (written on success)
+ * faceIndex: index of that face, or uint32_t(~0) when none is found
+ * closest:   the closest point (written on success)
+ *
+ * Returns true when a face within maxDis was found. A tree without nodes
+ * (for example a default-constructed one) reports no result.
+ */
+bool AABBTree::GetClosestPointWithinDistance(const VHACD::Vect3& point,
+                                             const double maxDis,
+                                             double& dis,
+                                             double& v,
+                                             double& w,
+                                             uint32_t& faceIndex,
+                                             VHACD::Vect3& closest) const
+{
+    dis = maxDis;
+    faceIndex = uint32_t(~0);
+
+    // The traversal indexes m_nodes[0]; guard against an unbuilt tree
+    if (m_nodes.empty())
+    {
+        return false;
+    }
+
+    // the traversal works on squared distances
+    double disSq = dis * dis;
+
+    GetClosestPointWithinDistanceSqRecursive(0,
+                                             point,
+                                             disSq,
+                                             v,
+                                             w,
+                                             faceIndex,
+                                             closest);
+    dis = sqrt(disSq);
+
+    return (faceIndex < (~(static_cast<unsigned int>(0))));
+}
+
+/*
+ * Closest-point traversal below node `nodeIndex`, in squared distances.
+ *
+ * Inner nodes (m_faces is null) visit the child whose box is nearer to the
+ * point first; a child is skipped when its box is not nearer than the best
+ * squared distance so far. Leaves test each triangle and keep the best
+ * result in outDisSq / outV / outW / outFaceIndex / closestPoint.
+ */
+void AABBTree::GetClosestPointWithinDistanceSqRecursive(uint32_t nodeIndex,
+                                                        const VHACD::Vect3& point,
+                                                        double& outDisSq,
+                                                        double& outV,
+                                                        double& outW,
+                                                        uint32_t& outFaceIndex,
+                                                        VHACD::Vect3& closestPoint) const
+{
+    const Node& node = m_nodes[nodeIndex];
+
+    if (node.m_faces != nullptr)
+    {
+        // leaf: test every triangle referenced by this node
+        double v, w;
+        for (uint32_t i = 0; i < node.m_numFaces; ++i)
+        {
+            const uint32_t face = node.m_faces[i];
+            const VHACD::Triangle& tri = (*m_indices)[face];
+
+            const VHACD::Vect3& a = (*m_vertices)[tri.mI0];
+            const VHACD::Vect3& b = (*m_vertices)[tri.mI1];
+            const VHACD::Vect3& c = (*m_vertices)[tri.mI2];
+
+            VHACD::Vect3 candidate = ClosestPointOnTriangle(a, b, c, point, v, w);
+            const double candidateSq = (candidate - point).GetNormSquared();
+
+            if (candidateSq < outDisSq)
+            {
+                closestPoint = candidate;
+                outDisSq = candidateSq;
+                outV = v;
+                outW = w;
+                outFaceIndex = face;
+            }
+        }
+        return;
+    }
+
+    // inner node: squared distance from the point to each child box
+    const uint32_t firstChild = node.m_children;
+
+    VHACD::Vect3 onLeftBox = m_nodes[firstChild + 0].m_extents.ClosestPoint(point);
+    VHACD::Vect3 onRightBox = m_nodes[firstChild + 1].m_extents.ClosestPoint(point);
+
+    const double boxDistSq[2] = { (point - onLeftBox).GetNormSquared(),
+                                  (point - onRightBox).GetNormSquared() };
+
+    // nearer child first, so the second one can often be culled
+    const uint32_t nearSide = (boxDistSq[1] < boxDistSq[0]) ? 1 : 0;
+    const uint32_t visitOrder[2] = { nearSide, 1 - nearSide };
+
+    for (uint32_t k = 0; k < 2; ++k)
+    {
+        const uint32_t side = visitOrder[k];
+        if (boxDistSq[side] < outDisSq)
+        {
+            GetClosestPointWithinDistanceSqRecursive(firstChild + side,
+                                                     point,
+                                                     outDisSq,
+                                                     outV,
+                                                     outW,
+                                                     outFaceIndex,
+                                                     closestPoint);
+        }
+    }
+}
+
+// Bounding box of all corners of the listed faces. With numFaces == 0 the
+// result is built from (FLT_MAX, -FLT_MAX), i.e. min greater than max.
+VHACD::BoundsAABB AABBTree::CalculateFaceBounds(uint32_t* faces,
+                                                uint32_t numFaces)
+{
+    VHACD::Vect3 minExtents( FLT_MAX);
+    VHACD::Vect3 maxExtents(-FLT_MAX);
+
+    // widens the running bounds so that they include one corner
+    auto include = [&minExtents, &maxExtents](VHACD::Vect3 corner)
+    {
+        minExtents = corner.CWiseMin(minExtents);
+        maxExtents = corner.CWiseMax(maxExtents);
+    };
+
+    for (uint32_t i = 0; i < numFaces; ++i)
+    {
+        const VHACD::Triangle& tri = (*m_indices)[faces[i]];
+
+        include((*m_vertices)[tri.mI0]);
+        include((*m_vertices)[tri.mI1]);
+        include((*m_vertices)[tri.mI2]);
+    }
+
+    return VHACD::BoundsAABB(minExtents,
+                             maxExtents);
+}
+
+// Per-cell classification stored in Volume::m_data (one byte per voxel).
+enum class VoxelValue : uint8_t
+{
+    PRIMITIVE_UNDEFINED              = 0,
+    PRIMITIVE_OUTSIDE_SURFACE_TOWALK = 1,
+    PRIMITIVE_OUTSIDE_SURFACE        = 2,
+    PRIMITIVE_INSIDE_SURFACE         = 3,
+    PRIMITIVE_ON_SURFACE             = 4
+};
+
+/*
+ * Voxel grid holding one VoxelValue per cell, plus lists of the surface and
+ * interior voxels and per-class counters.
+ * NOTE(review): the exact filling rules live in the member definitions,
+ * which are outside this excerpt -- confirm there before relying on them.
+ */
+class Volume
+{
+public:
+    // Voxelizes the given mesh at resolution `dim` using `fillMode`
+    void Voxelize(const std::vector<VHACD::Vertex>& points,
+                  const std::vector<VHACD::Triangle>& triangles,
+                  const size_t dim,
+                  FillMode fillMode,
+                  const AABBTree& aabbTree);
+
+    void RaycastFill(const AABBTree& aabbTree);
+
+    // Cell access by integer grid coordinates (i, j, k)
+    void SetVoxel(const size_t i, const size_t j, const size_t k, VoxelValue value);
+
+    VoxelValue& GetVoxel(const size_t i, const size_t j, const size_t k);
+
+    const VoxelValue& GetVoxel(const size_t i, const size_t j, const size_t k) const;
+
+    const std::vector<Voxel>& GetSurfaceVoxels() const;
+    const std::vector<Voxel>& GetInteriorVoxels() const;
+
+    double GetScale() const;
+    const VHACD::BoundsAABB& GetBounds() const;
+    const VHACD::Vector3<uint32_t>& GetDimensions() const;
+
+    // Public state
+    VHACD::BoundsAABB m_bounds;
+    double m_scale{ 1.0 };
+    VHACD::Vector3<uint32_t> m_dim{ 0 };
+    size_t m_numVoxelsOnSurface{ 0 };
+    size_t m_numVoxelsInsideSurface{ 0 };
+    size_t m_numVoxelsOutsideSurface{ 0 };
+    std::vector<VoxelValue> m_data;
+
+private:
+    // Operates on the cell range given by the corners (i0, j0, k0) and (i1, j1, k1)
+    void MarkOutsideSurface(const size_t i0, const size_t j0, const size_t k0,
+                            const size_t i1, const size_t j1, const size_t k1);
+    void FillOutsideSurface();
+
+    void FillInsideSurface();
+
+    std::vector<VHACD::Voxel> m_surfaceVoxels;
+    std::vector<VHACD::Voxel> m_interiorVoxels;
+};
+
+// Tests whether a plane intersects an origin-centred axis-aligned box.
+//
+// normal : plane normal (does not need to be normalized)
+// vert   : a point lying on the plane, expressed relative to the box centre
+// maxbox : half extents of the box
+//
+// Returns true when the two box corners that are extreme along the normal
+// lie on opposite sides of the plane (or touch it).
+bool PlaneBoxOverlap(const VHACD::Vect3& normal,
+                     const VHACD::Vect3& vert,
+                     const VHACD::Vect3& maxbox)
+{
+    VHACD::Vect3 nearCorner;
+    VHACD::Vect3 farCorner;
+    for (int32_t axis = 0; axis < 3; ++axis)
+    {
+        const double offset = vert[axis];
+        const double low  = -maxbox[axis] - offset;
+        const double high =  maxbox[axis] - offset;
+
+        // Pick, per axis, the box side that is extreme against / along the normal.
+        const bool pointsPositive = normal[axis] > double(0.0);
+        nearCorner[axis] = pointsPositive ? low : high;
+        farCorner[axis]  = pointsPositive ? high : low;
+    }
+
+    // The nearest corner is already in front of the plane: no overlap.
+    if (normal.Dot(nearCorner) > double(0.0))
+    {
+        return false;
+    }
+
+    // Otherwise the box overlaps only if the farthest corner reaches the plane.
+    return normal.Dot(farCorner) >= double(0.0);
+}
+
+// Single separating-axis test used by TriBoxOverlap.
+//
+// Projects two triangle vertices onto the axis described by (a, b) and
+// compares the projected interval with the projected box radius.
+//
+// a, b                 : axis coefficients applied to the two vertex coordinates
+// fa, fb               : absolute values of the axis coefficients
+// v0, v1               : coordinates of the first vertex
+// v2, v3               : coordinates of the second vertex
+// boxHalfSize1/2       : box half extents matching fa and fb respectively
+//
+// Returns false when the axis separates the triangle from the box.
+bool AxisTest(double  a, double  b, double fa, double fb,
+              double v0, double v1, double v2, double v3,
+              double boxHalfSize1,  double boxHalfSize2)
+{
+    const double firstProjection  = a * v0 + b * v1;
+    const double secondProjection = a * v2 + b * v3;
+
+    const double lowest  = std::min(firstProjection, secondProjection);
+    const double highest = std::max(firstProjection, secondProjection);
+
+    const double radius = fa * boxHalfSize1 + fb * boxHalfSize2;
+
+    const bool separated = (lowest > radius) || (highest < -radius);
+    return !separated;
+}
+
+// Separating-axis overlap test between an axis-aligned box and a triangle.
+//
+// boxCenter   : centre of the box
+// boxHalfSize : half extents of the box along x, y and z
+// triVer0..2  : the three triangle vertices
+//
+// Returns true when the box and the triangle overlap.
+bool TriBoxOverlap(const VHACD::Vect3& boxCenter,
+                   const VHACD::Vect3& boxHalfSize,
+                   const VHACD::Vect3& triVer0,
+                   const VHACD::Vect3& triVer1,
+                   const VHACD::Vect3& triVer2)
+{
+    /*    use separating axis theorem to test overlap between triangle and box */
+    /*    need to test for overlap in these directions: */
+    /*    1) the {x,y,z}-directions (actually, since we use the AABB of the triangle */
+    /*       we do not even need to test these) */
+    /*    2) normal of the triangle */
+    /*    3) crossproduct(edge from tri, {x,y,z}-direction) */
+    /*       this gives 3x3=9 more tests */
+
+    /* move everything so that the boxcenter is in (0,0,0) */
+    VHACD::Vect3 v0 = triVer0 - boxCenter;
+    VHACD::Vect3 v1 = triVer1 - boxCenter;
+    VHACD::Vect3 v2 = triVer2 - boxCenter;
+
+    /* triangle edges */
+    VHACD::Vect3 e0 = v1 - v0;
+    VHACD::Vect3 e1 = v2 - v1;
+    VHACD::Vect3 e2 = v0 - v2;
+
+    /* Bullet 3:  */
+    /*  test the 9 tests first (this was faster) */
+    double fex = std::fabs(e0[0]);
+    double fey = std::fabs(e0[1]);
+    double fez = std::fabs(e0[2]);
+
+    /*
+     * These should use Get*() instead of subscript for consistency, but the function calls are long enough already
+     */
+    /* edge e0 crossed with the x, y and z axes */
+    if (!AxisTest( e0[2], -e0[1], fez, fey, v0[1], v0[2], v2[1], v2[2], boxHalfSize[1], boxHalfSize[2])) return false; // X01
+    if (!AxisTest(-e0[2],  e0[0], fez, fex, v0[0], v0[2], v2[0], v2[2], boxHalfSize[0], boxHalfSize[2])) return false; // Y02
+    if (!AxisTest( e0[1], -e0[0], fey, fex, v1[0], v1[1], v2[0], v2[1], boxHalfSize[0], boxHalfSize[1])) return false; // Z12
+
+    fex = std::fabs(e1[0]);
+    fey = std::fabs(e1[1]);
+    fez = std::fabs(e1[2]);
+
+    /* edge e1 crossed with the x, y and z axes */
+    if (!AxisTest( e1[2], -e1[1], fez, fey, v0[1], v0[2], v2[1], v2[2], boxHalfSize[1], boxHalfSize[2])) return false; // X01
+    if (!AxisTest(-e1[2],  e1[0], fez, fex, v0[0], v0[2], v2[0], v2[2], boxHalfSize[0], boxHalfSize[2])) return false; // Y02
+    /* The axis lies in the xy-plane, so the box radius is built from the x and y half extents. */
+    if (!AxisTest( e1[1], -e1[0], fey, fex, v0[0], v0[1], v1[0], v1[1], boxHalfSize[0], boxHalfSize[1])) return false; // Z0
+
+    fex = std::fabs(e2[0]);
+    fey = std::fabs(e2[1]);
+    fez = std::fabs(e2[2]);
+
+    /* edge e2 crossed with the x, y and z axes */
+    if (!AxisTest( e2[2], -e2[1], fez, fey, v0[1], v0[2], v1[1], v1[2], boxHalfSize[1], boxHalfSize[2])) return false; // X2
+    if (!AxisTest(-e2[2],  e2[0], fez, fex, v0[0], v0[2], v1[0], v1[2], boxHalfSize[0], boxHalfSize[2])) return false; // Y1
+    if (!AxisTest( e2[1], -e2[0], fey, fex, v1[0], v1[1], v2[0], v2[1], boxHalfSize[0], boxHalfSize[1])) return false; // Z12
+
+    /* Bullet 1: */
+    /*  first test overlap in the {x,y,z}-directions */
+    /*  find min, max of the triangle each direction, and test for overlap in */
+    /*  that direction -- this is equivalent to testing a minimal AABB around */
+    /*  the triangle against the AABB */
+
+    /* test in x-direction */
+    double min = std::min({v0.GetX(), v1.GetX(), v2.GetX()});
+    double max = std::max({v0.GetX(), v1.GetX(), v2.GetX()});
+    if (min > boxHalfSize[0] || max < -boxHalfSize[0])
+        return false;
+
+    /* test in y-direction */
+    min = std::min({v0.GetY(), v1.GetY(), v2.GetY()});
+    max = std::max({v0.GetY(), v1.GetY(), v2.GetY()});
+    if (min > boxHalfSize[1] || max < -boxHalfSize[1])
+        return false;
+
+    /* test in z-direction */
+    min = std::min({v0.GetZ(), v1.GetZ(), v2.GetZ()});
+    max = std::max({v0.GetZ(), v1.GetZ(), v2.GetZ()});
+    if (min > boxHalfSize[2] || max < -boxHalfSize[2])
+        return false;
+
+    /* Bullet 2: */
+    /*  test if the box intersects the plane of the triangle */
+    /*  compute plane equation of triangle: normal*x+d=0 */
+    VHACD::Vect3 normal = e0.Cross(e1);
+
+    if (!PlaneBoxOverlap(normal, v0, boxHalfSize))
+        return false;
+    return true; /* box and triangle overlaps */
+}
+
+// Rebuilds the voxel grid for a triangle mesh and classifies every voxel.
+//
+// points     : mesh vertices; the call is a no-op when this is empty
+// indices    : triangles, each holding three indices into 'points'
+// dimensions : resolution hint; the longest axis of the bounding box receives
+//              max(32, 1.5 * dimensions^0.33) voxels and the two other axes are
+//              sized proportionally to their extent
+// fillMode   : how the voxels not touched by a triangle are classified
+// aabbTree   : only used when fillMode is FillMode::RAYCAST_FILL
+//
+// NOTE(review): a point set with zero extent along its longest axis makes 'r' zero
+// and the divisions below ill-defined - confirm callers never pass such input.
+void Volume::Voxelize(const std::vector<VHACD::Vertex>& points,
+                      const std::vector<VHACD::Triangle>& indices,
+                      const size_t dimensions,
+                      FillMode fillMode,
+                      const AABBTree& aabbTree)
+{
+    double a = std::pow(dimensions, 0.33);
+    size_t dim = a * double(1.5);
+    dim = std::max(dim, size_t(32));
+
+    if (points.empty())
+    {
+        return;
+    }
+
+    m_bounds = BoundsAABB(points);
+
+    VHACD::Vect3 d = m_bounds.GetSize();
+    double r;
+    // Equal comparison is important here to avoid taking the last branch when d[0] == d[1] with d[2] being the smallest
+    // dimension. That would lead to dimensions in i and j to be a lot bigger than expected and make the amount of
+    // voxels in the volume totally unmanageable.
+    if (d[0] >= d[1] && d[0] >= d[2])
+    {
+        r = d[0];
+        m_dim[0] = uint32_t(dim);
+        m_dim[1] = uint32_t(2 + static_cast<size_t>(dim * d[1] / d[0]));
+        m_dim[2] = uint32_t(2 + static_cast<size_t>(dim * d[2] / d[0]));
+    }
+    else if (d[1] >= d[0] && d[1] >= d[2])
+    {
+        r = d[1];
+        m_dim[1] = uint32_t(dim);
+        m_dim[0] = uint32_t(2 + static_cast<size_t>(dim * d[0] / d[1]));
+        m_dim[2] = uint32_t(2 + static_cast<size_t>(dim * d[2] / d[1]));
+    }
+    else
+    {
+        r = d[2];
+        m_dim[2] = uint32_t(dim);
+        m_dim[0] = uint32_t(2 + static_cast<size_t>(dim * d[0] / d[2]));
+        m_dim[1] = uint32_t(2 + static_cast<size_t>(dim * d[1] / d[2]));
+    }
+
+    // Edge length of a voxel in mesh units, and its inverse to map mesh space to voxel space.
+    m_scale = r / (dim - 1);
+    double invScale = (dim - 1) / r;
+
+    // Widen to size_t before multiplying: the product of three uint32_t values
+    // would otherwise be computed (and possibly wrap) in 32 bits.
+    const size_t voxelCount = size_t(m_dim[0]) * size_t(m_dim[1]) * size_t(m_dim[2]);
+    m_data = std::vector<VoxelValue>(voxelCount,
+                                     VoxelValue::PRIMITIVE_UNDEFINED);
+    m_numVoxelsOnSurface = 0;
+    m_numVoxelsInsideSurface = 0;
+    m_numVoxelsOutsideSurface = 0;
+    // The voxel lists describe the grid that has just been discarded; reset them
+    // together with the counters so they never mix entries of two grids.
+    m_surfaceVoxels.clear();
+    m_interiorVoxels.clear();
+
+    VHACD::Vect3 p[3];
+    VHACD::Vect3 boxcenter;
+    VHACD::Vect3 pt;
+    const VHACD::Vect3 boxhalfsize(double(0.5));
+    for (size_t t = 0; t < indices.size(); ++t)
+    {
+        // Voxel-space bounding box of the current triangle.
+        size_t i0 = 0, j0 = 0, k0 = 0;
+        size_t i1 = 0, j1 = 0, k1 = 0;
+        VHACD::Vector3<uint32_t> tri = indices[t];
+        for (int32_t c = 0; c < 3; ++c)
+        {
+            pt = points[tri[c]];
+
+            // Vertex expressed in voxel units relative to the minimum corner of the bounds.
+            p[c] = (pt - m_bounds.GetMin()) * invScale;
+
+            // Round to the nearest voxel.
+            size_t i = static_cast<size_t>(p[c][0] + double(0.5));
+            size_t j = static_cast<size_t>(p[c][1] + double(0.5));
+            size_t k = static_cast<size_t>(p[c][2] + double(0.5));
+
+            assert(i < m_dim[0] && j < m_dim[1] && k < m_dim[2]);
+
+            if (c == 0)
+            {
+                i0 = i1 = i;
+                j0 = j1 = j;
+                k0 = k1 = k;
+            }
+            else
+            {
+                i0 = std::min(i0, i);
+                j0 = std::min(j0, j);
+                k0 = std::min(k0, k);
+
+                i1 = std::max(i1, i);
+                j1 = std::max(j1, j);
+                k1 = std::max(k1, k);
+            }
+        }
+        // Grow the box by one voxel on the low side and turn the high side into
+        // an exclusive bound, staying inside the grid.
+        if (i0 > 0)
+            --i0;
+        if (j0 > 0)
+            --j0;
+        if (k0 > 0)
+            --k0;
+        if (i1 < m_dim[0])
+            ++i1;
+        if (j1 < m_dim[1])
+            ++j1;
+        if (k1 < m_dim[2])
+            ++k1;
+        for (size_t i_id = i0; i_id < i1; ++i_id)
+        {
+            boxcenter[0] = uint32_t(i_id);
+            for (size_t j_id = j0; j_id < j1; ++j_id)
+            {
+                boxcenter[1] = uint32_t(j_id);
+                for (size_t k_id = k0; k_id < k1; ++k_id)
+                {
+                    boxcenter[2] = uint32_t(k_id);
+                    bool res = TriBoxOverlap(boxcenter,
+                                             boxhalfsize,
+                                             p[0],
+                                             p[1],
+                                             p[2]);
+                    VoxelValue& value = GetVoxel(i_id,
+                                                 j_id,
+                                                 k_id);
+                    // Record each surface voxel only once, even when several triangles touch it.
+                    if (   res
+                        && value == VoxelValue::PRIMITIVE_UNDEFINED)
+                    {
+                        value = VoxelValue::PRIMITIVE_ON_SURFACE;
+                        ++m_numVoxelsOnSurface;
+                        m_surfaceVoxels.emplace_back(uint32_t(i_id),
+                                                     uint32_t(j_id),
+                                                     uint32_t(k_id));
+                    }
+                }
+            }
+        }
+    }
+
+    if (fillMode == FillMode::SURFACE_ONLY)
+    {
+        // Everything that is not on the surface is treated as outside.
+        const size_t i0_local = m_dim[0];
+        const size_t j0_local = m_dim[1];
+        const size_t k0_local = m_dim[2];
+        for (size_t i_id = 0; i_id < i0_local; ++i_id)
+        {
+            for (size_t j_id = 0; j_id < j0_local; ++j_id)
+            {
+                for (size_t k_id = 0; k_id < k0_local; ++k_id)
+                {
+                    const VoxelValue& voxel = GetVoxel(i_id,
+                                                       j_id,
+                                                       k_id);
+                    if (voxel != VoxelValue::PRIMITIVE_ON_SURFACE)
+                    {
+                        SetVoxel(i_id,
+                                 j_id,
+                                 k_id,
+                                 VoxelValue::PRIMITIVE_OUTSIDE_SURFACE);
+                    }
+                }
+            }
+        }
+    }
+    else if (fillMode == FillMode::FLOOD_FILL)
+    {
+        /*
+         * Marking the outside edges of the voxel cube to be outside surfaces to walk
+         */
+        MarkOutsideSurface(0,            0,            0,            m_dim[0], m_dim[1], 1);
+        MarkOutsideSurface(0,            0,            m_dim[2] - 1, m_dim[0], m_dim[1], m_dim[2]);
+        MarkOutsideSurface(0,            0,            0,            m_dim[0], 1,        m_dim[2]);
+        MarkOutsideSurface(0,            m_dim[1] - 1, 0,            m_dim[0], m_dim[1], m_dim[2]);
+        MarkOutsideSurface(0,            0,            0,            1,        m_dim[1], m_dim[2]);
+        MarkOutsideSurface(m_dim[0] - 1, 0,            0,            m_dim[0], m_dim[1], m_dim[2]);
+        FillOutsideSurface();
+        FillInsideSurface();
+    }
+    else if (fillMode == FillMode::RAYCAST_FILL)
+    {
+        RaycastFill(aabbTree);
+    }
+}
+
+// Classifies every voxel that is not on the surface as inside or outside the mesh
+// by casting rays along the six axis directions from the voxel position.
+//
+// A voxel is marked PRIMITIVE_INSIDE_SURFACE when no ray reported an outside hit
+// and at least three inside hits were accumulated; otherwise it is marked
+// PRIMITIVE_OUTSIDE_SURFACE. The interior voxel list is only replaced when at
+// least one interior voxel was found.
+void Volume::RaycastFill(const AABBTree& aabbTree)
+{
+    const uint32_t i0 = m_dim[0];
+    const uint32_t j0 = m_dim[1];
+    const uint32_t k0 = m_dim[2];
+
+    // Widen before multiplying so the product cannot wrap in 32 bits.
+    size_t maxSize = size_t(i0) * size_t(j0) * size_t(k0);
+
+    std::vector<Voxel> temp;
+    temp.reserve(maxSize);
+    uint32_t count{ 0 };
+    m_numVoxelsInsideSurface = 0;
+    for (uint32_t i = 0; i < i0; ++i)
+    {
+        for (uint32_t j = 0; j < j0; ++j)
+        {
+            for (uint32_t k = 0; k < k0; ++k)
+            {
+                VoxelValue& voxel = GetVoxel(i, j, k);
+                if (voxel != VoxelValue::PRIMITIVE_ON_SURFACE)
+                {
+                    // Position of the voxel in mesh space.
+                    VHACD::Vect3 start = VHACD::Vect3(i, j, k) * m_scale + m_bounds.GetMin();
+
+                    uint32_t insideCount = 0;
+                    uint32_t outsideCount = 0;
+
+                    VHACD::Vect3 directions[6] = {
+                        VHACD::Vect3( 1,  0,  0),
+                        VHACD::Vect3(-1,  0,  0), // this was 1, 0, 0 in the original code, but looks wrong
+                        VHACD::Vect3( 0,  1,  0),
+                        VHACD::Vect3( 0, -1,  0),
+                        VHACD::Vect3( 0,  0,  1),
+                        VHACD::Vect3( 0,  0, -1)
+                    };
+
+                    for (uint32_t r = 0; r < 6; r++)
+                    {
+                        aabbTree.TraceRay(start,
+                                          directions[r],
+                                          insideCount,
+                                          outsideCount);
+                        // Early out if we hit the outside of the mesh
+                        if (outsideCount)
+                        {
+                            break;
+                        }
+                        // Early out if we accumulated 3 inside hits
+                        if (insideCount >= 3)
+                        {
+                            break;
+                        }
+                    }
+
+                    if (outsideCount == 0 && insideCount >= 3)
+                    {
+                        voxel = VoxelValue::PRIMITIVE_INSIDE_SURFACE;
+                        temp.emplace_back(i, j, k);
+                        count++;
+                        m_numVoxelsInsideSurface++;
+                    }
+                    else
+                    {
+                        voxel = VoxelValue::PRIMITIVE_OUTSIDE_SURFACE;
+                    }
+                }
+            }
+        }
+    }
+
+    if (count)
+    {
+        m_interiorVoxels = std::move(temp);
+    }
+}
+
+// Stores 'value' in voxel (i, j, k).
+// The indices must lie inside the grid dimensions (checked in debug builds only).
+void Volume::SetVoxel(const size_t i,
+                      const size_t j,
+                      const size_t k,
+                      VoxelValue value)
+{
+    // The indices are unsigned, so only the upper bound needs checking.
+    assert(i < m_dim[0]);
+    assert(j < m_dim[1]);
+    assert(k < m_dim[2]);
+
+    // k varies fastest, then j, then i.
+    m_data[k + j * m_dim[2] + i * m_dim[1] * m_dim[2]] = value;
+}
+
+// Returns a mutable reference to voxel (i, j, k).
+// The indices must lie inside the grid dimensions (checked in debug builds only).
+VoxelValue& Volume::GetVoxel(const size_t i,
+                             const size_t j,
+                             const size_t k)
+{
+    // The indices are unsigned, so only the upper bound needs checking.
+    assert(i < m_dim[0]);
+    assert(j < m_dim[1]);
+    assert(k < m_dim[2]);
+    // k varies fastest, then j, then i.
+    return m_data[k + j * m_dim[2] + i * m_dim[1] * m_dim[2]];
+}
+
+// Returns a read-only reference to voxel (i, j, k).
+// The indices must lie inside the grid dimensions (checked in debug builds only).
+const VoxelValue& Volume::GetVoxel(const size_t i,
+                                   const size_t j,
+                                   const size_t k) const
+{
+    // The indices are unsigned, so only the upper bound needs checking.
+    assert(i < m_dim[0]);
+    assert(j < m_dim[1]);
+    assert(k < m_dim[2]);
+    // k varies fastest, then j, then i.
+    return m_data[k + j * m_dim[2] + i * m_dim[1] * m_dim[2]];
+}
+
+// Returns the list of voxels recorded as lying on the surface.
+const std::vector<Voxel>& Volume::GetSurfaceVoxels() const
+{
+    return m_surfaceVoxels;
+}
+
+// Returns the list of voxels recorded as lying inside the surface.
+const std::vector<Voxel>& Volume::GetInteriorVoxels() const
+{
+    return m_interiorVoxels;
+}
+
+// Returns the edge length of one voxel in mesh units.
+double Volume::GetScale() const
+{
+    return m_scale;
+}
+
+// Returns the bounding box of the points passed to Voxelize().
+const VHACD::BoundsAABB& Volume::GetBounds() const
+{
+    return m_bounds;
+}
+
+// Returns the number of voxels along each of the three grid axes.
+const VHACD::Vector3<uint32_t>& Volume::GetDimensions() const
+{
+    return m_dim;
+}
+
+void Volume::MarkOutsideSurface(const size_t i0,
+                                const size_t j0,
+                                const size_t k0,
+                                const size_t i1,
+                                const size_t j1,
+                                const size_t k1)
+{
+    for (size_t i = i0; i < i1; ++i)
+    {
+        for (size_t j = j0; j < j1; ++j)
+        {
+            for (size_t k = k0; k < k1; ++k)
+            {
+                VoxelValue& v = GetVoxel(i, j, k);
+                if (v == VoxelValue::PRIMITIVE_UNDEFINED)
+                {
+                    v = VoxelValue::PRIMITIVE_OUTSIDE_SURFACE_TOWALK;
+                }
+            }
+        }
+    }
+}
+
+// Walks a run of voxels in increasing index order and flags them for the flood fill.
+//
+// start       : index of the first voxel to visit
+// end         : exclusive upper bound for the index
+// ptr         : pointer to the voxel at 'start'
+// stride      : distance, in elements, between two consecutive voxels of the run
+// maxDistance : maximum number of voxels to flag
+//
+// The walk stops at the first voxel that is not PRIMITIVE_UNDEFINED. The bounds
+// are tested before the voxel is read, so 'ptr' is never dereferenced out of range.
+inline void WalkForward(int64_t start,
+                        int64_t end,
+                        VoxelValue* ptr,
+                        int64_t stride,
+                        int64_t maxDistance)
+{
+    int64_t index = start;
+    int64_t walked = 0;
+    while (walked < maxDistance && index < end && *ptr == VoxelValue::PRIMITIVE_UNDEFINED)
+    {
+        *ptr = VoxelValue::PRIMITIVE_OUTSIDE_SURFACE_TOWALK;
+
+        ++index;
+        ptr += stride;
+        ++walked;
+    }
+}
+
+// Walks a run of voxels in decreasing index order and flags them for the flood fill.
+//
+// start       : index of the first voxel to visit
+// end         : inclusive lower bound for the index
+// ptr         : pointer to the voxel at 'start'
+// stride      : distance, in elements, between two consecutive voxels of the run
+// maxDistance : maximum number of voxels to flag
+//
+// The walk stops at the first voxel that is not PRIMITIVE_UNDEFINED. The bounds
+// are tested before the voxel is read, so 'ptr' is never dereferenced out of range.
+inline void WalkBackward(int64_t start,
+                         int64_t end,
+                         VoxelValue* ptr,
+                         int64_t stride,
+                         int64_t maxDistance)
+{
+    int64_t index = start;
+    int64_t walked = 0;
+    while (walked < maxDistance && index >= end && *ptr == VoxelValue::PRIMITIVE_UNDEFINED)
+    {
+        *ptr = VoxelValue::PRIMITIVE_OUTSIDE_SURFACE_TOWALK;
+
+        --index;
+        ptr -= stride;
+        ++walked;
+    }
+}
+
+// Flood fills the outside of the mesh.
+// Repeatedly sweeps the whole grid: every voxel flagged PRIMITIVE_OUTSIDE_SURFACE_TOWALK
+// becomes PRIMITIVE_OUTSIDE_SURFACE and flags the undefined voxels reachable from it
+// along the six axis directions. The sweeps stop once a pass converts no voxel.
+// m_numVoxelsOutsideSurface is increased by the number of converted voxels.
+void Volume::FillOutsideSurface()
+{
+    size_t voxelsWalked = 0;
+    const int64_t i0 = m_dim[0];
+    const int64_t j0 = m_dim[1];
+    const int64_t k0 = m_dim[2];
+
+    // Avoid striding too far in each direction to stay in L1 cache as much as possible.
+    // The cache size required for the walk is roughly (4 * walkDistance * 64) since
+    // the k direction doesn't count as it's walking byte per byte directly in a cache lines.
+    // ~16k is required for a walk distance of 64 in each directions.
+    const size_t walkDistance = 64;
+
+    // using the stride directly instead of calling GetVoxel for each iterations saves
+    // a lot of multiplications and pipeline stalls due to data dependencies on imul.
+    const size_t istride = &GetVoxel(1, 0, 0) - &GetVoxel(0, 0, 0);
+    const size_t jstride = &GetVoxel(0, 1, 0) - &GetVoxel(0, 0, 0);
+    const size_t kstride = &GetVoxel(0, 0, 1) - &GetVoxel(0, 0, 0);
+
+    // It might seem counter intuitive to go over the whole voxel range multiple times
+    // but since we do the run in memory order, it leaves us with far fewer cache misses
+    // than a BFS algorithm and it has the additional benefit of not requiring us to
+    // store and manipulate a fifo for recursion that might become huge when the number
+    // of voxels is large.
+    // This will outperform the BFS algorithm by several orders of magnitude in practice.
+    do
+    {
+        // Number of voxels converted during this sweep; zero means convergence.
+        voxelsWalked = 0;
+        for (int64_t i = 0; i < i0; ++i)
+        {
+            for (int64_t j = 0; j < j0; ++j)
+            {
+                for (int64_t k = 0; k < k0; ++k)
+                {
+                    VoxelValue& voxel = GetVoxel(i, j, k);
+                    if (voxel == VoxelValue::PRIMITIVE_OUTSIDE_SURFACE_TOWALK)
+                    {
+                        voxelsWalked++;
+                        voxel = VoxelValue::PRIMITIVE_OUTSIDE_SURFACE;
+
+                        // walk in each direction to mark other voxel that should be walked.
+                        // this will generate a 3d pattern that will help the overall
+                        // algorithm converge faster while remaining cache friendly.
+                        // The walkers check the index range before reading through the
+                        // pointer, so the neighbour pointers formed here are not
+                        // dereferenced when they fall outside the grid.
+                        WalkForward(k + 1, k0, &voxel + kstride, kstride, walkDistance);
+                        WalkBackward(k - 1, 0, &voxel - kstride, kstride, walkDistance);
+
+                        WalkForward(j + 1, j0, &voxel + jstride, jstride, walkDistance);
+                        WalkBackward(j - 1, 0, &voxel - jstride, jstride, walkDistance);
+
+                        WalkForward(i + 1, i0, &voxel + istride, istride, walkDistance);
+                        WalkBackward(i - 1, 0, &voxel - istride, istride, walkDistance);
+                    }
+                }
+            }
+        }
+
+        m_numVoxelsOutsideSurface += voxelsWalked;
+    } while (voxelsWalked != 0);
+}
+
+// Marks every voxel that is still PRIMITIVE_UNDEFINED as PRIMITIVE_INSIDE_SURFACE,
+// counts it in m_numVoxelsInsideSurface and records its coordinates.
+// The interior voxel list is only replaced when at least one voxel was converted.
+void Volume::FillInsideSurface()
+{
+    const uint32_t i0 = uint32_t(m_dim[0]);
+    const uint32_t j0 = uint32_t(m_dim[1]);
+    const uint32_t k0 = uint32_t(m_dim[2]);
+
+    // Widen before multiplying so the product cannot wrap in 32 bits.
+    size_t maxSize = size_t(i0) * size_t(j0) * size_t(k0);
+
+    std::vector<Voxel> temp;
+    temp.reserve(maxSize);
+    uint32_t count{ 0 };
+
+    for (uint32_t i = 0; i < i0; ++i)
+    {
+        for (uint32_t j = 0; j < j0; ++j)
+        {
+            for (uint32_t k = 0; k < k0; ++k)
+            {
+                VoxelValue& v = GetVoxel(i, j, k);
+                if (v == VoxelValue::PRIMITIVE_UNDEFINED)
+                {
+                    v = VoxelValue::PRIMITIVE_INSIDE_SURFACE;
+                    temp.emplace_back(i, j, k);
+                    count++;
+                    ++m_numVoxelsInsideSurface;
+                }
+            }
+        }
+    }
+
+    if ( count )
+    {
+        m_interiorVoxels = std::move(temp);
+    }
+}
+
+//******************************************************************************************
+//  ShrinkWrap helper class
+//******************************************************************************************
+// This is a code snippet which 'shrinkwraps' a convex hull
+// to a source mesh.
+//
+// It is a somewhat complicated algorithm. It works as follows:
+//
+// * Step #1 : Compute the mean unit normal vector for each vertex in the convex hull
+// * Step #2 : For each vertex in the convex hull we project it slightly outwards along the mean normal vector
+// * Step #3 : We then raycast from this slightly extruded point back into the opposite direction of the mean normal vector
+//             resulting in a raycast from slightly beyond the vertex in the hull into the source mesh we are trying
+//             to 'shrink wrap' against
+// * Step #4 : If the raycast fails we leave the original vertex alone
+// * Step #5 : If the raycast hits a backface we leave the original vertex alone
+// * Step #6 : If the raycast hit is too far away (beyond a certain threshold distance) we leave it alone
+// * Step #7 : If the point we hit on the source mesh is not still within the convex hull, we reject it.
+// * Step #8 : If all of the previous conditions are met, then we take the raycast hit location as the 'new position'
+// * Step #9 : Once all points have been projected, if possible, we need to recompute the convex hull again based on these shrinkwrapped points
+// * Step #10 : In theory that should work.. let's see...
+
+//***********************************************************************************************
+// QuickHull implementation
+//***********************************************************************************************
+
+//////////////////////////////////////////////////////////////////////////
+// Quickhull base class holding the hull during construction
+//////////////////////////////////////////////////////////////////////////
+// Thin wrapper that computes a convex hull and keeps the result as an
+// indexed triangle mesh.
+class QuickHull
+{
+public:
+    // Computes the hull of 'vertices' using at most 'maxHullVertices' hull vertices.
+    // Returns the number of triangles of the hull.
+    uint32_t ComputeConvexHull(const std::vector<VHACD::Vertex>& vertices,
+                               uint32_t maxHullVertices);
+
+    // Vertices of the hull computed last.
+    const std::vector<VHACD::Vertex>& GetVertices() const;
+    // Triangles of the hull computed last, indexing into GetVertices().
+    const std::vector<VHACD::Triangle>& GetIndices() const;
+
+private:
+    std::vector<VHACD::Vertex>   m_vertices; // hull vertices
+    std::vector<VHACD::Triangle> m_indices;  // hull triangles
+};
+
+// Builds the convex hull of 'vertices' and stores it in this object.
+//
+// vertices        : input point cloud
+// maxHullVertices : vertex limit forwarded to VHACD::ConvexHull
+//
+// Returns the number of triangles of the resulting hull. The stored vertices
+// are only overwritten when the hull produced at least one vertex.
+uint32_t QuickHull::ComputeConvexHull(const std::vector<VHACD::Vertex>& vertices,
+                                      uint32_t maxHullVertices)
+{
+    m_indices.clear();
+
+    VHACD::ConvexHull hull(vertices,
+                           double(0.0001),
+                           maxHullVertices);
+
+    // Copy the hull vertices, if any.
+    auto& hullVertices = hull.GetVertexPool();
+    if (!hullVertices.empty())
+    {
+        m_vertices.resize(hullVertices.size());
+        std::copy(hullVertices.begin(),
+                  hullVertices.end(),
+                  m_vertices.begin());
+    }
+
+    // Convert each hull face into an index triple.
+    for (const VHACD::ConvexHullFace& hullFace : hull.GetList())
+    {
+        m_indices.emplace_back(hullFace.m_index[0],
+                               hullFace.m_index[1],
+                               hullFace.m_index[2]);
+    }
+
+    const size_t triangleCount = m_indices.size();
+    return uint32_t(triangleCount);
+}
+
+// Returns the vertices stored by the last call to ComputeConvexHull().
+const std::vector<VHACD::Vertex>& QuickHull::GetVertices() const
+{
+    return m_vertices;
+}
+
+// Returns the triangles stored by the last call to ComputeConvexHull().
+const std::vector<VHACD::Triangle>& QuickHull::GetIndices() const
+{
+    return m_indices;
+}
+
+//******************************************************************************************
+// Implementation of the ShrinkWrap function
+//******************************************************************************************
+
+// Snaps the vertices of a convex hull onto the source mesh and rebuilds the hull.
+//
+// sourceConvexHull   : hull to process; its vertices are moved in place and, when the
+//                      rebuilt hull has triangles, its vertices and indices are replaced
+// aabbTree           : closest-point query structure for the source mesh
+// maxHullVertexCount : vertex limit used when the hull is rebuilt
+// distanceThreshold  : maximum distance a vertex may be moved
+// doShrinkWrap       : when false the vertices are left untouched and the hull is
+//                      only recomputed
+void ShrinkWrap(SimpleMesh& sourceConvexHull,
+                const AABBTree& aabbTree,
+                uint32_t maxHullVertexCount,
+                double distanceThreshold,
+                bool doShrinkWrap)
+{
+    // Points the new convex hull is built from.
+    std::vector<VHACD::Vertex> wrappedVertices;
+    wrappedVertices.reserve(sourceConvexHull.m_vertices.size());
+
+    for (VHACD::Vertex& vertex : sourceConvexHull.m_vertices)
+    {
+        if (doShrinkWrap)
+        {
+            // Replace the vertex by its projection on the mesh when one exists
+            // within the threshold distance.
+            VHACD::Vect3 projected;
+            const bool found = aabbTree.GetClosestPointWithinDistance(vertex,
+                                                                      distanceThreshold,
+                                                                      projected);
+            if (found)
+            {
+                vertex = projected;
+            }
+        }
+        wrappedVertices.emplace_back(vertex);
+    }
+
+    // Recompute the convex hull from the (possibly moved) points.
+    VHACD::QuickHull hullBuilder;
+    const uint32_t triangleCount = hullBuilder.ComputeConvexHull(wrappedVertices,
+                                                                 maxHullVertexCount);
+    if (triangleCount != 0)
+    {
+        sourceConvexHull.m_vertices = hullBuilder.GetVertices();
+        sourceConvexHull.m_indices = hullBuilder.GetIndices();
+    }
+}
+
+//********************************************************************************************************************
+
+#if !VHACD_DISABLE_THREADING
+
+//********************************************************************************************************************
+// Definition of the ThreadPool
+//********************************************************************************************************************
+
+// Fixed-size pool of worker threads consuming tasks from a shared FIFO queue.
+class ThreadPool {
+ public:
+    // Creates a pool with a single worker thread.
+    ThreadPool();
+    // Creates a pool with 'worker' threads.
+    ThreadPool(int worker);
+    // Stops accepting tasks and joins all workers.
+    ~ThreadPool();
+    // Queues the call f(args...) and returns a future for its result.
+    template<typename F, typename... Args>
+    auto enqueue(F&& f, Args&& ... args)
+#ifndef __cpp_lib_is_invocable
+        -> std::future< typename std::result_of< F( Args... ) >::type>;
+#else
+        -> std::future< typename std::invoke_result_t<F, Args...>>;
+#endif
+ private:
+    std::vector<std::thread> workers;         // worker threads, joined in the destructor
+    std::deque<std::function<void()>> tasks;  // pending tasks, oldest first
+    std::mutex task_mutex;                    // protects 'tasks' and 'closed'
+    std::condition_variable cv;               // signalled when a task is queued or the pool closes
+    bool closed;                              // set by the destructor; no task is queued afterwards
+    int count;                                // initialised to 0; not used by the code shown here
+};
+
+// Default constructor: delegates to ThreadPool(1), i.e. a single worker thread.
+ThreadPool::ThreadPool()
+    : ThreadPool(1)
+{
+}
+
+// Starts 'worker' threads. Each thread loops forever: it waits for a task,
+// runs it with the queue mutex released, and exits once the pool is closed
+// and the queue is empty.
+ThreadPool::ThreadPool(int worker)
+    : closed(false)
+    , count(0)
+{
+    workers.reserve(worker);
+    for(int i=0; i<worker; i++) 
+    {
+        workers.emplace_back(
+            [this]
+            {
+                // The lock is held whenever the queue or 'closed' is inspected.
+                std::unique_lock<std::mutex> lock(this->task_mutex);
+                while(true) 
+                {
+                    while (this->tasks.empty()) 
+                    {
+                        // Only leave when there is nothing left to run.
+                        if (this->closed) 
+                        {
+                            return;
+                        }
+                        this->cv.wait(lock);
+                    }
+                    auto task = this->tasks.front();
+                    this->tasks.pop_front();
+                    // Run the task without holding the lock so other workers can proceed.
+                    lock.unlock();
+                    task();
+                    lock.lock();
+                }
+            }
+        );
+    }
+}
+
+// Wraps the call f(args...) in a packaged task, appends it to the queue and wakes
+// one worker. Returns the future associated with the task.
+// When the pool is already closed the task is not queued.
+template<typename F, typename... Args>
+auto ThreadPool::enqueue(F&& f, Args&& ... args)
+#ifndef __cpp_lib_is_invocable
+    -> std::future< typename std::result_of< F( Args... ) >::type>
+#else
+    -> std::future< typename std::invoke_result_t<F, Args...>>
+#endif
+{
+
+    // std::result_of is used when the library does not advertise std::invoke_result.
+#ifndef __cpp_lib_is_invocable
+    using return_type = typename std::result_of< F( Args... ) >::type;
+#else
+    using return_type = typename std::invoke_result_t< F, Args... >;
+#endif
+    // The packaged task is held by shared_ptr so the queued lambda stays copyable.
+    auto task = std::make_shared<std::packaged_task<return_type()> > (
+        std::bind(std::forward<F>(f), std::forward<Args>(args)...)
+    );
+    auto result = task->get_future();
+
+    {
+        std::unique_lock<std::mutex> lock(task_mutex);
+        if (!closed) 
+        {
+            tasks.emplace_back([task]
+            { 
+                (*task)();
+            });
+            cv.notify_one();
+        }
+    }
+
+    return result;
+}
+
+// Marks the pool as closed, wakes every worker and waits for all of them to exit.
+// Workers drain the tasks already queued before returning.
+ThreadPool::~ThreadPool() {
+    {
+        // 'closed' is written under the mutex the workers use to read it.
+        std::unique_lock<std::mutex> lock(task_mutex);
+        closed = true;
+    }
+    cv.notify_all();
+    for (auto && worker : workers) 
+    {
+        worker.join();
+    }
+}
+#endif
+
+// Identifies the processing stage reported through VHACDCallbacks::ProgressUpdate().
+enum class Stages
+{
+    COMPUTE_BOUNDS_OF_INPUT_MESH,
+    REINDEXING_INPUT_MESH,
+    CREATE_RAYCAST_MESH,
+    VOXELIZING_INPUT_MESH,
+    BUILD_INITIAL_CONVEX_HULL,
+    PERFORMING_DECOMPOSITION,
+    INITIALIZING_CONVEX_HULLS_FOR_MERGING,
+    COMPUTING_COST_MATRIX,
+    MERGING_CONVEX_HULLS,
+    FINALIZING_RESULTS,
+    NUM_STAGES // last enumerator; its value equals the number of stages above
+};
+
+// Abstract interface for progress reporting and cancellation queries.
+class VHACDCallbacks
+{
+public:
+    // Reports the progress of 'stage'; 'operation' is a text describing the current work.
+    virtual void ProgressUpdate(Stages stage,
+                                double stageProgress,
+                                const char *operation) = 0;
+    // Returns true when the operation has been canceled.
+    virtual bool IsCanceled() const = 0;
+
+    virtual ~VHACDCallbacks() = default;
+};
+
+// Axis and side selector for a split plane, as used by VoxelHull.
+enum class SplitAxis
+{
+    X_AXIS_NEGATIVE,
+    X_AXIS_POSITIVE,
+    Y_AXIS_NEGATIVE,
+    Y_AXIS_POSITIVE,
+    Z_AXIS_NEGATIVE,
+    Z_AXIS_POSITIVE,
+};
+
+// This class represents a collection of voxels, the convex hull
+// which surrounds them, and a triangle mesh representation of those voxels.
+//
+// Hulls form a binary tree: PerformPlaneSplit() creates two children
+// (m_hullA / m_hullB), each covering one side of an axis-aligned split plane.
+class VoxelHull
+{
+public:
+
+    // This method constructs a new VoxelHull based on a plane split of the parent
+    // convex hull. 'axis' selects the side of the plane to keep and 'splitLoc'
+    // is the split location in voxel coordinates along that axis.
+    VoxelHull(const VoxelHull& parent,
+              SplitAxis axis,
+              uint32_t splitLoc);
+
+    // Here we construct the initial convex hull around the
+    // entire voxel set
+    VoxelHull(Volume& voxels,
+              const IVHACD::Parameters &params,
+              VHACDCallbacks *callbacks);
+
+    ~VoxelHull() = default;
+
+    // Helper method to refresh the min/max voxel bounding region (m_1 / m_2)
+    // so that it includes voxel 'v'
+    void MinMaxVoxelRegion(const Voxel &v);
+
+    // Builds the AABB tree used for raycasting from the current voxel mesh
+    void BuildRaycastMesh();
+
+    // We now compute the convex hull relative to a triangle mesh generated
+    // from the voxels, and update the hull/voxel volumes and the volume error
+    void ComputeConvexHull();
+
+    // Returns true if this convex hull should be considered done
+    // (i.e. it should not be split any further)
+    bool IsComplete();
+
+    
+    // Convert a voxel position into its correct double precision location:
+    // each coordinate is multiplied by 'scale' and offset by 'bmin'
+    VHACD::Vect3 GetPoint(const int32_t x,
+                                 const int32_t y,
+                                 const int32_t z,
+                                 const double scale,
+                                 const VHACD::Vect3& bmin) const;
+
+    // Sees if we have already got an index for this voxel position.
+    // If the voxel position has already been indexed, we just return
+    // that index value.
+    // If not, then we convert it into the floating point position and
+    // add it to the index map
+    uint32_t GetVertexIndex(const VHACD::Vector3<uint32_t>& p);
+
+    // This method will convert the voxels into an actual indexed triangle mesh of boxes
+    // This serves two purposes.
+    // The primary purpose is so that when we compute a convex hull it considers all of the points
+    // for each voxel, not just the center point. If you don't do this, then the hulls don't fit the
+    // mesh accurately enough.
+    // The second reason we convert it into a triangle mesh is so that we can do raycasting against it
+    // to search for the best splitting plane fairly quickly. That algorithm will be discussed in the
+    // method which computes the best splitting plane.
+    void BuildVoxelMesh();
+
+    // Convert a single voxel position into an actual 3d box mesh comprised
+    // of 12 triangles
+    void AddVoxelBox(const Voxel &v);
+    
+    // Add the triangle represented by these 3 indices into the 'box' set of vertices
+    // to the output mesh
+    void AddTri(const std::array<VHACD::Vector3<uint32_t>, 8>& box,
+                uint32_t i1,
+                uint32_t i2,
+                uint32_t i3);
+
+    // Here we convert from voxel space to a 3d position, index it, and add
+    // the triangle positions and indices for the output mesh
+    void AddTriangle(const VHACD::Vector3<uint32_t>& p1,
+                     const VHACD::Vector3<uint32_t>& p2,
+                     const VHACD::Vector3<uint32_t>& p3);
+
+    // When computing the split plane, we start by simply
+    // taking the midpoint of the longest side. However,
+    // we can also search the surface and look for the greatest
+    // spot of concavity and use that as the split location.
+    // This will make the convex decomposition more efficient
+    // as it will tend to cut across the greatest point of
+    // concavity on the surface.
+    // 'location' receives the chosen split location in voxel coordinates.
+    SplitAxis ComputeSplitPlane(uint32_t& location);
+
+    // Converts a (signed) voxel coordinate into a 3d position using this
+    // hull's voxel scale and adjusted minimum
+    VHACD::Vect3 GetPosition(const VHACD::Vector3<int32_t>& ip) const;
+
+    // Casts a ray between two voxel positions against the voxel mesh.
+    // Returns the distance from p1 to the hit location, or 0 when nothing is hit.
+    double Raycast(const VHACD::Vector3<int32_t>& p1,
+                   const VHACD::Vector3<int32_t>& p2) const;
+
+    // Searches along axis 'idx' (0 = X, 1 = Y, 2 = Z) for the greatest area of
+    // concavity. Returns true when 'splitLoc' holds a usable split location.
+    bool FindConcavity(uint32_t idx,
+                       uint32_t& splitLoc);
+
+    // Finding the greatest area of concavity along the X axis
+    bool FindConcavityX(uint32_t& splitLoc);
+
+    // Finding the greatest area of concavity along the Y axis
+    bool FindConcavityY(uint32_t& splitLoc);
+
+    // Finding the greatest area of concavity along the Z axis
+    bool FindConcavityZ(uint32_t& splitLoc);
+
+    // This operation is performed in a background thread.
+    // It splits the voxels by a plane
+    void PerformPlaneSplit();
+
+    // Used only for debugging. Saves the voxelized mesh to disk
+    // Optionally saves the original source mesh as well for comparison
+    void SaveVoxelMesh(const SimpleMesh& inputMesh,
+                       bool saveVoxelMesh,
+                       bool saveSourceMesh);
+
+    // Debugging aid: writes this hull's voxel mesh followed by the voxel mesh
+    // of 'h' into a single Wavefront OBJ file
+    void SaveOBJ(const char* fname,
+                 const VoxelHull* h);
+
+    // Debugging aid: writes this hull's voxel mesh into a Wavefront OBJ file
+    void SaveOBJ(const char* fname);
+
+private:
+    // Writes the vertices and triangles to an already opened OBJ file.
+    // 'baseIndex' is added to every triangle index before it is written.
+    void WriteOBJ(FILE* fph,
+                  const std::vector<VHACD::Vertex>& vertices,
+                  const std::vector<VHACD::Triangle>& indices,
+                  uint32_t baseIndex);
+public:
+
+    SplitAxis               m_axis{ SplitAxis::X_AXIS_NEGATIVE };
+    Volume*                 m_voxels{ nullptr }; // The voxelized data set
+    double                  m_voxelScale{ 0 };   // Size of a single voxel
+    double                  m_voxelScaleHalf{ 0 }; // 1/2 of the size of a single voxel
+    VHACD::BoundsAABB       m_voxelBounds;
+    VHACD::Vect3            m_voxelAdjust;       // Minimum coordinates of the voxel space, with adjustment
+    uint32_t                m_depth{ 0 };        // How deep in the recursion of the binary tree this hull is
+    uint32_t                m_index{ 0 };        // Each convex hull is given a unique id to distinguish it from the others
+    double                  m_volumeError{ 0 };  // The percentage error from the convex hull volume vs. the voxel volume
+    double                  m_voxelVolume{ 0 };  // The volume of the voxels
+    double                  m_hullVolume{ 0 };   // The volume of the enclosing convex hull
+
+    std::unique_ptr<IVHACD::ConvexHull> m_convexHull{ nullptr }; // The convex hull which encloses this set of voxels.
+    std::vector<Voxel>                  m_surfaceVoxels;     // The voxels which are on the surface of the source mesh.
+    std::vector<Voxel>                  m_newSurfaceVoxels;  // Voxels which are on the surface as a result of a plane split
+    std::vector<Voxel>                  m_interiorVoxels;    // Voxels which are part of the interior of the hull
+
+    std::unique_ptr<VoxelHull>          m_hullA{ nullptr }; // hull resulting from one side of the plane split
+    std::unique_ptr<VoxelHull>          m_hullB{ nullptr }; // hull resulting from the other side of the plane split
+
+    // Defines the coordinates this convex hull comprises within the voxel volume
+    // of the entire source (m_1 = minimum corner, m_2 = maximum corner, inclusive)
+    VHACD::Vector3<uint32_t>                    m_1{ 0 };
+    VHACD::Vector3<uint32_t>                    m_2{ 0 };
+    AABBTree                                    m_AABBTree;      // Raycast acceleration structure over m_vertices / m_indices
+    std::unordered_map<uint32_t, uint32_t>      m_voxelIndexMap; // Maps from a voxel coordinate space into a vertex index space
+    std::vector<VHACD::Vertex>                  m_vertices;      // Vertices of the voxel box mesh
+    std::vector<VHACD::Triangle>                m_indices;       // Triangles of the voxel box mesh
+    static uint32_t                             m_voxelHullCount; // Number of hulls constructed so far; source of m_index
+    IVHACD::Parameters                          m_params;
+    VHACDCallbacks*                             m_callbacks{ nullptr };
+};
+
+// Running count of VoxelHull instances constructed so far. Both constructors
+// pre-increment it to obtain the new hull's m_index.
+// NOTE(review): this is a plain, non-atomic counter; confirm that hulls are
+// never constructed from more than one thread at a time.
+uint32_t VoxelHull::m_voxelHullCount = 0;
+
+// Builds a child hull from one side of an axis-aligned plane split of 'parent'.
+//
+// parent   : the hull being split; its voxel lists are filtered, never modified.
+// axis     : which axis is split and which side of the plane this child keeps.
+// splitLoc : split location in voxel coordinates along that axis. The
+//            *_NEGATIVE child keeps coordinates <= splitLoc, the *_POSITIVE
+//            child keeps coordinates >= splitLoc + 1.
+//
+// The child shares the parent's voxel volume, parameters and callbacks, then
+// rebuilds its own voxel mesh, raycast mesh and convex hull.
+VoxelHull::VoxelHull(const VoxelHull& parent,
+                     SplitAxis axis,
+                     uint32_t splitLoc)
+    : m_axis(axis)
+    , m_voxels(parent.m_voxels)
+    , m_voxelScale(m_voxels->GetScale())
+    , m_voxelScaleHalf(m_voxelScale * double(0.5))
+    , m_voxelBounds(m_voxels->GetBounds())
+    , m_voxelAdjust(m_voxelBounds.GetMin() - m_voxelScaleHalf)
+    , m_depth(parent.m_depth + 1)
+    , m_index(++m_voxelHullCount)
+    , m_1(parent.m_1)
+    , m_2(parent.m_2)
+    , m_params(parent.m_params)
+    // Propagate the callbacks just like the parameters, so that child hulls
+    // do not silently lose them
+    , m_callbacks(parent.m_callbacks)
+{
+    // Default copy the voxel region from the parent (done above), then clamp
+    // it to our side of the split plane. While doing so remember which
+    // component (0 = X, 1 = Y, 2 = Z) and which coordinate identify the layer
+    // of voxels in our region that touches the split plane.
+    uint32_t planeComponent = 0;
+    uint32_t planeCoord = splitLoc;
+    switch ( m_axis )
+    {
+        case SplitAxis::X_AXIS_NEGATIVE:
+            m_2.GetX() = splitLoc;
+            planeComponent = 0;
+            planeCoord = splitLoc;
+            break;
+        case SplitAxis::X_AXIS_POSITIVE:
+            m_1.GetX() = splitLoc + 1;
+            planeComponent = 0;
+            planeCoord = splitLoc + 1;
+            break;
+        case SplitAxis::Y_AXIS_NEGATIVE:
+            m_2.GetY() = splitLoc;
+            planeComponent = 1;
+            planeCoord = splitLoc;
+            break;
+        case SplitAxis::Y_AXIS_POSITIVE:
+            m_1.GetY() = splitLoc + 1;
+            planeComponent = 1;
+            planeCoord = splitLoc + 1;
+            break;
+        case SplitAxis::Z_AXIS_NEGATIVE:
+            m_2.GetZ() = splitLoc;
+            planeComponent = 2;
+            planeCoord = splitLoc;
+            break;
+        case SplitAxis::Z_AXIS_POSITIVE:
+            m_1.GetZ() = splitLoc + 1;
+            planeComponent = 2;
+            planeCoord = splitLoc + 1;
+            break;
+    }
+
+    // First, we copy all of the interior voxels from our parent
+    // which intersect our region
+    for (const Voxel& voxel : parent.m_interiorVoxels)
+    {
+        VHACD::Vector3<uint32_t> v = voxel.GetVoxel();
+        if ( !(v.CWiseAllGE(m_1) && v.CWiseAllLE(m_2)) )
+        {
+            continue;
+        }
+        // If the parent's interior voxels lie directly on the split plane then
+        // these become new surface voxels for our patch
+        if ( v[planeComponent] == planeCoord )
+        {
+            m_newSurfaceVoxels.push_back(voxel);
+        }
+        else
+        {
+            m_interiorVoxels.push_back(voxel);
+        }
+    }
+    // Next we copy all of the surface voxels which intersect our region
+    for (const Voxel& voxel : parent.m_surfaceVoxels)
+    {
+        VHACD::Vector3<uint32_t> v = voxel.GetVoxel();
+        if (v.CWiseAllGE(m_1) && v.CWiseAllLE(m_2))
+        {
+            m_surfaceVoxels.push_back(voxel);
+        }
+    }
+    // Our parent's new surface voxels become our new surface voxels so long as they intersect our region
+    for (const Voxel& voxel : parent.m_newSurfaceVoxels)
+    {
+        VHACD::Vector3<uint32_t> v = voxel.GetVoxel();
+        if (v.CWiseAllGE(m_1) && v.CWiseAllLE(m_2))
+        {
+            m_newSurfaceVoxels.push_back(voxel);
+        }
+    }
+
+    // Recompute the min-max bounding box which would be different after the split occurs.
+    // Start from an inverted (empty) region and grow it voxel by voxel.
+    m_1 = VHACD::Vector3<uint32_t>(0x7FFFFFFF);
+    m_2 = VHACD::Vector3<uint32_t>(0);
+    for (const Voxel& voxel : m_surfaceVoxels)
+    {
+        MinMaxVoxelRegion(voxel);
+    }
+    for (const Voxel& voxel : m_newSurfaceVoxels)
+    {
+        MinMaxVoxelRegion(voxel);
+    }
+    for (const Voxel& voxel : m_interiorVoxels)
+    {
+        MinMaxVoxelRegion(voxel);
+    }
+
+    BuildVoxelMesh();
+    BuildRaycastMesh(); // build a raycast mesh of the voxel mesh
+    ComputeConvexHull();
+}
+
+// Builds the root hull covering the entire voxel volume.
+//
+// voxels    : the voxelized source; only its address is stored (m_voxels), so
+//             it must outlive this hull.
+// params    : decomposition parameters, copied into m_params.
+// callbacks : stored in m_callbacks; not dereferenced by this constructor.
+//
+// The voxel region starts at m_1's default of (0,0,0) and extends to the
+// volume dimensions - 1. The constructor then builds the voxel mesh, the
+// raycast mesh and the convex hull.
+VoxelHull::VoxelHull(Volume& voxels,
+                     const IVHACD::Parameters& params,
+                     VHACDCallbacks* callbacks)
+    : m_voxels(&voxels)
+    , m_voxelScale(m_voxels->GetScale())
+    , m_voxelScaleHalf(m_voxelScale * double(0.5))
+    , m_voxelBounds(m_voxels->GetBounds())
+    , m_voxelAdjust(m_voxelBounds.GetMin() - m_voxelScaleHalf)
+    , m_index(++m_voxelHullCount)
+    // Here we get a copy of all voxels which lie on the surface mesh
+    , m_surfaceVoxels(m_voxels->GetSurfaceVoxels())
+    // Now we get a copy of all voxels which are considered part of the 'interior' of the source mesh
+    , m_interiorVoxels(m_voxels->GetInteriorVoxels())
+    , m_2(m_voxels->GetDimensions() - 1)
+    , m_params(params)
+    , m_callbacks(callbacks)
+{
+    BuildVoxelMesh();
+    BuildRaycastMesh(); // build a raycast mesh of the voxel mesh
+    ComputeConvexHull();
+}
+
+// Grows the voxel bounding region [m_1, m_2] so that it contains voxel 'v'.
+void VoxelHull::MinMaxVoxelRegion(const Voxel& v)
+{
+    VHACD::Vector3<uint32_t> coord = v.GetVoxel();
+
+    // Component-wise: lower corner can only shrink, upper corner can only grow
+    m_1 = m_1.CWiseMin(coord);
+    m_2 = m_2.CWiseMax(coord);
+}
+
+// Creates the raycast (AABB tree) representation of the voxelized surface mesh.
+void VoxelHull::BuildRaycastMesh()
+{
+    // Without any triangles there is nothing to trace against;
+    // leave the existing tree untouched.
+    if ( m_indices.empty() )
+    {
+        return;
+    }
+
+    m_AABBTree = AABBTree(m_vertices, m_indices);
+}
+
+// Computes the convex hull of the voxel mesh vertices and derives the volume
+// statistics used to decide whether this hull needs further splitting.
+//
+// On return:
+//   m_convexHull  - the new hull, or left unchanged when there are no vertices
+//                   or QuickHull produced no triangles
+//   m_hullVolume  - volume of m_convexHull (unchanged when there is no hull)
+//   m_voxelVolume - voxel count (interior + surface + new surface) times the
+//                   volume of a single voxel
+//   m_volumeError - |hull volume - voxel volume| as a percentage of the voxel
+//                   volume; 0 when the voxel volume is zero
+void VoxelHull::ComputeConvexHull()
+{
+    if ( !m_vertices.empty() )
+    {
+        // we compute the convex hull as follows...
+        VHACD::QuickHull qh;
+        uint32_t tcount = qh.ComputeConvexHull(m_vertices,
+                                               uint32_t(m_vertices.size()));
+        if ( tcount )
+        {
+            m_convexHull = std::unique_ptr<IVHACD::ConvexHull>(new IVHACD::ConvexHull);
+
+            m_convexHull->m_points = qh.GetVertices();
+            m_convexHull->m_triangles = qh.GetIndices();
+
+            VHACD::ComputeCentroid(m_convexHull->m_points,
+                                   m_convexHull->m_triangles,
+                                   m_convexHull->m_center);
+            m_convexHull->m_volume = VHACD::ComputeMeshVolume(m_convexHull->m_points,
+                                                              m_convexHull->m_triangles);
+        }
+    }
+    if ( m_convexHull )
+    {
+        m_hullVolume = m_convexHull->m_volume;
+    }
+    // This is the volume of a single voxel
+    double singleVoxelVolume = m_voxelScale * m_voxelScale * m_voxelScale;
+    size_t voxelCount = m_interiorVoxels.size() + m_newSurfaceVoxels.size() + m_surfaceVoxels.size();
+    m_voxelVolume = singleVoxelVolume * double(voxelCount);
+
+    if ( m_voxelVolume > 0 )
+    {
+        double diff = fabs(m_hullVolume - m_voxelVolume);
+        m_volumeError = (diff * 100) / m_voxelVolume;
+    }
+    else
+    {
+        // No voxels (or a zero-sized voxel): dividing would yield NaN/inf.
+        // There is nothing to approximate, so report no error.
+        m_volumeError = 0;
+    }
+}
+
+// Decides whether this hull is finished, i.e. must not be split any further.
+// Any one of the following is sufficient:
+//   - there is no convex hull at all
+//   - the volume error is below the allowed minimum percentage
+//   - the recursion depth exceeds the configured maximum
+//   - the voxel region is no larger than the minimum edge length on all 3 axes
+bool VoxelHull::IsComplete()
+{
+    if ( m_convexHull == nullptr )
+    {
+        return true;
+    }
+    if ( m_volumeError < m_params.m_minimumVolumePercentErrorAllowed )
+    {
+        return true;
+    }
+    if ( m_depth > m_params.m_maxRecursionDepth )
+    {
+        return true;
+    }
+
+    // Voxel width of the region on each axis, compared against the threshold
+    VHACD::Vector3<uint32_t> extent = m_2 - m_1;
+    const bool smallX = extent.GetX() <= m_params.m_minEdgeLength;
+    const bool smallY = extent.GetY() <= m_params.m_minEdgeLength;
+    const bool smallZ = extent.GetZ() <= m_params.m_minEdgeLength;
+    return smallX && smallY && smallZ;
+}
+
+// Maps an integer voxel coordinate to a 3d position:
+// position = voxel coordinate * scale + bmin, evaluated per component.
+VHACD::Vect3 VoxelHull::GetPoint(const int32_t x,
+                                 const int32_t y,
+                                 const int32_t z,
+                                 const double scale,
+                                 const VHACD::Vect3& bmin) const
+{
+    const double px = x * scale + bmin.GetX();
+    const double py = y * scale + bmin.GetY();
+    const double pz = z * scale + bmin.GetZ();
+    return VHACD::Vect3(px, py, pz);
+}
+
+// Returns the vertex index for voxel corner 'p', creating the vertex on first use.
+// The three coordinates are packed into one 32 bit key (X << 20 | Y << 10 | Z)
+// which is looked up in m_voxelIndexMap.
+uint32_t VoxelHull::GetVertexIndex(const VHACD::Vector3<uint32_t>& p)
+{
+    const uint32_t key = (p.GetX() << 20) | (p.GetY() << 10) | p.GetZ();
+
+    // Already indexed: hand back the existing vertex
+    auto known = m_voxelIndexMap.find(key);
+    if ( known != m_voxelIndexMap.end() )
+    {
+        return known->second;
+    }
+
+    // First time we see this corner: convert it to a floating point position,
+    // register it under the next free index and append the vertex
+    VHACD::Vect3 position = GetPoint(p.GetX(),
+                                     p.GetY(),
+                                     p.GetZ(),
+                                     m_voxelScale,
+                                     m_voxelAdjust);
+    const uint32_t freshIndex = uint32_t(m_voxelIndexMap.size());
+    m_voxelIndexMap[key] = freshIndex;
+    m_vertices.emplace_back(position);
+    return freshIndex;
+}
+
+// Converts the surface voxels into an indexed triangle mesh of boxes.
+//
+// Interior voxels are *not* needed for the mesh, only the ones on the logical
+// surface: the original surface voxels first, followed by the 'new surface
+// voxels' that plane splits have exposed along the splitting plane.
+void VoxelHull::BuildVoxelMesh()
+{
+    const std::vector<Voxel>* shells[] = { &m_surfaceVoxels, &m_newSurfaceVoxels };
+    for (const std::vector<Voxel>* shell : shells)
+    {
+        for (const Voxel& voxel : *shell)
+        {
+            AddVoxelBox(voxel);
+        }
+    }
+}
+
+// Adds the unit box occupied by voxel 'v' to the voxel mesh as 12 triangles.
+void VoxelHull::AddVoxelBox(const Voxel &v)
+{
+    // Minimum and maximum corner of the box in voxel coordinates
+    const uint32_t x0 = v.GetX();
+    const uint32_t y0 = v.GetY();
+    const uint32_t z0 = v.GetZ();
+    const uint32_t x1 = x0 + 1;
+    const uint32_t y1 = y0 + 1;
+    const uint32_t z1 = z0 + 1;
+
+    // The 8 corners of the box: 0-3 on the z0 face, 4-7 on the z1 face
+    std::array<VHACD::Vector3<uint32_t>, 8> corners{{
+        { x0, y0, z0 },
+        { x1, y0, z0 },
+        { x1, y1, z0 },
+        { x0, y1, z0 },
+        { x0, y0, z1 },
+        { x1, y0, z1 },
+        { x1, y1, z1 },
+        { x0, y1, z1 }
+    }};
+
+    // Corner indices of the 12 triangles, two per box face
+    static const uint32_t kBoxTriangles[12][3] = {
+        { 2, 1, 0 }, { 3, 2, 0 },
+        { 7, 2, 3 }, { 7, 6, 2 },
+        { 5, 1, 2 }, { 5, 2, 6 },
+        { 5, 4, 1 }, { 4, 0, 1 },
+        { 4, 6, 7 }, { 4, 5, 6 },
+        { 4, 7, 0 }, { 7, 3, 0 }
+    };
+
+    for (const auto& tri : kBoxTriangles)
+    {
+        AddTri(corners, tri[0], tri[1], tri[2]);
+    }
+}
+
+// Looks up three corners of 'box' by index and adds them to the mesh as one triangle.
+void VoxelHull::AddTri(const std::array<VHACD::Vector3<uint32_t>, 8>& box,
+                       uint32_t i1,
+                       uint32_t i2,
+                       uint32_t i3)
+{
+    const VHACD::Vector3<uint32_t>& first = box[i1];
+    const VHACD::Vector3<uint32_t>& second = box[i2];
+    const VHACD::Vector3<uint32_t>& third = box[i3];
+    AddTriangle(first, second, third);
+}
+
+// Resolves three voxel-space corners to vertex indices (creating vertices as
+// needed, in p1, p2, p3 order) and appends the resulting triangle to the mesh.
+void VoxelHull::AddTriangle(const VHACD::Vector3<uint32_t>& p1,
+                            const VHACD::Vector3<uint32_t>& p2,
+                            const VHACD::Vector3<uint32_t>& p3)
+{
+    const VHACD::Vector3<uint32_t>* corners[3] = { &p1, &p2, &p3 };
+    uint32_t vertexIds[3];
+    for (uint32_t k = 0; k < 3; ++k)
+    {
+        vertexIds[k] = GetVertexIndex(*corners[k]);
+    }
+
+    m_indices.emplace_back(vertexIds[0], vertexIds[1], vertexIds[2]);
+}
+
+// Chooses the axis and location at which this hull is split.
+//
+// The longest side of the voxel region selects the axis (X wins ties, then Y).
+// 'location' defaults to the midpoint of the region along that axis. When
+// m_params.m_findBestPlane is set and the concavity search succeeds, the
+// location it found is used instead.
+//
+// Always returns the *_NEGATIVE value of the chosen axis.
+SplitAxis VoxelHull::ComputeSplitPlane(uint32_t& location)
+{
+    VHACD::Vector3<uint32_t> extent = m_2 - m_1;
+
+    const bool xIsLongest = extent.GetX() >= extent.GetY() && extent.GetX() >= extent.GetZ();
+    const bool yIsLongest = extent.GetY() >= extent.GetX() && extent.GetY() >= extent.GetZ();
+
+    SplitAxis chosen;
+    uint32_t axisIndex; // 0 = X, 1 = Y, 2 = Z
+    uint32_t low;       // region bounds along the chosen axis
+    uint32_t high;
+    if ( xIsLongest )
+    {
+        chosen = SplitAxis::X_AXIS_NEGATIVE;
+        axisIndex = 0;
+        low = m_1.GetX();
+        high = m_2.GetX();
+    }
+    else if ( yIsLongest )
+    {
+        chosen = SplitAxis::Y_AXIS_NEGATIVE;
+        axisIndex = 1;
+        low = m_1.GetY();
+        high = m_2.GetY();
+    }
+    else
+    {
+        chosen = SplitAxis::Z_AXIS_NEGATIVE;
+        axisIndex = 2;
+        low = m_1.GetZ();
+        high = m_2.GetZ();
+    }
+
+    // Start with the midpoint of the longest side
+    location = (high + 1 + low) / 2;
+
+    // Optionally replace it with the spot of greatest concavity
+    uint32_t concavityLoc;
+    if ( m_params.m_findBestPlane && FindConcavity(axisIndex, concavityLoc) )
+    {
+        location = concavityLoc;
+    }
+
+    return chosen;
+}
+
+// Converts a signed voxel coordinate into a 3d position, using this hull's
+// voxel scale and adjusted minimum.
+VHACD::Vect3 VoxelHull::GetPosition(const VHACD::Vector3<int32_t>& ip) const
+{
+    const int32_t vx = ip.GetX();
+    const int32_t vy = ip.GetY();
+    const int32_t vz = ip.GetZ();
+    return GetPoint(vx, vy, vz, m_voxelScale, m_voxelAdjust);
+}
+
+// Traces a ray from voxel position p1 towards voxel position p2 against the
+// voxel mesh. Returns the distance between p1 and the hit location, or 0 when
+// the ray does not hit anything.
+double VoxelHull::Raycast(const VHACD::Vector3<int32_t>& p1,
+                          const VHACD::Vector3<int32_t>& p2) const
+{
+    VHACD::Vect3 origin = GetPosition(p1);
+    VHACD::Vect3 target = GetPosition(p2);
+
+    // Outputs of the trace; only the hit location is used here
+    double hitT;
+    double hitFaceSign;
+    VHACD::Vect3 hitPoint;
+    if ( !m_AABBTree.TraceRay(origin, target, hitT, hitFaceSign, hitPoint) )
+    {
+        return 0; // if it doesn't hit anything, just assign it to zero.
+    }
+
+    return (origin - hitPoint).GetNorm();
+}
+
+// Searches along one axis of this hull's voxel region for the spot where the
+// surface changes most abruptly, and proposes it as the split location.
+//
+// For every voxel layer along the search axis, rays are cast inward from both
+// sides of the region and the hit distances are summed into an "edge error"
+// for that layer. This is done twice, once per remaining axis. The largest
+// jump in edge error between two neighbouring layers marks the candidate.
+//
+// idx      : axis to search along; 0 = X, 1 = Y, 2 = Z.
+// splitLoc : receives the candidate location in voxel coordinates along that
+//            axis. It is written whenever idx is valid, even on a false return.
+//
+// Returns true only when the candidate is more than 4 voxels away from both
+// ends of the region. Returns false (after asserting) when idx is invalid.
+bool VoxelHull::FindConcavity(uint32_t idx,
+                              uint32_t& splitLoc)
+{
+    // idx1 is the axis being searched. idx2 / idx3 are the axes swept by the
+    // first and the second raycast pass respectively.
+    uint32_t idx1;
+    uint32_t idx2;
+    uint32_t idx3;
+    switch (idx)
+    {
+        case 0: // X
+            idx1 = 0;
+            idx2 = 1;
+            idx3 = 2;
+            break;
+        case 1: // Y
+            idx1 = 1;
+            idx2 = 0;
+            idx3 = 2;
+            break;
+        case 2: // Z
+            idx1 = 2;
+            idx2 = 1;
+            idx3 = 0;
+            break;
+        default:
+            // Bail out instead of indexing the region with an invalid axis
+            assert(0 && "findConcavity::idx must be 0, 1, or 2");
+            return false;
+    }
+
+    int32_t d = (m_2[idx1] - m_1[idx1]) + 1; // The length of the search axis in voxel space
+
+    // One accumulated edge error per voxel layer along the search axis,
+    // for the first and the second raycast pass
+    std::vector<double> edgeError1(d);
+    std::vector<double> edgeError2(d);
+
+    // Counter of number of voxel layers we have accumulated in the first pass
+    uint32_t index1 = 0;
+
+    // First pass: sweep axis idx2 within each layer
+    for (uint32_t i0 = m_1[idx1]; i0 <= m_2[idx1]; i0++)
+    {
+        double errorTotal = 0;
+        // We now perform a raycast from the sides inward to determine the
+        // total error (distance of the surface from the sides) along this
+        // layer. The ray end points lie 2 voxels outside of the region.
+        for (uint32_t i1 = m_1[idx2]; i1 <= m_2[idx2]; i1++)
+        {
+            VHACD::Vector3<int32_t> p1;
+            VHACD::Vector3<int32_t> p2;
+            switch (idx)
+            {
+                case 0: // layer along X, sweep Y, cast along Z
+                {
+                    p1 = VHACD::Vector3<int32_t>(i0, i1, m_1.GetZ() - 2);
+                    p2 = VHACD::Vector3<int32_t>(i0, i1, m_2.GetZ() + 2);
+                    break;
+                }
+                case 1: // layer along Y, sweep X, cast along Z
+                {
+                    p1 = VHACD::Vector3<int32_t>(i1, i0, m_1.GetZ() - 2);
+                    p2 = VHACD::Vector3<int32_t>(i1, i0, m_2.GetZ() + 2);
+                    break;
+                }
+                case 2: // layer along Z, sweep Y, cast along X
+                {
+                    p1 = VHACD::Vector3<int32_t>(m_1.GetX() - 2, i1, i0);
+                    p2 = VHACD::Vector3<int32_t>(m_2.GetX() + 2, i1, i0);
+                    break;
+                }
+            }
+
+            double e1 = Raycast(p1, p2); // raycast from one side to the interior
+            double e2 = Raycast(p2, p1); // raycast from the other side to the interior
+
+            errorTotal = errorTotal + e1 + e2;
+        }
+        // The total amount of edge error along this voxel layer
+        edgeError1[index1] = errorTotal;
+        index1++;
+    }
+
+    // Second pass: sweep axis idx3 within each layer
+    uint32_t index2 = 0;
+
+    for (uint32_t i0 = m_1[idx1]; i0 <= m_2[idx1]; i0++)
+    {
+        double errorTotal = 0;
+
+        for (uint32_t i1 = m_1[idx3]; i1 <= m_2[idx3]; i1++)
+        {
+            VHACD::Vector3<int32_t> p1;
+            VHACD::Vector3<int32_t> p2;
+            switch (idx)
+            {
+                case 0: // layer along X, sweep Z, cast along Y
+                {
+                    p1 = VHACD::Vector3<int32_t>(i0, m_1.GetY() - 2, i1);
+                    p2 = VHACD::Vector3<int32_t>(i0, m_2.GetY() + 2, i1);
+                    break;
+                }
+                case 1: // layer along Y, sweep Z, cast along X
+                {
+                    p1 = VHACD::Vector3<int32_t>(m_1.GetX() - 2, i0, i1);
+                    p2 = VHACD::Vector3<int32_t>(m_2.GetX() + 2, i0, i1);
+                    break;
+                }
+                case 2: // layer along Z, sweep X, cast along Y
+                {
+                    p1 = VHACD::Vector3<int32_t>(i1, m_1.GetY() - 2, i0);
+                    p2 = VHACD::Vector3<int32_t>(i1, m_2.GetY() + 2, i0);
+                    break;
+                }
+            }
+
+            double e1 = Raycast(p1, p2); // raycast from one side to the interior
+            double e2 = Raycast(p2, p1); // raycast from the other side to the interior
+
+            errorTotal = errorTotal + e1 + e2;
+        }
+        edgeError2[index2] = errorTotal;
+        index2++;
+    }
+
+
+    // We now compute the first derivative to find the greatest spot of
+    // concavity in the first pass. Layers without any ray hit (error of 0)
+    // are ignored. fabs() is used on purpose: an unqualified abs() may resolve
+    // to the integer overload and truncate the difference.
+    double maxDiff = 0;
+    uint32_t maxC = 0;
+    for (uint32_t x = 1; x < index1; x++)
+    {
+        if ( edgeError1[x] > 0 &&  edgeError1[x - 1] > 0 )
+        {
+            double diff = fabs(edgeError1[x] - edgeError1[x - 1]);
+            if ( diff > maxDiff )
+            {
+                maxDiff = diff;
+                maxC = x - 1;
+            }
+        }
+    }
+
+    // Now see if there is a greater concavity in the second pass
+    for (uint32_t x = 1; x < index2; x++)
+    {
+        if ( edgeError2[x] > 0 && edgeError2[x - 1] > 0 )
+        {
+            double diff = fabs(edgeError2[x] - edgeError2[x - 1]);
+            if ( diff > maxDiff )
+            {
+                maxDiff = diff;
+                maxC = x - 1;
+            }
+        }
+    }
+
+    splitLoc = maxC + m_1[idx1];
+
+    // We do not allow an edge split if it is too close to the ends.
+    // The upper bound is written as an addition so that it cannot wrap
+    // around when the region ends at a coordinate below 4.
+    if (    splitLoc > (m_1[idx1] + 4)
+         && (splitLoc + 4) < m_2[idx1] )
+    {
+        return true;
+    }
+
+    return false;
+}
+
+// Finds the greatest area of concavity along the X axis (axis index 0).
+bool VoxelHull::FindConcavityX(uint32_t& splitLoc)
+{
+    constexpr uint32_t kAxisX = 0;
+    return FindConcavity(kAxisX, splitLoc);
+}
+
+// Finds the greatest area of concavity along the Y axis (axis index 1).
+bool VoxelHull::FindConcavityY(uint32_t& splitLoc)
+{
+    constexpr uint32_t kAxisY = 1;
+    return FindConcavity(kAxisY, splitLoc);
+}
+
+// Finds the greatest area of concavity along the Z axis (axis index 2).
+bool VoxelHull::FindConcavityZ(uint32_t &splitLoc)
+{
+    constexpr uint32_t kAxisZ = 2;
+    return FindConcavity(kAxisZ, splitLoc);
+}
+
+// Splits this hull into two child hulls along the plane chosen by
+// ComputeSplitPlane(). Does nothing when the hull is already complete.
+// m_hullA receives the negative side and m_hullB the positive side of the
+// split; m_hullA is always constructed first.
+void VoxelHull::PerformPlaneSplit()
+{
+    if ( IsComplete() )
+    {
+        return;
+    }
+
+    uint32_t splitLoc;
+    const SplitAxis axis = ComputeSplitPlane(splitLoc);
+
+    // Whatever side ComputeSplitPlane reported, we always create both
+    // halves for the axis it picked
+    SplitAxis negativeSide;
+    SplitAxis positiveSide;
+    switch ( axis )
+    {
+        case SplitAxis::X_AXIS_NEGATIVE:
+        case SplitAxis::X_AXIS_POSITIVE:
+            // Split on the X axis at this split location
+            negativeSide = SplitAxis::X_AXIS_NEGATIVE;
+            positiveSide = SplitAxis::X_AXIS_POSITIVE;
+            break;
+        case SplitAxis::Y_AXIS_NEGATIVE:
+        case SplitAxis::Y_AXIS_POSITIVE:
+            // Split on the Y axis at this split location
+            negativeSide = SplitAxis::Y_AXIS_NEGATIVE;
+            positiveSide = SplitAxis::Y_AXIS_POSITIVE;
+            break;
+        case SplitAxis::Z_AXIS_NEGATIVE:
+        case SplitAxis::Z_AXIS_POSITIVE:
+            // Split on the Z axis at this split location
+            negativeSide = SplitAxis::Z_AXIS_NEGATIVE;
+            positiveSide = SplitAxis::Z_AXIS_POSITIVE;
+            break;
+        default:
+            // Not a known axis: create no children
+            return;
+    }
+
+    m_hullA = std::unique_ptr<VoxelHull>(new VoxelHull(*this, negativeSide, splitLoc));
+    m_hullB = std::unique_ptr<VoxelHull>(new VoxelHull(*this, positiveSide, splitLoc));
+}
+
+// Debugging aid: writes "voxel-mesh-<index>.obj" into the current directory.
+//
+// inputMesh      : the original source mesh
+// saveVoxelMesh  : when true, this hull's voxel mesh is written first
+// saveSourceMesh : when true, inputMesh is written after it
+//
+// Nothing is reported when the file cannot be opened.
+void VoxelHull::SaveVoxelMesh(const SimpleMesh &inputMesh,
+                              bool saveVoxelMesh,
+                              bool saveSourceMesh)
+{
+    char scratch[512];
+    // m_index is unsigned, so it is formatted with %u rather than %d
+    snprintf(scratch,
+             sizeof(scratch),
+             "voxel-mesh-%03u.obj",
+             m_index);
+    FILE *fph = fopen(scratch,
+                      "wb");
+    if ( fph )
+    {
+        // OBJ indices are 1-based. When both meshes are written, the source
+        // mesh indices continue after the voxel mesh vertices.
+        uint32_t baseIndex = 1;
+        if ( saveVoxelMesh )
+        {
+            WriteOBJ(fph,
+                     m_vertices,
+                     m_indices,
+                     baseIndex);
+            baseIndex += uint32_t(m_vertices.size());
+        }
+        if ( saveSourceMesh )
+        {
+            WriteOBJ(fph,
+                     inputMesh.m_vertices,
+                     inputMesh.m_indices,
+                     baseIndex);
+        }
+        fclose(fph);
+    }
+}
+
+// Debugging aid: writes this hull's voxel mesh, followed by the voxel mesh of
+// 'h', into the OBJ file 'fname'.
+//
+// fname : path of the file to create
+// h     : second hull to append; may be null, in which case only this hull's
+//         mesh is written
+//
+// Nothing is reported when the file cannot be opened.
+void VoxelHull::SaveOBJ(const char* fname,
+                        const VoxelHull* h)
+{
+    FILE *fph = fopen(fname,"wb");
+    if ( fph )
+    {
+        // OBJ indices are 1-based
+        uint32_t baseIndex = 1;
+        WriteOBJ(fph,
+                 m_vertices,
+                 m_indices,
+                 baseIndex);
+
+        // Guard against a missing second hull instead of dereferencing null
+        // with the file still open
+        if ( h )
+        {
+            // The second mesh's indices continue after our own vertices
+            baseIndex += uint32_t(m_vertices.size());
+
+            WriteOBJ(fph,
+                     h->m_vertices,
+                     h->m_indices,
+                     baseIndex);
+        }
+        fclose(fph);
+    }
+}
+
+// Debugging aid: writes this hull's voxel mesh into the OBJ file 'fname' and
+// prints a one line summary to stdout. Nothing is reported when the file
+// cannot be opened.
+void VoxelHull::SaveOBJ(const char *fname)
+{
+    FILE *fph = fopen(fname, "wb");
+    if ( fph )
+    {
+        // The counts are unsigned, so they are formatted with %u rather than %d
+        printf("Saving '%s' with %u vertices and %u triangles\n",
+                fname,
+                uint32_t(m_vertices.size()),
+                uint32_t(m_indices.size()));
+        // OBJ indices are 1-based
+        WriteOBJ(fph,
+                 m_vertices,
+                 m_indices,
+                 1);
+        fclose(fph);
+    }
+}
+
+// Writes a mesh to an already opened file in Wavefront OBJ syntax: one
+// "v x y z" line per vertex followed by one "f a b c" line per triangle.
+//
+// fph       : destination file; the call is a no-op when it is null
+// vertices  : vertex positions
+// indices   : triangles referencing 'vertices'
+// baseIndex : added to every triangle index before writing. Pass 1 for the
+//             first mesh in a file (OBJ indices are 1-based), or 1 + the
+//             number of vertices already written to that file.
+void VoxelHull::WriteOBJ(FILE* fph,
+                         const std::vector<VHACD::Vertex>& vertices,
+                         const std::vector<VHACD::Triangle>& indices,
+                         uint32_t baseIndex)
+{
+    if (!fph)
+    {
+        return;
+    }
+
+    for (const VHACD::Vertex& v : vertices)
+    {
+        fprintf(fph, "v %0.9f %0.9f %0.9f\n",
+                v.mX,
+                v.mY,
+                v.mZ);
+    }
+
+    for (const VHACD::Triangle& t : indices)
+    {
+        // The sums are unsigned, so they are written with %u. With %d a large
+        // index would be printed as a negative number, which OBJ readers
+        // interpret as a relative index.
+        fprintf(fph, "f %u %u %u\n",
+                t.mI0 + baseIndex,
+                t.mI1 + baseIndex,
+                t.mI2 + baseIndex);
+    }
+}
+
+class VHACDImpl;
+
+// This class represents a single task to compute the volume error
+// of two convex hulls combined.
+// It only holds raw pointers and a future; nothing here deletes them.
+class CostTask
+{
+public:
+    VHACDImpl*          m_this{ nullptr };  // The VHACDImpl instance this task belongs to
+    IVHACD::ConvexHull* m_hullA{ nullptr }; // First hull of the pair
+    IVHACD::ConvexHull* m_hullB{ nullptr }; // Second hull of the pair
+    double              m_concavity{ 0 }; // concavity of the two combined
+    std::future<void>   m_future;           // presumably tracks the asynchronous evaluation of this task - TODO confirm
+};
+
+// A candidate pair of hulls together with the concavity of the pair.
+// operator< is inverted on purpose (true when this pair has the GREATER
+// concavity), which reverses the usual ordering of containers using it.
+class HullPair
+{
+public:
+    HullPair() = default;
+    HullPair(uint32_t hullA,
+             uint32_t hullB,
+             double concavity);
+
+    // Returns true when this pair has a greater concavity than 'h'
+    bool operator<(const HullPair &h) const;
+
+    uint32_t    m_hullA{ 0 };     // Identifier of the first hull
+    uint32_t    m_hullB{ 0 };     // Identifier of the second hull
+    double      m_concavity{ 0 }; // Concavity associated with the pair
+};
+
+// Stores the two hull identifiers and the concavity of the pair as given.
+HullPair::HullPair(uint32_t hullA,
+                   uint32_t hullB,
+                   double concavity)
+    : m_hullA(hullA)
+    , m_hullB(hullB)
+    , m_concavity(concavity)
+{
+}
+
+// Inverted comparison: a pair orders before 'h' when its concavity is greater.
+bool HullPair::operator<(const HullPair &h) const
+{
+    return m_concavity > h.m_concavity;
+}
+
+// void jobCallback(void* userPtr);
+
+class VHACDImpl : public IVHACD, public VHACDCallbacks
+{   /*
+     * Concrete implementation of the IVHACD interface. It also implements
+     * VHACDCallbacks, and passes itself as the callback object when it
+     * constructs the initial VoxelHull (see CopyInputMesh).
+     */
+    // Don't consider more than 100,000 convex hulls: once pending + completed voxel hulls exceed this, splitting stops.
+    static constexpr uint32_t MaxConvexHullFragments{ 100000 };
+public:
+    VHACDImpl() = default;
+
+    /*
+     * Overrides VHACD::IVHACD
+     *
+     * The destructor releases every internally allocated result via Clean().
+     */
+    ~VHACDImpl() override
+    {
+        Clean();
+    }
+
+    void Cancel() override final; // raises m_canceled; the running pipeline polls that flag
+
+    bool Compute(const float* const points, // flat array, 3 floats per point
+                 const uint32_t countPoints,
+                 const uint32_t* const triangles, // flat array, 3 indices per triangle
+                 const uint32_t countTriangles,
+                 const Parameters& params) override final;
+
+    bool Compute(const double* const points, // flat array, 3 doubles per point
+                 const uint32_t countPoints,
+                 const uint32_t* const triangles, // flat array, 3 indices per triangle
+                 const uint32_t countTriangles,
+                 const Parameters& params) override final;
+
+    uint32_t GetNConvexHulls() const override final; // number of entries in m_convexHulls
+
+    bool GetConvexHull(const uint32_t index,
+                       ConvexHull& ch) const override final; // copies hull 'index' into 'ch'; false if out of range
+
+    void Clean() override final;  // release internally allocated memory
+
+    void Release() override final; // deletes this object
+
+    // Will compute the center of mass of the convex hull decomposition results and return it
+    // in 'centerOfMass'.  Returns false if the center of mass could not be computed.
+    bool ComputeCenterOfMass(double centerOfMass[3]) const override final; // NOTE(review): the definition in this file is a stub that always returns false
+
+    // In synchronous mode (non-multi-threaded) the state is always 'ready'
+    // In asynchronous mode, this returns true if the background thread is not still actively computing
+    // a new solution.  In an asynchronous config the 'IsReady' call will report any update or log
+    // messages in the caller's current thread.
+    bool IsReady(void) const override final;
+
+    /**
+    * At the request of LegionFu : [email protected]
+    * This method will return which convex hull is closest to the source position.
+    * You can use this method to figure out, for example, which vertices in the original
+    * source mesh are best associated with which convex hull.
+    * 
+    * @param pos : The input 3d position to test against
+    * @param distanceToHull : Receives the distance from 'pos' to the nearest hull (0 when there are no hulls)
+    * 
+    * @return : Returns which convex hull this position is closest to.
+    */
+    uint32_t findNearestConvexHull(const double pos[3],
+                                   double& distanceToHull) override final;
+
+// private: (left commented out: free helpers such as computeMergeCostTask call the members below through CostTask::m_this)
+    bool Compute(const std::vector<VHACD::Vertex>& points,
+                 const std::vector<VHACD::Triangle>& triangles,
+                 const Parameters& params); // common implementation behind both public Compute overloads
+
+    // Take the source position, normalize it, and then convert it into an index position
+    uint32_t GetIndex(VHACD::VertexIndex& vi,
+                      const VHACD::Vertex& p);
+
+    // This copies the input mesh while scaling the input positions
+    // to fit into a normalized unit cube. It also re-indexes all of the
+    // vertex positions in case they weren't clean coming in. 
+    void CopyInputMesh(const std::vector<VHACD::Vertex>& points,
+                       const std::vector<VHACD::Triangle>& triangles);
+
+    void ScaleOutputConvexHull(ConvexHull &ch); // maps a hull from normalized space back to input space
+
+    void AddCostToPriorityQueue(CostTask& task); // pushes the task's hull pair and concavity onto m_hullPairQueue
+
+    void ReleaseConvexHull(ConvexHull* ch); // deletes 'ch' (null is accepted)
+
+    void PerformConvexDecomposition(); // split phase, optional merge phase, then finalization into m_convexHulls
+
+    double ComputeConvexHullVolume(const ConvexHull& sm);
+
+    double ComputeVolume4(const VHACD::Vect3& a,
+                          const VHACD::Vect3& b,
+                          const VHACD::Vect3& c,
+                          const VHACD::Vect3& d); // scalar triple product (a-d).((b-d)x(c-d)), i.e. 6x the signed tetrahedron volume
+
+    double ComputeConcavity(double volumeSeparate,
+                            double volumeCombined,
+                            double volumeMesh); // |volumeSeparate - volumeCombined| / volumeMesh
+
+    // See if we can compute the cost without having to actually merge convex hulls.
+    // If the axis aligned bounding boxes (slightly inflated) of the two convex hulls
+    // do not intersect, then we don't need to actually compute the merged convex hull
+    // volume.
+    bool DoFastCost(CostTask& mt);
+
+    void PerformMergeCostTask(CostTask& mt);
+
+    ConvexHull* ComputeReducedConvexHull(const ConvexHull& ch,
+                                         uint32_t maxVerts,
+                                         bool projectHullVertices);
+
+    // Take the points in convex hull A and the points in convex hull B and generate
+    // a new convex hull on the combined set of points.
+    // Once completed, we create a SimpleMesh instance to hold the triangle mesh
+    // and we compute an inflated AABB for it.
+    ConvexHull* ComputeCombinedConvexHull(const ConvexHull& sm1,
+                                          const ConvexHull& sm2);
+
+
+    ConvexHull* GetHull(uint32_t index); // looks a hull up by mesh id; callers treat null as "no longer exists"
+
+    bool RemoveHull(uint32_t index);
+
+    ConvexHull* CopyConvexHull(const ConvexHull& source);
+
+    const char* GetStageName(Stages stage) const;
+
+    /*
+     * Overrides VHACD::VHACDCallbacks
+     */
+    void ProgressUpdate(Stages stage,
+                        double stageProgress,
+                        const char* operation) override final;
+
+    bool IsCanceled() const override final;
+
+    std::atomic<bool>                                   m_canceled{ false }; // set by Cancel(), reset at the start of Compute()
+    Parameters                                          m_params; // Convex decomposition parameters
+
+    std::vector<IVHACD::ConvexHull*>                    m_convexHulls; // Finalized convex hulls (owned; freed in Clean())
+    std::vector<std::unique_ptr<VoxelHull>>             m_voxelHulls; // completed voxel hulls
+    std::vector<std::unique_ptr<VoxelHull>>             m_pendingHulls; // voxel hulls still waiting to be split
+
+    std::vector<std::unique_ptr<AABBTree>>              m_trees; // one tree per finalized hull, built lazily by findNearestConvexHull
+    VHACD::AABBTree                                     m_AABBTree; // tree over the normalized input mesh, handed to the voxelizer
+    VHACD::Volume                                       m_voxelize; // voxelization of the normalized input mesh
+    VHACD::Vect3                                        m_center; // center of the input bounding box
+    double                                              m_scale{ double(1.0) }; // largest extent of the input bounding box
+    double                                              m_recipScale{ double(1.0) }; // 1 / m_scale, or 0 when m_scale is not positive
+    SimpleMesh                                          m_inputMesh; // re-indexed and normalized input mesh
+    std::vector<VHACD::Vertex>                          m_vertices; // normalized, de-duplicated vertices
+    std::vector<VHACD::Triangle>                        m_indices; // triangles over m_vertices with degenerate ones removed
+
+    double                                              m_overallHullVolume{ double(0.0) }; // volume of the initial convex hull, when one was built
+    double                                              m_voxelScale{ double(0.0) };
+    double                                              m_voxelHalfScale{ double(0.0) }; // m_voxelScale * 0.5
+    VHACD::Vect3                                        m_voxelBmin;
+    VHACD::Vect3                                        m_voxelBmax;
+    uint32_t                                            m_meshId{ 0 }; // running id counter for hulls
+    std::priority_queue<HullPair>                       m_hullPairQueue; // candidate hull merges, ordered by HullPair::operator<
+#if !VHACD_DISABLE_THREADING
+    std::unique_ptr<ThreadPool>                         m_threadPool{ nullptr }; // only created while Compute() runs with m_asyncACD set
+#endif
+    std::unordered_map<uint32_t, IVHACD::ConvexHull*>   m_hulls; // mesh id -> hull, used during the merge phase
+
+    double                                              m_overallProgress{ double(0.0) };
+    double                                              m_stageProgress{ double(0.0) };
+    double                                              m_operationProgress{ double(0.0) };
+};
+
+// Requests cancellation of the decomposition in progress by raising the
+// atomic m_canceled flag, which the pipeline polls between steps.
+void VHACDImpl::Cancel()
+{
+    m_canceled.store(true);
+}
+
+// Single-precision entry point. Repacks the flat arrays ('points' holds three
+// floats per point, 'triangles' three indices per triangle) into Vertex and
+// Triangle vectors and forwards to the vector-based Compute() overload.
+bool VHACDImpl::Compute(const float* const points,
+                        const uint32_t countPoints,
+                        const uint32_t* const triangles,
+                        const uint32_t countTriangles,
+                        const Parameters& params)
+{
+    std::vector<VHACD::Vertex> vertexList;
+    vertexList.reserve(countPoints);
+    for (uint32_t pointIdx = 0; pointIdx < countPoints; ++pointIdx)
+    {
+        const uint32_t base = pointIdx * 3;
+        vertexList.emplace_back(points[base + 0], points[base + 1], points[base + 2]);
+    }
+
+    std::vector<VHACD::Triangle> triangleList;
+    triangleList.reserve(countTriangles);
+    for (uint32_t triIdx = 0; triIdx < countTriangles; ++triIdx)
+    {
+        const uint32_t base = triIdx * 3;
+        triangleList.emplace_back(triangles[base + 0], triangles[base + 1], triangles[base + 2]);
+    }
+
+    return Compute(vertexList, triangleList, params);
+}
+
+// Double-precision entry point. Repacks the flat arrays ('points' holds three
+// doubles per point, 'triangles' three indices per triangle) into Vertex and
+// Triangle vectors and forwards to the vector-based Compute() overload.
+bool VHACDImpl::Compute(const double* const points,
+                        const uint32_t countPoints,
+                        const uint32_t* const triangles,
+                        const uint32_t countTriangles,
+                        const Parameters& params)
+{
+    std::vector<VHACD::Vertex> vertexList;
+    vertexList.reserve(countPoints);
+    for (uint32_t pointIdx = 0; pointIdx < countPoints; ++pointIdx)
+    {
+        const uint32_t base = pointIdx * 3;
+        vertexList.emplace_back(points[base + 0], points[base + 1], points[base + 2]);
+    }
+
+    std::vector<VHACD::Triangle> triangleList;
+    triangleList.reserve(countTriangles);
+    for (uint32_t triIdx = 0; triIdx < countTriangles; ++triIdx)
+    {
+        const uint32_t base = triIdx * 3;
+        triangleList.emplace_back(triangles[base + 0], triangles[base + 1], triangles[base + 2]);
+    }
+
+    return Compute(vertexList, triangleList, params);
+}
+
+// Number of finalized convex hulls currently held in m_convexHulls.
+uint32_t VHACDImpl::GetNConvexHulls() const
+{
+    const auto finalizedCount = m_convexHulls.size();
+    return static_cast<uint32_t>(finalizedCount);
+}
+
+// Copies finalized hull number 'index' into 'ch'.
+// Returns false (leaving 'ch' untouched) when 'index' is out of range.
+bool VHACDImpl::GetConvexHull(const uint32_t index,
+                              ConvexHull& ch) const
+{
+    if ( index >= uint32_t(m_convexHulls.size()) )
+    {
+        return false;
+    }
+
+    ch = *m_convexHulls[index];
+    return true;
+}
+
+// Releases everything produced by a previous Compute(): the thread pool,
+// the per-hull AABB trees, all owned convex hulls (finalized and in-merge),
+// the voxel hull lists and the normalized input mesh buffers.
+void VHACDImpl::Clean()
+{
+#if !VHACD_DISABLE_THREADING
+    // Drop the pool first, before any hull storage is freed.
+    m_threadPool.reset();
+#endif
+
+    m_trees.clear();
+
+    // Finalized hulls are owned raw pointers.
+    for (IVHACD::ConvexHull* finished : m_convexHulls)
+    {
+        ReleaseConvexHull(finished);
+    }
+    m_convexHulls.clear();
+
+    // So are the hulls still registered in the id map.
+    for (auto& entry : m_hulls)
+    {
+        ReleaseConvexHull(entry.second);
+    }
+    m_hulls.clear();
+
+    m_voxelHulls.clear();
+    m_pendingHulls.clear();
+
+    m_vertices.clear();
+    m_indices.clear();
+}
+
+void VHACDImpl::Release()
+{   // Destroys this instance (the destructor runs Clean()); the object must not be used after this call.
+    delete this;
+}
+
+// Stub: the center of mass is never computed here. 'centerOfMass' is left
+// untouched and the function always reports failure.
+bool VHACDImpl::ComputeCenterOfMass(double centerOfMass[3]) const
+{
+    return false;
+}
+
+bool VHACDImpl::IsReady() const
+{   // This implementation always reports ready.
+    return true;
+}
+
+// Finds the finalized convex hull whose surface is closest to 'pos'.
+//
+// pos            : query position (x, y, z).
+// distanceToHull : receives the distance from 'pos' to the nearest hull.
+//                  It is set to 0 when there are no hulls at all.
+// Returns the index of the nearest hull (0 when there are no hulls).
+//
+// One AABB tree per hull is built on first use and cached in m_trees. The
+// trees are built directly from the hulls stored in m_convexHulls, not from
+// temporary copies: this avoids deep-copying every hull and guarantees that
+// the vertex/triangle arrays handed to AABBTree live as long as the trees do
+// (both containers are only reset together, in Clean()).
+uint32_t VHACDImpl::findNearestConvexHull(const double pos[3],
+                                          double& distanceToHull)
+{
+    uint32_t ret = 0; // The default return code is zero
+
+    const uint32_t hullCount = GetNConvexHulls();
+    distanceToHull = 0;
+    // First, make sure that we have valid and completed results
+    if ( hullCount )
+    {
+        // (Re)build the cached trees whenever they do not match the current hull set
+        if ( m_trees.size() != hullCount )
+        {
+            m_trees.clear();
+            m_trees.reserve(hullCount);
+            // For each convex hull, we generate an AABB tree for fast closest point queries
+            for (const IVHACD::ConvexHull* hull : m_convexHulls)
+            {
+                m_trees.emplace_back(new AABBTree(hull->m_points,
+                                                  hull->m_triangles));
+            }
+        }
+
+        // The query position is the same for every hull
+        VHACD::Vect3 position(pos[0],
+                              pos[1],
+                              pos[2]);
+
+        // We now compute the closest point to each convex hull and save the nearest one.
+        // 'closest' holds a squared distance.
+        double closest = 1e99;
+        for (uint32_t i = 0; i < hullCount; i++)
+        {
+            std::unique_ptr<AABBTree>& t = m_trees[i];
+            if ( t )
+            {
+                VHACD::Vect3 closestPoint;
+                if ( t->GetClosestPointWithinDistance(position, 1e99, closestPoint))
+                {
+                    VHACD::Vect3 d = position - closestPoint;
+                    double distanceSquared = d.GetNormSquared();
+                    if ( distanceSquared < closest )
+                    {
+                        closest = distanceSquared;
+                        ret = i;
+                    }
+                }
+            }
+        }
+        distanceToHull = sqrt(closest); // compute the distance to the nearest convex hull
+    }
+
+    return ret;
+}
+
+// Common implementation behind the public Compute() overloads.
+// Stores the parameters, discards any previous results, prepares the input
+// mesh and runs the decomposition. Returns true on completion and false if
+// the run was canceled (in which case all partial results are released).
+bool VHACDImpl::Compute(const std::vector<VHACD::Vertex>& points,
+                        const std::vector<VHACD::Triangle>& triangles,
+                        const Parameters& params)
+{
+    m_params = params;
+    m_canceled = false;
+
+    // Start from a clean slate
+    Clean();
+#if !VHACD_DISABLE_THREADING
+    if ( m_params.m_asyncACD )
+    {
+        m_threadPool.reset(new ThreadPool(8));
+    }
+#endif
+
+    CopyInputMesh(points, triangles);
+    if ( !m_canceled )
+    {
+        // Recursively split, then merge, until the decomposition is complete
+        PerformConvexDecomposition();
+    }
+
+    const bool completed = !m_canceled;
+    if ( !completed )
+    {
+        Clean();
+        if ( m_params.m_logger )
+        {
+            m_params.m_logger->Log("VHACD operation canceled before it was complete.");
+        }
+    }
+
+#if !VHACD_DISABLE_THREADING
+    // The pool only lives for the duration of one Compute() call
+    m_threadPool.reset();
+#endif
+    return completed;
+}
+
+// Maps input vertex 'p' into normalized space (centered on m_center and
+// scaled by m_recipScale) and returns the index 'vi' assigns to that position.
+uint32_t VHACDImpl::GetIndex(VHACD::VertexIndex& vi,
+                             const VHACD::Vertex& p)
+{
+    VHACD::Vect3 normalized = (VHACD::Vect3(p) - m_center) * m_recipScale;
+    bool isNewVertex; // out-parameter required by VertexIndex::GetIndex, not used here
+    return vi.GetIndex(normalized,
+                       isNewVertex);
+}
+
+// Prepares the caller's mesh for decomposition:
+//   1. computes the bounding box of 'points' and derives m_center / m_scale /
+//      m_recipScale so the mesh can be normalized;
+//   2. re-indexes every triangle through a VertexIndex (normalized,
+//      de-duplicated vertices), dropping degenerate triangles and triangles
+//      whose indices do not refer to an element of 'points';
+//   3. builds the AABB tree, voxelizes the mesh and creates the initial
+//      VoxelHull, which is queued in m_pendingHulls.
+// Each stage is skipped once m_canceled has been raised.
+void VHACDImpl::CopyInputMesh(const std::vector<VHACD::Vertex>& points,
+                              const std::vector<VHACD::Triangle>& triangles)
+{
+    m_vertices.clear();
+    m_indices.clear();
+    m_indices.reserve(triangles.size());
+
+    // First we must find the bounding box of this input vertices and normalize them into a unit-cube
+    VHACD::Vect3 bmin( FLT_MAX);
+    VHACD::Vect3 bmax(-FLT_MAX);
+    ProgressUpdate(Stages::COMPUTE_BOUNDS_OF_INPUT_MESH,
+                   0,
+                   "ComputingBounds");
+    for (const VHACD::Vertex& p : points)
+    {
+        bmin = bmin.CWiseMin(p);
+        bmax = bmax.CWiseMax(p);
+    }
+    ProgressUpdate(Stages::COMPUTE_BOUNDS_OF_INPUT_MESH,
+                   100,
+                   "ComputingBounds");
+
+    m_center = (bmax + bmin) * double(0.5);
+
+    VHACD::Vect3 scale = bmax - bmin;
+    m_scale = scale.MaxCoeff();
+
+    // Guard against a zero (or negative, for empty input) extent
+    m_recipScale = m_scale > double(0.0) ? double(1.0) / m_scale : double(0.0);
+
+    {
+        VHACD::VertexIndex vi = VHACD::VertexIndex(double(0.001), false);
+
+        const size_t pointCount = points.size();
+        uint32_t dcount = 0;       // triangles that collapse after re-indexing
+        uint32_t invalidCount = 0; // triangles referencing vertices that do not exist
+
+        for (uint32_t i = 0; i < triangles.size() && !m_canceled; ++i)
+        {
+            const VHACD::Triangle& t = triangles[i];
+
+            // Never index past the end of 'points' on malformed input
+            if ( t.mI0 >= pointCount || t.mI1 >= pointCount || t.mI2 >= pointCount )
+            {
+                invalidCount++;
+                continue;
+            }
+
+            const VHACD::Vertex& p1 = points[t.mI0];
+            const VHACD::Vertex& p2 = points[t.mI1];
+            const VHACD::Vertex& p3 = points[t.mI2];
+
+            uint32_t i1 = GetIndex(vi, p1);
+            uint32_t i2 = GetIndex(vi, p2);
+            uint32_t i3 = GetIndex(vi, p3);
+
+            if ( i1 == i2 || i1 == i3 || i2 == i3 )
+            {
+                dcount++;
+            }
+            else
+            {
+                m_indices.emplace_back(i1, i2, i3);
+            }
+        }
+
+        if ( m_params.m_logger )
+        {
+            char scratch[512];
+            if ( dcount )
+            {
+                // %u: the counter is unsigned
+                snprintf(scratch,
+                         sizeof(scratch),
+                         "Skipped %u degenerate triangles", dcount);
+                m_params.m_logger->Log(scratch);
+            }
+            if ( invalidCount )
+            {
+                snprintf(scratch,
+                         sizeof(scratch),
+                         "Skipped %u triangles with out-of-range vertex indices", invalidCount);
+                m_params.m_logger->Log(scratch);
+            }
+        }
+
+        m_vertices = vi.TakeVertices();
+    }
+
+    // Create the raycast mesh
+    if ( !m_canceled )
+    {
+        ProgressUpdate(Stages::CREATE_RAYCAST_MESH,
+                       0,
+                       "Building RaycastMesh");
+        m_AABBTree = VHACD::AABBTree(m_vertices,
+                                     m_indices);
+        ProgressUpdate(Stages::CREATE_RAYCAST_MESH,
+                       100,
+                       "RaycastMesh completed");
+    }
+    if ( !m_canceled )
+    {
+        ProgressUpdate(Stages::VOXELIZING_INPUT_MESH,
+                        0,
+                        "Voxelizing Input Mesh");
+        m_voxelize = VHACD::Volume();
+        m_voxelize.Voxelize(m_vertices,
+                            m_indices,
+                            m_params.m_resolution,
+                            m_params.m_fillMode,
+                            m_AABBTree);
+        m_voxelScale = m_voxelize.GetScale();
+        m_voxelHalfScale = m_voxelScale * double(0.5);
+        m_voxelBmin = m_voxelize.GetBounds().GetMin();
+        m_voxelBmax = m_voxelize.GetBounds().GetMax();
+        ProgressUpdate(Stages::VOXELIZING_INPUT_MESH,
+                       100,
+                       "Voxelization complete");
+    }
+
+    // Keep a copy of the cleaned mesh (done even when canceled, as before)
+    m_inputMesh.m_vertices = m_vertices;
+    m_inputMesh.m_indices = m_indices;
+    if ( !m_canceled )
+    {
+        ProgressUpdate(Stages::BUILD_INITIAL_CONVEX_HULL,
+                        0,
+                        "Build initial ConvexHull");
+        std::unique_ptr<VoxelHull> vh = std::unique_ptr<VoxelHull>(new VoxelHull(m_voxelize,
+                                                                                 m_params,
+                                                                                 this));
+        if ( vh->m_convexHull )
+        {
+            m_overallHullVolume = vh->m_convexHull->m_volume;
+        }
+        m_pendingHulls.push_back(std::move(vh));
+        ProgressUpdate(Stages::BUILD_INITIAL_CONVEX_HULL,
+                       100,
+                       "Initial ConvexHull complete");
+    }
+}
+
+// Converts a hull from normalized space back to the caller's coordinate
+// system (scale by m_scale, translate by m_center) and then refreshes the
+// values derived from its points: volume, bounding box and centroid.
+void VHACDImpl::ScaleOutputConvexHull(ConvexHull& ch)
+{
+    for (auto& point : ch.m_points)
+    {
+        VHACD::Vect3 restored = point;
+        restored = (restored * m_scale) + m_center;
+        point = restored;
+    }
+
+    ch.m_volume = ComputeConvexHullVolume(ch); // get the combined volume
+
+    VHACD::BoundsAABB bounds(ch.m_points);
+    ch.mBmin = bounds.GetMin();
+    ch.mBmax = bounds.GetMax();
+
+    ComputeCentroid(ch.m_points,
+                    ch.m_triangles,
+                    ch.m_center);
+}
+
+// Queues the hull pair described by 'task' (the two mesh ids plus the
+// concavity stored on the task) as a merge candidate.
+void VHACDImpl::AddCostToPriorityQueue(CostTask& task)
+{
+    m_hullPairQueue.emplace(task.m_hullA->m_meshId,
+                            task.m_hullB->m_meshId,
+                            task.m_concavity);
+}
+
+// Frees a hull owned by this object. Passing null is harmless.
+void VHACDImpl::ReleaseConvexHull(ConvexHull* ch)
+{
+    // delete on a null pointer is a no-op, so no explicit check is needed
+    delete ch;
+}
+
+// Job body used by the split phase: performs the plane split on one voxel hull.
+void jobCallback(std::unique_ptr<VoxelHull>& userPtr)
+{
+    VoxelHull& hull = *userPtr;
+    hull.PerformPlaneSplit();
+}
+
+// Job body used by the merge phase: forwards the task to the VHACDImpl
+// instance recorded in its m_this member.
+void computeMergeCostTask(CostTask& ptr)
+{
+    VHACDImpl* const owner = ptr.m_this;
+    owner->PerformMergeCostTask(ptr);
+}
+
+// Runs the decomposition proper, in three phases:
+//
+//   1. Split: repeatedly plane-split every pending voxel hull (inline, or as
+//      thread-pool jobs when a pool exists) until each hull reports
+//      IsComplete() or the fragment limit MaxConvexHullFragments is exceeded.
+//   2. Merge: if more hulls were produced than m_params.m_maxConvexHulls,
+//      compute a merge cost for every hull pair and greedily merge pairs taken
+//      from m_hullPairQueue until the limit is met.
+//   3. Finalize: reduce hulls that exceed the vertex limit (or when shrink-wrap
+//      is requested), scale them back to input space and move them into
+//      m_convexHulls.
+//
+// m_canceled is polled throughout; on cancellation the function returns early
+// and the caller (Compute) releases whatever is left via Clean().
+//
+// Differences from the previous revision:
+//   * Jobs queued for the cost matrix are now always waited on before 'tasks'
+//     can go out of scope. The jobs hold raw pointers into 'tasks', and on the
+//     cancel path nothing waited for them before.
+//   * Hulls that were not finalized because of a cancel during finalization
+//     are released instead of being dropped by m_hulls.clear().
+void VHACDImpl::PerformConvexDecomposition()
+{
+    {
+        ScopedTime st("Convex Decomposition",
+                      m_params.m_logger);
+        double maxHulls = pow(2, m_params.m_maxRecursionDepth);
+        // We recursively split convex hulls until we can
+        // no longer recurse further.
+        Timer t;
+
+        while ( !m_pendingHulls.empty() && !m_canceled )
+        {
+            size_t count = m_pendingHulls.size() + m_voxelHulls.size();
+            double e = t.PeekElapsedSeconds();
+            if ( e >= double(0.1) )
+            {
+                t.Reset();
+                double stageProgress = (double(count) * double(100.0)) / maxHulls;
+                ProgressUpdate(Stages::PERFORMING_DECOMPOSITION,
+                               stageProgress,
+                               "Performing recursive decomposition of convex hulls");
+            }
+            // First we take ownership of the hulls we are processing
+            std::vector<std::unique_ptr<VoxelHull>> oldList = std::move(m_pendingHulls);
+            // For each hull we want to split, we either
+            // immediately perform the plane split or we post it as
+            // a job to be performed in a background thread
+            std::vector<std::future<void>> futures(oldList.size());
+            uint32_t futureCount = 0;
+            for (auto& i : oldList)
+            {
+                if ( !(i->IsComplete() || count > MaxConvexHullFragments) )
+                {
+#if !VHACD_DISABLE_THREADING
+                    if ( m_threadPool )
+                    {
+                        // 'i' refers to an element of oldList, which outlives the wait below
+                        futures[futureCount] = m_threadPool->enqueue([&i]
+                        {
+                            jobCallback(i);
+                        });
+                        futureCount++;
+                    }
+                    else
+#endif
+                    {
+                        i->PerformPlaneSplit();
+                    }
+                }
+            }
+            // Wait for any outstanding jobs to complete in the background threads
+            for (uint32_t i = 0; i < futureCount; i++)
+            {
+                futures[i].get();
+            }
+            // Now, we rebuild the pending convex hulls list by
+            // adding the two children to the output list if
+            // we need to recurse them further
+            for (auto& vh : oldList)
+            {
+                if ( vh->IsComplete() || count > MaxConvexHullFragments )
+                {
+                    if ( vh->m_convexHull )
+                    {
+                        m_voxelHulls.push_back(std::move(vh));
+                    }
+                }
+                else
+                {
+                    if ( vh->m_hullA )
+                    {
+                        m_pendingHulls.push_back(std::move(vh->m_hullA));
+                    }
+                    if ( vh->m_hullB )
+                    {
+                        m_pendingHulls.push_back(std::move(vh->m_hullB));
+                    }
+                }
+            }
+        }
+    }
+
+    if ( !m_canceled )
+    {
+        // Give each convex hull a unique guid
+        m_meshId = 0;
+        m_hulls.clear();
+
+        // Build the convex hull id map
+        std::vector<ConvexHull*> hulls;
+
+        ProgressUpdate(Stages::INITIALIZING_CONVEX_HULLS_FOR_MERGING,
+                       0,
+                       "Initializing ConvexHulls");
+        for (auto& vh : m_voxelHulls)
+        {
+            if ( m_canceled )
+            {
+                break;
+            }
+            ConvexHull* ch = CopyConvexHull(*vh->m_convexHull);
+            m_meshId++;
+            ch->m_meshId = m_meshId;
+            m_hulls[m_meshId] = ch;
+            // Compute the volume of the convex hull
+            ch->m_volume = ComputeConvexHullVolume(*ch);
+            // Compute the AABB of the convex hull
+            VHACD::BoundsAABB b = VHACD::BoundsAABB(ch->m_points).Inflate(double(0.1));
+            ch->mBmin = b.GetMin();
+            ch->mBmax = b.GetMax();
+
+            ComputeCentroid(ch->m_points,
+                            ch->m_triangles,
+                            ch->m_center);
+
+            hulls.push_back(ch);
+        }
+        ProgressUpdate(Stages::INITIALIZING_CONVEX_HULLS_FOR_MERGING,
+                        100,
+                        "ConvexHull initialization complete");
+
+        m_voxelHulls.clear();
+
+        // here we merge convex hulls as needed until they match the
+        // desired maximum hull count.
+        size_t hullCount = hulls.size();
+
+        if ( hullCount > m_params.m_maxConvexHulls && !m_canceled)
+        {
+            // Number of unordered hull pairs: n * (n - 1) / 2
+            size_t costMatrixSize = ((hullCount * hullCount) - hullCount) >> 1;
+            std::vector<CostTask> tasks;
+            // Reserving the worst case up front keeps element addresses stable,
+            // which the queued jobs below rely on.
+            tasks.reserve(costMatrixSize);
+
+            ScopedTime st("Computing the Cost Matrix",
+                          m_params.m_logger);
+            // First thing we need to do is compute the cost matrix
+            // This is computed as the volume error of any two convex hulls
+            // combined
+            ProgressUpdate(Stages::COMPUTING_COST_MATRIX,
+                           0,
+                           "Computing Hull Merge Cost Matrix");
+            for (size_t i = 1; i < hullCount && !m_canceled; i++)
+            {
+                ConvexHull* chA = hulls[i];
+
+                for (size_t j = 0; j < i && !m_canceled; j++)
+                {
+                    ConvexHull* chB = hulls[j];
+
+                    CostTask t;
+                    t.m_hullA = chA;
+                    t.m_hullB = chB;
+                    t.m_this = this;
+
+                    // Pairs settled by the fast path need no merge-cost task
+                    if ( !DoFastCost(t) )
+                    {
+                        tasks.push_back(std::move(t));
+#if !VHACD_DISABLE_THREADING
+                        if ( m_threadPool )
+                        {
+                            CostTask* task = &tasks.back();
+                            task->m_future = m_threadPool->enqueue([task]
+                            {
+                                computeMergeCostTask(*task);
+                            });
+                        }
+#endif
+                    }
+                }
+            }
+
+#if !VHACD_DISABLE_THREADING
+            // Always wait for every queued job, even when canceled: the jobs
+            // reference elements of 'tasks', so none may still be outstanding
+            // when that vector is destroyed.
+            if ( m_threadPool )
+            {
+                for (CostTask& task : tasks)
+                {
+                    task.m_future.get();
+                }
+            }
+#endif
+
+            if ( !m_canceled )
+            {
+#if !VHACD_DISABLE_THREADING
+                if ( m_threadPool )
+                {
+                    // Costs were already computed by the pool jobs
+                    for (CostTask& task : tasks)
+                    {
+                        AddCostToPriorityQueue(task);
+                    }
+                }
+                else
+#endif
+                {
+                    for (CostTask& task : tasks)
+                    {
+                        PerformMergeCostTask(task);
+                        AddCostToPriorityQueue(task);
+                    }
+                }
+                ProgressUpdate(Stages::COMPUTING_COST_MATRIX,
+                               100,
+                               "Finished cost matrix");
+            }
+
+            if ( !m_canceled )
+            {
+                ScopedTime stMerging("Merging Convex Hulls",
+                                     m_params.m_logger);
+                Timer t;
+                // Now that we know the cost to merge each hull, we can begin merging them.
+                bool cancel = false;
+
+                uint32_t maxMergeCount = uint32_t(m_hulls.size()) - m_params.m_maxConvexHulls;
+                uint32_t startCount = uint32_t(m_hulls.size());
+
+                while (    !cancel
+                        && m_hulls.size() > m_params.m_maxConvexHulls
+                        && !m_hullPairQueue.empty()
+                        && !m_canceled)
+                {
+                    double e = t.PeekElapsedSeconds();
+                    if ( e >= double(0.1) )
+                    {
+                        t.Reset();
+                        uint32_t hullsProcessed = startCount - uint32_t(m_hulls.size() );
+                        double stageProgress = double(hullsProcessed * 100) / double(maxMergeCount);
+                        ProgressUpdate(Stages::MERGING_CONVEX_HULLS,
+                                       stageProgress,
+                                       "Merging Convex Hulls");
+                    }
+
+                    HullPair hp = m_hullPairQueue.top();
+                    m_hullPairQueue.pop();
+
+                    // It is entirely possible that the hull pair queue can
+                    // have references to convex hulls that are no longer valid
+                    // because they were previously merged. So we check for this
+                    // and if either hull referenced in this pair no longer
+                    // exists, then we skip it.
+
+                    // Look up this pair of hulls by ID
+                    ConvexHull* ch1 = GetHull(hp.m_hullA);
+                    ConvexHull* ch2 = GetHull(hp.m_hullB);
+
+                    // If both hulls are still valid, then we merge them, delete the old
+                    // two hulls and recompute the cost matrix for the new combined hull
+                    // we have created
+                    if ( ch1 && ch2 )
+                    {
+                        // This is the convex hull which results from combining the
+                        // vertices in the two source hulls
+                        ConvexHull* combinedHull = ComputeCombinedConvexHull(*ch1,
+                                                                                *ch2);
+                        // The two old convex hulls are going to get removed
+                        RemoveHull(hp.m_hullA);
+                        RemoveHull(hp.m_hullB);
+
+                        m_meshId++;
+                        combinedHull->m_meshId = m_meshId;
+                        tasks.clear();
+                        tasks.reserve(m_hulls.size());
+
+                        // Compute the cost between this new merged hull
+                        // and all existing convex hulls and then
+                        // add that to the priority queue
+                        for (auto& i : m_hulls)
+                        {
+                            if ( m_canceled )
+                            {
+                                break;
+                            }
+                            ConvexHull* secondHull = i.second;
+                            CostTask t;
+                            t.m_hullA = combinedHull;
+                            t.m_hullB = secondHull;
+                            t.m_this = this;
+                            if ( !DoFastCost(t) )
+                            {
+                                tasks.push_back(std::move(t));
+                            }
+                        }
+                        m_hulls[combinedHull->m_meshId] = combinedHull;
+                        // See how many merge cost tasks were posted
+                        // If there are 2 or more and we are running asynchronously, then do them that way.
+#if !VHACD_DISABLE_THREADING
+                        if ( m_threadPool && tasks.size() >= 2)
+                        {
+                            // 'tasks' is not resized until every job below has been waited on
+                            for (CostTask& task : tasks)
+                            {
+                                task.m_future = m_threadPool->enqueue([&task]
+                                {
+                                    computeMergeCostTask(task);
+                                });
+                            }
+
+                            for (CostTask& task : tasks)
+                            {
+                                task.m_future.get();
+                            }
+                        }
+                        else
+#endif
+                        {
+                            for (CostTask& task : tasks)
+                            {
+                                PerformMergeCostTask(task);
+                            }
+                        }
+
+                        for (CostTask& task : tasks)
+                        {
+                            AddCostToPriorityQueue(task);
+                        }
+                    }
+                }
+                // Ok...once we are done, we copy the results!
+                // NOTE(review): on this path the final mesh ids continue from the
+                // merge counter, while the no-merge path below restarts at 0.
+                // The old code had a no-op "m_meshId -= 0;" here; confirm whether
+                // a reset to 0 was intended.
+                ProgressUpdate(Stages::FINALIZING_RESULTS,
+                               0,
+                               "Finalizing results");
+                for (auto& i : m_hulls)
+                {
+                    if ( m_canceled )
+                    {
+                        break;
+                    }
+                    ConvexHull* ch = i.second;
+                    // Ownership leaves the map here, so the cleanup pass below
+                    // only sees hulls that were never finalized
+                    i.second = nullptr;
+                    // We now must reduce the convex hull
+                    if ( ch->m_points.size() > m_params.m_maxNumVerticesPerCH || m_params.m_shrinkWrap)
+                    {
+                        ConvexHull* reduce = ComputeReducedConvexHull(*ch,
+                                                                      m_params.m_maxNumVerticesPerCH,
+                                                                      m_params.m_shrinkWrap);
+                        ReleaseConvexHull(ch);
+                        ch = reduce;
+                    }
+                    ScaleOutputConvexHull(*ch);
+                    ch->m_meshId = m_meshId;
+                    m_meshId++;
+                    m_convexHulls.push_back(ch);
+                }
+                // Anything still owned by the map was skipped because of a cancel:
+                // free it rather than leak it (entries already moved are null).
+                for (auto& i : m_hulls)
+                {
+                    ReleaseConvexHull(i.second);
+                }
+                m_hulls.clear();
+                ProgressUpdate(Stages::FINALIZING_RESULTS,
+                               100,
+                               "Finalized results complete");
+            }
+        }
+        else
+        {
+            ProgressUpdate(Stages::FINALIZING_RESULTS,
+                           0,
+                           "Finalizing results");
+            m_meshId = 0;
+            for (auto& ch : hulls)
+            {
+                // We now must reduce the convex hull
+                if ( ch->m_points.size() > m_params.m_maxNumVerticesPerCH  || m_params.m_shrinkWrap )
+                {
+                    ConvexHull* reduce = ComputeReducedConvexHull(*ch,
+                                                                  m_params.m_maxNumVerticesPerCH,
+                                                                  m_params.m_shrinkWrap);
+                    ReleaseConvexHull(ch);
+                    ch = reduce;
+                }
+                ScaleOutputConvexHull(*ch);
+                ch->m_meshId = m_meshId;
+                m_meshId++;
+                m_convexHulls.push_back(ch);
+            }
+            // Every hull registered in the map is also in 'hulls' and has just
+            // been handed over to m_convexHulls, so only the map entries go away
+            m_hulls.clear();
+            ProgressUpdate(Stages::FINALIZING_RESULTS,
+                           100,
+                           "Finalized results");
+        }
+    }
+}
+
+// Computes the volume of a convex hull from its vertices and triangles.
+//
+// The hull vertices are averaged into a single reference point. Each triangle
+// then contributes ComputeVolume4(v0, v1, v2, reference) to a running sum, and
+// the sum is divided by six before it is returned.
+//
+// NOTE(review): with an empty point list the average is divided by zero; that
+// value is only consumed when the hull also has triangles.
+double VHACDImpl::ComputeConvexHullVolume(const ConvexHull& sm)
+{
+    // Average of all hull vertices, shared by every per-triangle term below.
+    VHACD::Vect3 apex(0, 0, 0);
+    for (const auto& vertex : sm.m_points)
+    {
+        VHACD::Vect3 position(vertex);
+        apex += position;
+    }
+    apex /= double(sm.m_points.size());
+
+    // Running sum of the per-triangle terms (six times the final volume).
+    double sixfoldVolume = 0;
+    for (const auto& tri : sm.m_triangles)
+    {
+        VHACD::Vect3 corner0(sm.m_points[tri.mI0]);
+        VHACD::Vect3 corner1(sm.m_points[tri.mI1]);
+        VHACD::Vect3 corner2(sm.m_points[tri.mI2]);
+
+        sixfoldVolume += ComputeVolume4(corner0,
+                                        corner1,
+                                        corner2,
+                                        apex);
+    }
+    return sixfoldVolume / double(6.0);
+}
+
+// Returns the scalar triple product (a - d) . ((b - d) x (c - d)).
+//
+// The result is signed and is NOT divided by six here; the caller
+// (ComputeConvexHullVolume) applies that factor once to the accumulated sum.
+double VHACDImpl::ComputeVolume4(const VHACD::Vect3& a,
+                                 const VHACD::Vect3& b,
+                                 const VHACD::Vect3& c,
+                                 const VHACD::Vect3& d)
+{
+    // Edge vectors from the reference point d to the other three points.
+    VHACD::Vect3 edgeA = a - d;
+    VHACD::Vect3 edgeB = b - d;
+    VHACD::Vect3 edgeC = c - d;
+
+    VHACD::Vect3 normal = edgeB.Cross(edgeC);
+    return edgeA.Dot(normal);
+}
+
+// Returns |volumeSeparate - volumeCombined| divided by volumeMesh.
+//
+// NOTE(review): volumeMesh is used as the divisor without a zero check.
+double VHACDImpl::ComputeConcavity(double volumeSeparate,
+                                   double volumeCombined,
+                                   double volumeMesh)
+{
+    const double volumeDelta = fabs(volumeSeparate - volumeCombined);
+    return volumeDelta / volumeMesh;
+}
+
+// Cheap merge-cost estimate for a pair of hulls whose bounding boxes do not touch.
+//
+// If the two hulls' AABBs intersect, nothing is done and false is returned.
+// Otherwise the volume of the union of the two boxes is used as the combined
+// volume, the resulting concavity is pushed onto m_hullPairQueue as a
+// HullPair, and true is returned.
+bool VHACDImpl::DoFastCost(CostTask& mt)
+{
+    ConvexHull* hullA = mt.m_hullA;
+    ConvexHull* hullB = mt.m_hullB;
+
+    VHACD::BoundsAABB boundsA(hullA->mBmin,
+                              hullA->mBmax);
+    VHACD::BoundsAABB boundsB(hullB->mBmin,
+                              hullB->mBmax);
+
+    // Overlapping boxes are not handled by the fast path.
+    if (boundsA.Intersects(boundsB))
+    {
+        return false;
+    }
+
+    VHACD::BoundsAABB merged = boundsA.Union(boundsB);
+    double mergedVolume = merged.Volume();
+    double concavity = ComputeConcavity(hullA->m_volume + hullB->m_volume,
+                                        mergedVolume,
+                                        m_overallHullVolume);
+
+    HullPair candidate(hullA->m_meshId,
+                       hullB->m_meshId,
+                       concavity);
+    m_hullPairQueue.push(candidate);
+    return true;
+}
+
+// Computes the merge cost for one pair of hulls.
+//
+// Builds the combined convex hull of mt.m_hullA and mt.m_hullB, measures its
+// volume, stores the resulting concavity in mt.m_concavity, and releases the
+// temporary combined hull.
+void VHACDImpl::PerformMergeCostTask(CostTask& mt)
+{
+    ConvexHull* hullA = mt.m_hullA;
+    ConvexHull* hullB = mt.m_hullB;
+
+    double separateVolume = hullA->m_volume + hullB->m_volume;
+
+    // Temporary hull spanning both inputs; only its volume is needed.
+    ConvexHull* merged = ComputeCombinedConvexHull(*hullA,
+                                                   *hullB);
+    double mergedVolume = ComputeConvexHullVolume(*merged);
+
+    mt.m_concavity = ComputeConcavity(separateVolume,
+                                      mergedVolume,
+                                      m_overallHullVolume);
+    ReleaseConvexHull(merged);
+}
+
+// Produces a new, heap-allocated hull derived from 'ch' via ShrinkWrap.
+//
+// The input hull's points and triangles are copied into a SimpleMesh, which is
+// passed to ShrinkWrap together with m_AABBTree, 'maxVerts', a distance of
+// m_voxelScale * 4, and 'projectHullVertices'. The resulting mesh becomes the
+// new hull; its bounds (inflated by 0.1), centroid and volume are recomputed.
+//
+// The input hull is not modified. The caller owns the returned hull.
+IVHACD::ConvexHull* VHACDImpl::ComputeReducedConvexHull(const ConvexHull& ch,
+                                                        uint32_t maxVerts,
+                                                        bool projectHullVertices)
+{
+    // Working copy that ShrinkWrap operates on.
+    SimpleMesh working;
+    working.m_vertices = ch.m_points;
+    working.m_indices = ch.m_triangles;
+
+    ShrinkWrap(working,
+               m_AABBTree,
+               maxVerts,
+               m_voxelScale * 4,
+               projectHullVertices);
+
+    ConvexHull* reduced = new ConvexHull;
+    reduced->m_points = working.m_vertices;
+    reduced->m_triangles = working.m_indices;
+
+    VHACD::BoundsAABB inflated = VHACD::BoundsAABB(reduced->m_points).Inflate(double(0.1));
+    reduced->mBmin = inflated.GetMin();
+    reduced->mBmax = inflated.GetMax();
+
+    ComputeCentroid(reduced->m_points,
+                    reduced->m_triangles,
+                    reduced->m_center);
+
+    reduced->m_volume = ComputeConvexHullVolume(*reduced);
+
+    return reduced;
+}
+
+// Builds a new, heap-allocated convex hull around the vertices of two hulls.
+//
+// The points of 'sm1' followed by the points of 'sm2' are handed to QuickHull.
+// The resulting vertices and indices become the new hull, whose volume, bounds
+// (inflated by 0.1) and centroid are then filled in. The caller owns the
+// returned hull.
+IVHACD::ConvexHull* VHACDImpl::ComputeCombinedConvexHull(const ConvexHull& sm1,
+                                                         const ConvexHull& sm2)
+{
+    // Total vertex count across both hulls.
+    uint32_t vcount = uint32_t(sm1.m_points.size() + sm2.m_points.size());
+
+    // Concatenate the two point lists: sm1 first, then sm2.
+    std::vector<VHACD::Vertex> vertices;
+    vertices.reserve(vcount);
+    vertices.insert(vertices.end(),
+                    sm1.m_points.begin(),
+                    sm1.m_points.end());
+    vertices.insert(vertices.end(),
+                    sm2.m_points.begin(),
+                    sm2.m_points.end());
+
+    VHACD::QuickHull hullBuilder;
+    hullBuilder.ComputeConvexHull(vertices,
+                                  vcount);
+
+    ConvexHull* merged = new ConvexHull;
+    merged->m_points = hullBuilder.GetVertices();
+    merged->m_triangles = hullBuilder.GetIndices();
+
+    merged->m_volume = ComputeConvexHullVolume(*merged);
+
+    VHACD::BoundsAABB inflated = VHACD::BoundsAABB(hullBuilder.GetVertices()).Inflate(double(0.1));
+    merged->mBmin = inflated.GetMin();
+    merged->mBmax = inflated.GetMax();
+
+    ComputeCentroid(merged->m_points,
+                    merged->m_triangles,
+                    merged->m_center);
+
+    return merged;
+}
+
+// Looks up a hull in m_hulls by key.
+// Returns the stored pointer, or nullptr when no entry exists for 'index'.
+IVHACD::ConvexHull* VHACDImpl::GetHull(uint32_t index)
+{
+    auto entry = m_hulls.find(index);
+    if ( entry == m_hulls.end() )
+    {
+        return nullptr;
+    }
+    return entry->second;
+}
+
+// Releases the hull stored under 'index' and erases its entry from m_hulls.
+// Returns true when an entry was found and removed, false otherwise.
+bool VHACDImpl::RemoveHull(uint32_t index)
+{
+    auto entry = m_hulls.find(index);
+    if ( entry == m_hulls.end() )
+    {
+        return false;
+    }
+
+    // Release the hull before dropping the map entry that refers to it.
+    ReleaseConvexHull(entry->second);
+    m_hulls.erase(entry);
+    return true;
+}
+
+// Returns a new, heap-allocated copy of 'source'. The caller owns the result.
+IVHACD::ConvexHull* VHACDImpl::CopyConvexHull(const ConvexHull& source)
+{
+    ConvexHull* duplicate = new ConvexHull;
+    (*duplicate) = source;
+    return duplicate;
+}
+
+// Maps a Stages value to its name as a string literal.
+// Returns "unknown" for Stages::NUM_STAGES and for any value not listed.
+const char* VHACDImpl::GetStageName(Stages stage) const
+{
+    switch ( stage )
+    {
+        case Stages::COMPUTE_BOUNDS_OF_INPUT_MESH:
+            return "COMPUTE_BOUNDS_OF_INPUT_MESH";
+        case Stages::REINDEXING_INPUT_MESH:
+            return "REINDEXING_INPUT_MESH";
+        case Stages::CREATE_RAYCAST_MESH:
+            return "CREATE_RAYCAST_MESH";
+        case Stages::VOXELIZING_INPUT_MESH:
+            return "VOXELIZING_INPUT_MESH";
+        case Stages::BUILD_INITIAL_CONVEX_HULL:
+            return "BUILD_INITIAL_CONVEX_HULL";
+        case Stages::PERFORMING_DECOMPOSITION:
+            return "PERFORMING_DECOMPOSITION";
+        case Stages::INITIALIZING_CONVEX_HULLS_FOR_MERGING:
+            return "INITIALIZING_CONVEX_HULLS_FOR_MERGING";
+        case Stages::COMPUTING_COST_MATRIX:
+            return "COMPUTING_COST_MATRIX";
+        case Stages::MERGING_CONVEX_HULLS:
+            return "MERGING_CONVEX_HULLS";
+        case Stages::FINALIZING_RESULTS:
+            return "FINALIZING_RESULTS";
+        case Stages::NUM_STAGES:
+            // Not a real stage; listed only so GCC/Clang's -Wswitch does not
+            // report an unhandled enumeration value.
+            break;
+    }
+    return "unknown";
+}
+
+// Forwards a progress report to the user callback, if one is configured.
+//
+// stage         : stage currently executing; also determines overall progress.
+// stageProgress : progress value for the current stage, passed through as-is.
+// operation     : description of the current operation, passed through as-is.
+//
+// Overall progress is computed as stage * 100 / NUM_STAGES.
+void VHACDImpl::ProgressUpdate(Stages stage,
+                               double stageProgress,
+                               const char* operation)
+{
+    if ( !m_params.m_callback )
+    {
+        return; // nobody is listening
+    }
+
+    const char *stageName = GetStageName(stage);
+    double overall = (double(stage) * 100) / double(Stages::NUM_STAGES);
+    m_params.m_callback->Update(overall,
+                                stageProgress,
+                                stageName,
+                                operation);
+}
+
+// Returns the current value of the m_canceled flag.
+bool VHACDImpl::IsCanceled() const
+{
+    const bool canceled = m_canceled;
+    return canceled;
+}
+
+// Factory: allocates a VHACDImpl and returns it through the IVHACD interface.
+IVHACD* CreateVHACD(void)
+{
+    IVHACD* instance = new VHACDImpl;
+    return instance;
+}
+
+// Redeclaration of the factory function defined directly above.
+IVHACD* CreateVHACD(void);
+
+#if !VHACD_DISABLE_THREADING
+
+// One queued notification produced on the worker thread by
+// VHACDAsyncImpl::Update or VHACDAsyncImpl::Log and delivered later by
+// VHACDAsyncImpl::ProcessPendingMessages.
+//
+// Log entries leave m_overallProgress at its default of -1, which is how
+// ProcessPendingMessages tells them apart from progress updates.
+class LogMessage
+{
+public:
+    double  m_overallProgress{ double(-1.0) }; // -1 marks a plain log message
+    double  m_stageProgress{ double(-1.0) };   // progress value of the current stage
+    std::string m_stage;                        // stage name (progress updates only)
+    std::string m_operation;                    // operation description, or the log text
+};
+
+// Asynchronous front end for VHACDImpl.
+//
+// Compute() copies the input mesh into member buffers and starts the
+// decomposition as a task through an IUserTaskRunner. The class also acts as
+// the callback and logger handed to the wrapped VHACDImpl: messages raised by
+// the task are queued and later forwarded to the user's own callback/logger
+// from ProcessPendingMessages(), which is invoked by IsReady() and
+// GetNConvexHulls().
+class VHACDAsyncImpl : public VHACD::IVHACD,
+                       public VHACD::IVHACD::IUserCallback,
+                       VHACD::IVHACD::IUserLogger,
+                       VHACD::IVHACD::IUserTaskRunner
+{
+public:
+    VHACDAsyncImpl() = default;
+
+    // Cancels any running task before the instance goes away.
+    ~VHACDAsyncImpl() override;
+
+    // Requests cancellation and waits for the current task (if any) to finish.
+    void Cancel() override final;
+
+    // Copies the single-precision input mesh and starts the task.
+    bool Compute(const float* const points,
+                 const uint32_t countPoints,
+                 const uint32_t* const triangles,
+                 const uint32_t countTriangles,
+                 const Parameters& params) override final;
+
+    // Copies the double-precision input mesh and starts the task.
+    bool Compute(const double* const points,
+                 const uint32_t countPoints,
+                 const uint32_t* const triangles,
+                 const uint32_t countTriangles,
+                 const Parameters& params) override final;
+
+    // Forwards to the wrapped VHACDImpl.
+    bool GetConvexHull(const uint32_t index,
+                       VHACD::IVHACD::ConvexHull& ch) const override final;
+
+    // Dispatches pending messages, then forwards to the wrapped VHACDImpl.
+    uint32_t GetNConvexHulls() const override final;
+
+    void Clean() override final; // cancel the task and release internally allocated memory
+
+    void Release() override final; // release IVHACD (deletes this instance)
+
+    // Will compute the center of mass of the convex hull decomposition results and return it
+    // in 'centerOfMass'.  Returns false if the center of mass could not be computed.
+    bool ComputeCenterOfMass(double centerOfMass[3]) const override;
+
+    // Dispatches pending messages and returns true when no task is running.
+    bool IsReady() const override final;
+
+    /**
+    * At the request of LegionFu : [email protected]
+    * This method will return which convex hull is closest to the source position.
+    * You can use this method to figure out, for example, which vertices in the original
+    * source mesh are best associated with which convex hull.
+    *
+    * @param pos : The input 3d position to test against
+    * @param distanceToHull : Output; set to 0 and left there when results are not ready
+    *
+    * @return : Returns which convex hull this position is closest to.
+    */
+    uint32_t findNearestConvexHull(const double pos[3],
+                                   double& distanceToHull) override final;
+
+    // IUserCallback: queues a progress update for later delivery.
+    void Update(const double overallProgress,
+                const double stageProgress,
+                const char* const stage,
+                const char *operation) override final;
+
+    // IUserLogger: queues a log message for later delivery.
+    void Log(const char* const msg) override final;
+
+    // IUserTaskRunner: default runner, starts 'func' on a new std::thread.
+    void* StartTask(std::function<void()> func) override;
+
+    // IUserTaskRunner: joins and deletes a thread returned by StartTask.
+    void JoinTask(void* Task) override;
+
+    // Cancels any previous task and starts a new one over the stored mesh buffers.
+    bool Compute(const Parameters params);
+
+    // Runs the decomposition synchronously; called from inside the task.
+    bool ComputeNow(const std::vector<VHACD::Vertex>& points,
+                    const std::vector<VHACD::Triangle>& triangles,
+                    const Parameters& _desc);
+
+    // As a convenience for the calling application we only send it update and log messages from its own main
+    // thread.  This reduces the complexity burden on the caller by making sure it only has to deal with log
+    // messages in its main application thread.
+    void ProcessPendingMessages() const;
+
+private:
+    VHACD::VHACDImpl                m_VHACD;                 // wrapped synchronous implementation
+    std::vector<VHACD::Vertex>      m_vertices;              // private copy of the input vertices
+    std::vector<VHACD::Triangle>    m_indices;               // private copy of the input triangles
+    VHACD::IVHACD::IUserCallback*   m_callback{ nullptr };   // user's callback, set in ComputeNow
+    VHACD::IVHACD::IUserLogger*     m_logger{ nullptr };     // user's logger, set in ComputeNow
+    VHACD::IVHACD::IUserTaskRunner* m_taskRunner{ nullptr }; // runner used to start/join m_task
+    void*                           m_task{ nullptr };       // handle returned by StartTask
+    std::atomic<bool>               m_running{ false };      // true while the task is executing
+    std::atomic<bool>               m_cancel{ false };       // true while Cancel() is in progress
+
+    // Thread safe caching mechanism for messages and update status.
+    // This is so that the caller always gets messages in their own thread.
+    // Member variables are marked as 'mutable' since the message dispatch function
+    // is called from const query methods.
+    mutable std::mutex              m_messageMutex;
+    mutable std::vector<LogMessage> m_messages;
+    mutable std::atomic<bool>       m_haveMessages{ false };
+};
+
+// Destructor: cancels and joins any outstanding task (see Cancel) before the
+// members it uses are destroyed.
+VHACDAsyncImpl::~VHACDAsyncImpl()
+{
+    Cancel();
+}
+
+// Cancels the current computation, if any, and waits for its task to finish.
+//
+// m_cancel is raised for the duration of the call, the wrapped VHACDImpl is
+// told to cancel, and an outstanding task is joined through the task runner
+// that started it. m_cancel is cleared again before returning.
+void VHACDAsyncImpl::Cancel()
+{
+    m_cancel.store(true);
+    m_VHACD.Cancel();
+
+    if (m_task != nullptr)
+    {
+        // Wait for the task to fully exit before the instance may be deleted.
+        m_taskRunner->JoinTask(m_task);
+        m_task = nullptr;
+    }
+
+    m_cancel.store(false); // clear the cancel flag
+}
+
+// Starts an asynchronous decomposition of a single-precision triangle mesh.
+//
+// points         : countPoints * 3 floats (x, y, z per vertex).
+// countPoints    : number of vertices in 'points'.
+// triangles      : countTriangles * 3 vertex indices.
+// countTriangles : number of triangles in 'triangles'.
+// params         : decomposition parameters, forwarded to Compute(Parameters).
+//
+// The input is copied into m_vertices / m_indices because the task reads the
+// mesh after this call has returned. Returns the result of Compute(Parameters).
+bool VHACDAsyncImpl::Compute(const float* const points,
+                             const uint32_t countPoints,
+                             const uint32_t* const triangles,
+                             const uint32_t countTriangles,
+                             const Parameters& params)
+{
+    // A previously started task reads m_vertices / m_indices by reference, so it
+    // must be stopped before those buffers are modified.
+    Cancel();
+
+    // Start from empty buffers; otherwise a second call would append the new
+    // mesh to the one left over from the previous call.
+    m_vertices.clear();
+    m_indices.clear();
+
+    m_vertices.reserve(countPoints);
+    for (uint32_t i = 0; i < countPoints; ++i)
+    {
+        // Index in size_t so i * 3 cannot wrap around in 32 bits.
+        const size_t base = size_t(i) * 3;
+        m_vertices.emplace_back(points[base + 0],
+                                points[base + 1],
+                                points[base + 2]);
+    }
+
+    m_indices.reserve(countTriangles);
+    for (uint32_t i = 0; i < countTriangles; ++i)
+    {
+        const size_t base = size_t(i) * 3;
+        m_indices.emplace_back(triangles[base + 0],
+                               triangles[base + 1],
+                               triangles[base + 2]);
+    }
+
+    return Compute(params);
+}
+
+// Starts an asynchronous decomposition of a double-precision triangle mesh.
+//
+// points         : countPoints * 3 doubles (x, y, z per vertex).
+// countPoints    : number of vertices in 'points'.
+// triangles      : countTriangles * 3 vertex indices.
+// countTriangles : number of triangles in 'triangles'.
+// params         : decomposition parameters, forwarded to Compute(Parameters).
+//
+// Returns the result of Compute(Parameters).
+bool VHACDAsyncImpl::Compute(const double* const points,
+                             const uint32_t countPoints,
+                             const uint32_t* const triangles,
+                             const uint32_t countTriangles,
+                             const Parameters& params)
+{
+    // A previously started task reads m_vertices / m_indices by reference, so it
+    // must be stopped before those buffers are modified.
+    Cancel();
+
+    // Start from empty buffers; otherwise a second call would append the new
+    // mesh to the one left over from the previous call.
+    m_vertices.clear();
+    m_indices.clear();
+
+    // We need to copy the input vertices and triangles into our own buffers so we can operate
+    // on them safely from the background task.
+    // They can't be local variables because the work is asynchronous.
+    m_vertices.reserve(countPoints);
+    for (uint32_t i = 0; i < countPoints; ++i)
+    {
+        // Index in size_t so i * 3 cannot wrap around in 32 bits.
+        const size_t base = size_t(i) * 3;
+        m_vertices.emplace_back(points[base + 0],
+                                points[base + 1],
+                                points[base + 2]);
+    }
+
+    m_indices.reserve(countTriangles);
+    for (uint32_t i = 0; i < countTriangles; ++i)
+    {
+        const size_t base = size_t(i) * 3;
+        m_indices.emplace_back(triangles[base + 0],
+                               triangles[base + 1],
+                               triangles[base + 2]);
+    }
+
+    return Compute(params);
+}
+
+// Forwards to the wrapped VHACDImpl: fills 'ch' for hull number 'index' and
+// returns that call's result.
+bool VHACDAsyncImpl::GetConvexHull(const uint32_t index,
+                                   VHACD::IVHACD::ConvexHull& ch) const
+{
+    const bool found = m_VHACD.GetConvexHull(index,
+                                             ch);
+    return found;
+}
+
+// Dispatches any queued messages, then returns the hull count reported by the
+// wrapped VHACDImpl.
+uint32_t VHACDAsyncImpl::GetNConvexHulls() const
+{
+    ProcessPendingMessages();
+    const uint32_t hullCount = m_VHACD.GetNConvexHulls();
+    return hullCount;
+}
+
+// Stops any running task, then asks the wrapped VHACDImpl to clean up.
+void VHACDAsyncImpl::Clean()
+{
+    // The task must be finished before its results are released.
+    this->Cancel();
+    this->m_VHACD.Clean();
+}
+
+// Destroys this instance. The object must not be used after this call; the
+// destructor cancels any running task first.
+void VHACDAsyncImpl::Release()
+{
+    delete this;
+}
+
+// Writes the center of mass of the decomposition into 'centerOfMass'.
+//
+// The output is zeroed first. While a task is still running the function
+// returns false and leaves the zeros in place; otherwise it returns whatever
+// the wrapped VHACDImpl reports.
+bool VHACDAsyncImpl::ComputeCenterOfMass(double centerOfMass[3]) const
+{
+    for (int axis = 0; axis < 3; ++axis)
+    {
+        centerOfMass[axis] = 0;
+    }
+
+    if (!IsReady())
+    {
+        return false;
+    }
+    return m_VHACD.ComputeCenterOfMass(centerOfMass);
+}
+
+// Dispatches any queued messages and reports whether no task is running.
+bool VHACDAsyncImpl::IsReady() const
+{
+    ProcessPendingMessages();
+    const bool stillRunning = m_running;
+    return !stillRunning;
+}
+
+// Returns the index of the convex hull nearest to 'pos' and stores the
+// distance in 'distanceToHull'.
+//
+// While a task is still running, no query is made: the function returns 0 and
+// 'distanceToHull' is set to 0.
+uint32_t VHACDAsyncImpl::findNearestConvexHull(const double pos[3],
+                                               double& distanceToHull)
+{
+    distanceToHull = 0;
+
+    // Only query once valid, completed results are available.
+    if (!IsReady())
+    {
+        return 0;
+    }
+    return m_VHACD.findNearestConvexHull(pos,distanceToHull);
+}
+
+// IUserCallback hook: queues a progress update for later delivery.
+//
+// The message is appended to m_messages under m_messageMutex and
+// m_haveMessages is raised; ProcessPendingMessages() forwards it to the user's
+// callback later. Null 'stage' / 'operation' pointers are stored as empty
+// strings.
+void VHACDAsyncImpl::Update(const double overallProgress,
+                            const double stageProgress,
+                            const char* const stage,
+                            const char* operation)
+{
+    // Build the message before locking so allocations happen outside the
+    // critical section. Constructing std::string from a null pointer is
+    // undefined, hence the guards.
+    LogMessage m;
+    m.m_operation = std::string(operation ? operation : "");
+    m.m_overallProgress = overallProgress;
+    m.m_stageProgress = stageProgress;
+    m.m_stage = std::string(stage ? stage : "");
+
+    // RAII lock: released even if push_back throws.
+    std::lock_guard<std::mutex> lock(m_messageMutex);
+    m_messages.push_back(m);
+    m_haveMessages = true;
+}
+
+// IUserLogger hook: queues a log message for later delivery.
+//
+// The text is stored in m_operation and the progress fields keep their -1
+// defaults, which is how ProcessPendingMessages() recognizes a log entry.
+// A null 'msg' is stored as an empty string.
+void VHACDAsyncImpl::Log(const char* const msg)
+{
+    // Build the message before locking so the allocation happens outside the
+    // critical section. Constructing std::string from a null pointer is
+    // undefined, hence the guard.
+    LogMessage m;
+    m.m_operation = std::string(msg ? msg : "");
+
+    // RAII lock: released even if push_back throws.
+    std::lock_guard<std::mutex> lock(m_messageMutex);
+    m_haveMessages = true;
+    m_messages.push_back(m);
+}
+
+// Default task runner: starts 'func' on a new std::thread and returns the
+// heap-allocated thread object as an opaque handle. The handle must be passed
+// to JoinTask, which joins and deletes it.
+void* VHACDAsyncImpl::StartTask(std::function<void()> func)
+{
+    std::thread* worker = new std::thread(func);
+    return worker;
+}
+
+// Default task runner: waits for a thread created by StartTask to finish and
+// then deletes it.
+//
+// Task : handle previously returned by StartTask. A null handle is ignored.
+void VHACDAsyncImpl::JoinTask(void* Task)
+{
+    // Nothing to join; avoids dereferencing a null handle.
+    if (Task == nullptr)
+    {
+        return;
+    }
+
+    std::thread* t = static_cast<std::thread*>(Task);
+    t->join();
+    delete t;
+}
+
+// Starts the decomposition of the mesh stored in m_vertices / m_indices.
+//
+// Any task that is still running is cancelled first. The task runner is taken
+// from 'params' when provided, otherwise this object acts as the runner.
+// m_running is raised before the task starts and cleared by the task itself
+// once it has finished. Always returns true.
+bool VHACDAsyncImpl::Compute(Parameters params)
+{
+    Cancel(); // if we previously had a solution running; cancel it.
+
+    // Choose the runner and make sure the task sees the same choice.
+    m_taskRunner = params.m_taskRunner;
+    if (m_taskRunner == nullptr)
+    {
+        m_taskRunner = this;
+    }
+    params.m_taskRunner = m_taskRunner;
+
+    m_running = true;
+    m_task = m_taskRunner->StartTask([this, params]() {
+        ComputeNow(m_vertices,
+                   m_indices,
+                   params);
+        // Notify the user's callback that the task has completed, unless a
+        // cancel is in progress, in which case no completion event is sent.
+        if (params.m_callback && !m_cancel)
+        {
+            params.m_callback->NotifyVHACDComplete();
+        }
+        m_running = false;
+    });
+    return true;
+}
+
+// Runs the decomposition synchronously on the wrapped VHACDImpl.
+//
+// The user's callback and logger are remembered in m_callback / m_logger, and
+// this object is substituted for each one that is non-null, so their messages
+// pass through Update() / Log(). When no task runner is supplied, this object
+// is used as the runner.
+//
+// Returns true only if the computation succeeded and produced at least one
+// convex hull.
+bool VHACDAsyncImpl::ComputeNow(const std::vector<VHACD::Vertex>& points,
+                                const std::vector<VHACD::Triangle>& triangles,
+                                const Parameters& _desc)
+{
+    // Remember where intercepted messages must eventually be delivered.
+    m_callback = _desc.m_callback;
+    m_logger = _desc.m_logger;
+
+    Parameters desc;
+    desc = _desc;
+
+    // Set our intercepting callback interfaces if non-null
+    if (_desc.m_callback)
+    {
+        desc.m_callback = this;
+    }
+    else
+    {
+        desc.m_callback = nullptr;
+    }
+    if (_desc.m_logger)
+    {
+        desc.m_logger = this;
+    }
+    else
+    {
+        desc.m_logger = nullptr;
+    }
+
+    // If no task runner was provided, use the default one
+    if (desc.m_taskRunner == nullptr)
+    {
+        desc.m_taskRunner = this;
+    }
+
+    const bool ok = m_VHACD.Compute(points,
+                                    triangles,
+                                    desc);
+    if (!ok)
+    {
+        return false;
+    }
+
+    const uint32_t hullCount = m_VHACD.GetNConvexHulls();
+    return hullCount != 0;
+}
+
+// Delivers queued log and progress messages to the user's logger / callback
+// on the calling thread.
+//
+// Nothing is delivered while a cancel is in progress. Messages whose overall
+// progress is -1 are log entries and go to m_logger; all others go to
+// m_callback. Messages are delivered in the order they were queued.
+void VHACDAsyncImpl::ProcessPendingMessages() const
+{
+    if (m_cancel)
+    {
+        return;
+    }
+    if ( !m_haveMessages )
+    {
+        return;
+    }
+
+    // Take ownership of the queued messages under the lock, then release it
+    // before calling user code. Holding the non-recursive mutex during the
+    // callbacks would deadlock if a callback re-entered IsReady() or
+    // GetNConvexHulls(), and would block the worker's Update()/Log() calls.
+    std::vector<LogMessage> pending;
+    {
+        std::lock_guard<std::mutex> lock(m_messageMutex);
+        pending.swap(m_messages);
+        m_haveMessages = false;
+    }
+
+    for (auto& i : pending)
+    {
+        if ( i.m_overallProgress == -1 )
+        {
+            if ( m_logger )
+            {
+                m_logger->Log(i.m_operation.c_str());
+            }
+        }
+        else if ( m_callback )
+        {
+            m_callback->Update(i.m_overallProgress,
+                               i.m_stageProgress,
+                               i.m_stage.c_str(),
+                               i.m_operation.c_str());
+        }
+    }
+}
+
+// Factory: allocates a VHACDAsyncImpl and returns it through the IVHACD
+// interface.
+IVHACD* CreateVHACD_ASYNC()
+{
+    IVHACD* instance = new VHACDAsyncImpl;
+    return instance;
+}
+#endif
+
+} // namespace VHACD
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif // _MSC_VER
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif // __GNUC__
+
+#endif // ENABLE_VHACD_IMPLEMENTATION
+
+#endif // VHACD_H

+ 1 - 1
Tools/CMake/torque_configs.cmake

@@ -15,7 +15,7 @@ set(TORQUE_COMPILE_DEFINITIONS ICE_NO_DLL PCRE_STATIC TORQUE_ADVANCED_LIGHTING T
 
 
 # All link libraries. Modules should append to this the path to specify additional link libraries (.a, .lib, .dylib, .so)
 # All link libraries. Modules should append to this the path to specify additional link libraries (.a, .lib, .dylib, .so)
 set(TORQUE_LINK_LIBRARIES tinyxml collada squish opcode assimp FLAC FLAC++ ogg vorbis  
 set(TORQUE_LINK_LIBRARIES tinyxml collada squish opcode assimp FLAC FLAC++ ogg vorbis  
-			vorbisfile vorbisenc opus sndfile SDL2 glad pcre convexDecomp zlib)
+			vorbisfile vorbisenc opus sndfile SDL2 glad pcre convexMath zlib)
 
 
 if(TORQUE_TESTING)
 if(TORQUE_TESTING)
 set(TORQUE_LINK_LIBRARIES ${TORQUE_LINK_LIBRARIES} gtest gmock)
 set(TORQUE_LINK_LIBRARIES ${TORQUE_LINK_LIBRARIES} gtest gmock)