hace 10 años · 699955795b
--- a/build/osx/Polycode.xcworkspace/contents.xcworkspacedata
+++ b/build/osx/Polycode.xcworkspace/contents.xcworkspacedata
@@ -1,6 +1,9 @@
 
				 <?xml version="1.0" encoding="UTF-8"?>
			
 
				 <Workspace
			
 
				    version = "1.0">
			
 
				+   <FileRef
			
 
				+      location = "group:Polycode3DPhsyics/Polycode3DPhsyics.xcodeproj">
			
 
				+   </FileRef>
			
 
				    <FileRef
			
 
				       location = "group:PolycodeUI/PolycodeUI.xcodeproj">
			
 
				    </FileRef>
			
--- a/build/osx/Polycode3DPhsyics/Polycode3DPhsyics.xcodeproj/project.pbxproj
+++ b/build/osx/Polycode3DPhsyics/Polycode3DPhsyics.xcodeproj/project.pbxproj
@@ -0,0 +1,317 @@
 
				+// !$*UTF8*$!
			
 
				+{
			
 
				+	archiveVersion = 1;
			
 
				+	classes = {
			
 
				+	};
			
 
				+	objectVersion = 46;
			
 
				+	objects = {
			
 
				+
			
 
				+/* Begin PBXBuildFile section */
			
 
				+		6DE288421BE96E5000F7B42C /* Polycode3DPhysics.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE2883C1BE96E5000F7B42C /* Polycode3DPhysics.h */; };
			
 
				+		6DE288431BE96E5000F7B42C /* PolyCollisionScene.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE2883D1BE96E5000F7B42C /* PolyCollisionScene.h */; };
			
 
				+		6DE288441BE96E5000F7B42C /* PolyCollisionSceneEntity.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE2883E1BE96E5000F7B42C /* PolyCollisionSceneEntity.h */; };
			
 
				+		6DE288451BE96E5000F7B42C /* PolyPhysicsConstraint.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE2883F1BE96E5000F7B42C /* PolyPhysicsConstraint.h */; };
			
 
				+		6DE288461BE96E5000F7B42C /* PolyPhysicsScene.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE288401BE96E5000F7B42C /* PolyPhysicsScene.h */; };
			
 
				+		6DE288471BE96E5000F7B42C /* PolyPhysicsSceneEntity.h in Headers */ = {isa = PBXBuildFile; fileRef = 6DE288411BE96E5000F7B42C /* PolyPhysicsSceneEntity.h */; };
			
 
				+		6DE2884D1BE96E5D00F7B42C /* PolyCollisionScene.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6DE288481BE96E5D00F7B42C /* PolyCollisionScene.cpp */; };
			
 
				+		6DE2884E1BE96E5D00F7B42C /* PolyCollisionSceneEntity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6DE288491BE96E5D00F7B42C /* PolyCollisionSceneEntity.cpp */; };
			
 
				+		6DE2884F1BE96E5D00F7B42C /* PolyPhysicsConstraint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6DE2884A1BE96E5D00F7B42C /* PolyPhysicsConstraint.cpp */; };
			
 
				+		6DE288501BE96E5D00F7B42C /* PolyPhysicsScene.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6DE2884B1BE96E5D00F7B42C /* PolyPhysicsScene.cpp */; };
			
 
				+		6DE288511BE96E5D00F7B42C /* PolyPhysicsSceneEntity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6DE2884C1BE96E5D00F7B42C /* PolyPhysicsSceneEntity.cpp */; };
			
 
				+/* End PBXBuildFile section */
			
 
				+
			
 
				+/* Begin PBXFileReference section */
			
 
				+		6DE288331BE96E1B00F7B42C /* libPolycode3DPhsyics.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libPolycode3DPhsyics.a; sourceTree = BUILT_PRODUCTS_DIR; };
			
 
				+		6DE2883C1BE96E5000F7B42C /* Polycode3DPhysics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Polycode3DPhysics.h; path = ../../../include/polycode/modules/physics3D/Polycode3DPhysics.h; sourceTree = "<group>"; };
			
 
				+		6DE2883D1BE96E5000F7B42C /* PolyCollisionScene.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolyCollisionScene.h; path = ../../../include/polycode/modules/physics3D/PolyCollisionScene.h; sourceTree = "<group>"; };
			
 
				+		6DE2883E1BE96E5000F7B42C /* PolyCollisionSceneEntity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolyCollisionSceneEntity.h; path = ../../../include/polycode/modules/physics3D/PolyCollisionSceneEntity.h; sourceTree = "<group>"; };
			
 
				+		6DE2883F1BE96E5000F7B42C /* PolyPhysicsConstraint.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolyPhysicsConstraint.h; path = ../../../include/polycode/modules/physics3D/PolyPhysicsConstraint.h; sourceTree = "<group>"; };
			
 
				+		6DE288401BE96E5000F7B42C /* PolyPhysicsScene.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolyPhysicsScene.h; path = ../../../include/polycode/modules/physics3D/PolyPhysicsScene.h; sourceTree = "<group>"; };
			
 
				+		6DE288411BE96E5000F7B42C /* PolyPhysicsSceneEntity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolyPhysicsSceneEntity.h; path = ../../../include/polycode/modules/physics3D/PolyPhysicsSceneEntity.h; sourceTree = "<group>"; };
			
 
				+		6DE288481BE96E5D00F7B42C /* PolyCollisionScene.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PolyCollisionScene.cpp; path = ../../../src/modules/physics3D/PolyCollisionScene.cpp; sourceTree = "<group>"; };
			
 
				+		6DE288491BE96E5D00F7B42C /* PolyCollisionSceneEntity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PolyCollisionSceneEntity.cpp; path = ../../../src/modules/physics3D/PolyCollisionSceneEntity.cpp; sourceTree = "<group>"; };
			
 
				+		6DE2884A1BE96E5D00F7B42C /* PolyPhysicsConstraint.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PolyPhysicsConstraint.cpp; path = ../../../src/modules/physics3D/PolyPhysicsConstraint.cpp; sourceTree = "<group>"; };
			
 
				+		6DE2884B1BE96E5D00F7B42C /* PolyPhysicsScene.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PolyPhysicsScene.cpp; path = ../../../src/modules/physics3D/PolyPhysicsScene.cpp; sourceTree = "<group>"; };
			
 
				+		6DE2884C1BE96E5D00F7B42C /* PolyPhysicsSceneEntity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PolyPhysicsSceneEntity.cpp; path = ../../../src/modules/physics3D/PolyPhysicsSceneEntity.cpp; sourceTree = "<group>"; };
			
 
				+/* End PBXFileReference section */
			
 
				+
			
 
				+/* Begin PBXFrameworksBuildPhase section */
			
 
				+		6DE288301BE96E1B00F7B42C /* Frameworks */ = {
			
 
				+			isa = PBXFrameworksBuildPhase;
			
 
				+			buildActionMask = 2147483647;
			
 
				+			files = (
			
 
				+			);
			
 
				+			runOnlyForDeploymentPostprocessing = 0;
			
 
				+		};
			
 
				+/* End PBXFrameworksBuildPhase section */
			
 
				+
			
 
				+/* Begin PBXGroup section */
			
 
				+		6DE2882A1BE96E1B00F7B42C = {
			
 
				+			isa = PBXGroup;
			
 
				+			children = (
			
 
				+				6DE2883B1BE96E3E00F7B42C /* include */,
			
 
				+				6DE2883A1BE96E3600F7B42C /* src */,
			
 
				+				6DE288341BE96E1B00F7B42C /* Products */,
			
 
				+			);
			
 
				+			sourceTree = "<group>";
			
 
				+		};
			
 
				+		6DE288341BE96E1B00F7B42C /* Products */ = {
			
 
				+			isa = PBXGroup;
			
 
				+			children = (
			
 
				+				6DE288331BE96E1B00F7B42C /* libPolycode3DPhsyics.a */,
			
 
				+			);
			
 
				+			name = Products;
			
 
				+			sourceTree = "<group>";
			
 
				+		};
			
 
				+		6DE2883A1BE96E3600F7B42C /* src */ = {
			
 
				+			isa = PBXGroup;
			
 
				+			children = (
			
 
				+				6DE288481BE96E5D00F7B42C /* PolyCollisionScene.cpp */,
			
 
				+				6DE288491BE96E5D00F7B42C /* PolyCollisionSceneEntity.cpp */,
			
 
				+				6DE2884A1BE96E5D00F7B42C /* PolyPhysicsConstraint.cpp */,
			
 
				+				6DE2884B1BE96E5D00F7B42C /* PolyPhysicsScene.cpp */,
			
 
				+				6DE2884C1BE96E5D00F7B42C /* PolyPhysicsSceneEntity.cpp */,
			
 
				+			);
			
 
				+			name = src;
			
 
				+			sourceTree = "<group>";
			
 
				+		};
			
 
				+		6DE2883B1BE96E3E00F7B42C /* include */ = {
			
 
				+			isa = PBXGroup;
			
 
				+			children = (
			
 
				+				6DE2883C1BE96E5000F7B42C /* Polycode3DPhysics.h */,
			
 
				+				6DE2883D1BE96E5000F7B42C /* PolyCollisionScene.h */,
			
 
				+				6DE2883E1BE96E5000F7B42C /* PolyCollisionSceneEntity.h */,
			
 
				+				6DE2883F1BE96E5000F7B42C /* PolyPhysicsConstraint.h */,
			
 
				+				6DE288401BE96E5000F7B42C /* PolyPhysicsScene.h */,
			
 
				+				6DE288411BE96E5000F7B42C /* PolyPhysicsSceneEntity.h */,
			
 
				+			);
			
 
				+			name = include;
			
 
				+			sourceTree = "<group>";
			
 
				+		};
			
 
				+/* End PBXGroup section */
			
 
				+
			
 
				+/* Begin PBXHeadersBuildPhase section */
			
 
				+		6DE288311BE96E1B00F7B42C /* Headers */ = {
			
 
				+			isa = PBXHeadersBuildPhase;
			
 
				+			buildActionMask = 2147483647;
			
 
				+			files = (
			
 
				+				6DE288471BE96E5000F7B42C /* PolyPhysicsSceneEntity.h in Headers */,
			
 
				+				6DE288431BE96E5000F7B42C /* PolyCollisionScene.h in Headers */,
			
 
				+				6DE288451BE96E5000F7B42C /* PolyPhysicsConstraint.h in Headers */,
			
 
				+				6DE288441BE96E5000F7B42C /* PolyCollisionSceneEntity.h in Headers */,
			
 
				+				6DE288421BE96E5000F7B42C /* Polycode3DPhysics.h in Headers */,
			
 
				+				6DE288461BE96E5000F7B42C /* PolyPhysicsScene.h in Headers */,
			
 
				+			);
			
 
				+			runOnlyForDeploymentPostprocessing = 0;
			
 
				+		};
			
 
				+/* End PBXHeadersBuildPhase section */
			
 
				+
			
 
				+/* Begin PBXNativeTarget section */
			
 
				+		6DE288321BE96E1B00F7B42C /* Polycode3DPhsyics */ = {
			
 
				+			isa = PBXNativeTarget;
			
 
				+			buildConfigurationList = 6DE288371BE96E1B00F7B42C /* Build configuration list for PBXNativeTarget "Polycode3DPhsyics" */;
			
 
				+			buildPhases = (
			
 
				+				6DE2882F1BE96E1B00F7B42C /* Sources */,
			
 
				+				6DE288301BE96E1B00F7B42C /* Frameworks */,
			
 
				+				6DE288311BE96E1B00F7B42C /* Headers */,
			
 
				+				6DE288521BE9718100F7B42C /* ShellScript */,
			
 
				+			);
			
 
				+			buildRules = (
			
 
				+			);
			
 
				+			dependencies = (
			
 
				+			);
			
 
				+			name = Polycode3DPhsyics;
			
 
				+			productName = Polycode3DPhsyics;
			
 
				+			productReference = 6DE288331BE96E1B00F7B42C /* libPolycode3DPhsyics.a */;
			
 
				+			productType = "com.apple.product-type.library.static";
			
 
				+		};
			
 
				+/* End PBXNativeTarget section */
			
 
				+
			
 
				+/* Begin PBXProject section */
			
 
				+		6DE2882B1BE96E1B00F7B42C /* Project object */ = {
			
 
				+			isa = PBXProject;
			
 
				+			attributes = {
			
 
				+				LastUpgradeCheck = 0630;
			
 
				+				ORGANIZATIONNAME = "Ivan Safrin";
			
 
				+				TargetAttributes = {
			
 
				+					6DE288321BE96E1B00F7B42C = {
			
 
				+						CreatedOnToolsVersion = 6.3.2;
			
 
				+					};
			
 
				+				};
			
 
				+			};
			
 
				+			buildConfigurationList = 6DE2882E1BE96E1B00F7B42C /* Build configuration list for PBXProject "Polycode3DPhsyics" */;
			
 
				+			compatibilityVersion = "Xcode 3.2";
			
 
				+			developmentRegion = English;
			
 
				+			hasScannedForEncodings = 0;
			
 
				+			knownRegions = (
			
 
				+				en,
			
 
				+			);
			
 
				+			mainGroup = 6DE2882A1BE96E1B00F7B42C;
			
 
				+			productRefGroup = 6DE288341BE96E1B00F7B42C /* Products */;
			
 
				+			projectDirPath = "";
			
 
				+			projectRoot = "";
			
 
				+			targets = (
			
 
				+				6DE288321BE96E1B00F7B42C /* Polycode3DPhsyics */,
			
 
				+			);
			
 
				+		};
			
 
				+/* End PBXProject section */
			
 
				+
			
 
				+/* Begin PBXShellScriptBuildPhase section */
			
 
				+		6DE288521BE9718100F7B42C /* ShellScript */ = {
			
 
				+			isa = PBXShellScriptBuildPhase;
			
 
				+			buildActionMask = 2147483647;
			
 
				+			files = (
			
 
				+			);
			
 
				+			inputPaths = (
			
 
				+			);
			
 
				+			outputPaths = (
			
 
				+			);
			
 
				+			runOnlyForDeploymentPostprocessing = 0;
			
 
				+			shellPath = /bin/sh;
			
 
				+			shellScript = "cp $TARGET_BUILD_DIR//$EXECUTABLE_NAME ../../../lib/osx";
			
 
				+		};
			
 
				+/* End PBXShellScriptBuildPhase section */
			
 
				+
			
 
				+/* Begin PBXSourcesBuildPhase section */
			
 
				+		6DE2882F1BE96E1B00F7B42C /* Sources */ = {
			
 
				+			isa = PBXSourcesBuildPhase;
			
 
				+			buildActionMask = 2147483647;
			
 
				+			files = (
			
 
				+				6DE288511BE96E5D00F7B42C /* PolyPhysicsSceneEntity.cpp in Sources */,
			
 
				+				6DE2884E1BE96E5D00F7B42C /* PolyCollisionSceneEntity.cpp in Sources */,
			
 
				+				6DE2884D1BE96E5D00F7B42C /* PolyCollisionScene.cpp in Sources */,
			
 
				+				6DE288501BE96E5D00F7B42C /* PolyPhysicsScene.cpp in Sources */,
			
 
				+				6DE2884F1BE96E5D00F7B42C /* PolyPhysicsConstraint.cpp in Sources */,
			
 
				+			);
			
 
				+			runOnlyForDeploymentPostprocessing = 0;
			
 
				+		};
			
 
				+/* End PBXSourcesBuildPhase section */
			
 
				+
			
 
				+/* Begin XCBuildConfiguration section */
			
 
				+		6DE288351BE96E1B00F7B42C /* Debug */ = {
			
 
				+			isa = XCBuildConfiguration;
			
 
				+			buildSettings = {
			
 
				+				ALWAYS_SEARCH_USER_PATHS = NO;
			
 
				+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
			
 
				+				CLANG_CXX_LIBRARY = "libc++";
			
 
				+				CLANG_ENABLE_MODULES = YES;
			
 
				+				CLANG_ENABLE_OBJC_ARC = YES;
			
 
				+				CLANG_WARN_BOOL_CONVERSION = YES;
			
 
				+				CLANG_WARN_CONSTANT_CONVERSION = YES;
			
 
				+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
			
 
				+				CLANG_WARN_EMPTY_BODY = YES;
			
 
				+				CLANG_WARN_ENUM_CONVERSION = YES;
			
 
				+				CLANG_WARN_INT_CONVERSION = YES;
			
 
				+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
			
 
				+				CLANG_WARN_UNREACHABLE_CODE = YES;
			
 
				+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
			
 
				+				COPY_PHASE_STRIP = NO;
			
 
				+				DEBUG_INFORMATION_FORMAT = dwarf;
			
 
				+				ENABLE_STRICT_OBJC_MSGSEND = YES;
			
 
				+				GCC_C_LANGUAGE_STANDARD = gnu99;
			
 
				+				GCC_DYNAMIC_NO_PIC = NO;
			
 
				+				GCC_NO_COMMON_BLOCKS = YES;
			
 
				+				GCC_OPTIMIZATION_LEVEL = 0;
			
 
				+				GCC_PREPROCESSOR_DEFINITIONS = (
			
 
				+					"DEBUG=1",
			
 
				+					"$(inherited)",
			
 
				+				);
			
 
				+				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
			
 
				+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
			
 
				+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
			
 
				+				GCC_WARN_UNDECLARED_SELECTOR = YES;
			
 
				+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
			
 
				+				GCC_WARN_UNUSED_FUNCTION = YES;
			
 
				+				GCC_WARN_UNUSED_VARIABLE = YES;
			
 
				+				MACOSX_DEPLOYMENT_TARGET = 10.10;
			
 
				+				MTL_ENABLE_DEBUG_INFO = YES;
			
 
				+				ONLY_ACTIVE_ARCH = YES;
			
 
				+				SDKROOT = macosx;
			
 
				+			};
			
 
				+			name = Debug;
			
 
				+		};
			
 
				+		6DE288361BE96E1B00F7B42C /* Release */ = {
			
 
				+			isa = XCBuildConfiguration;
			
 
				+			buildSettings = {
			
 
				+				ALWAYS_SEARCH_USER_PATHS = NO;
			
 
				+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
			
 
				+				CLANG_CXX_LIBRARY = "libc++";
			
 
				+				CLANG_ENABLE_MODULES = YES;
			
 
				+				CLANG_ENABLE_OBJC_ARC = YES;
			
 
				+				CLANG_WARN_BOOL_CONVERSION = YES;
			
 
				+				CLANG_WARN_CONSTANT_CONVERSION = YES;
			
 
				+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
			
 
				+				CLANG_WARN_EMPTY_BODY = YES;
			
 
				+				CLANG_WARN_ENUM_CONVERSION = YES;
			
 
				+				CLANG_WARN_INT_CONVERSION = YES;
			
 
				+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
			
 
				+				CLANG_WARN_UNREACHABLE_CODE = YES;
			
 
				+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
			
 
				+				COPY_PHASE_STRIP = NO;
			
 
				+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
			
 
				+				ENABLE_NS_ASSERTIONS = NO;
			
 
				+				ENABLE_STRICT_OBJC_MSGSEND = YES;
			
 
				+				GCC_C_LANGUAGE_STANDARD = gnu99;
			
 
				+				GCC_NO_COMMON_BLOCKS = YES;
			
 
				+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
			
 
				+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
			
 
				+				GCC_WARN_UNDECLARED_SELECTOR = YES;
			
 
				+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
			
 
				+				GCC_WARN_UNUSED_FUNCTION = YES;
			
 
				+				GCC_WARN_UNUSED_VARIABLE = YES;
			
 
				+				MACOSX_DEPLOYMENT_TARGET = 10.10;
			
 
				+				MTL_ENABLE_DEBUG_INFO = NO;
			
 
				+				SDKROOT = macosx;
			
 
				+			};
			
 
				+			name = Release;
			
 
				+		};
			
 
				+		6DE288381BE96E1B00F7B42C /* Debug */ = {
			
 
				+			isa = XCBuildConfiguration;
			
 
				+			buildSettings = {
			
 
				+				EXECUTABLE_PREFIX = lib;
			
 
				+				HEADER_SEARCH_PATHS = (
			
 
				+					"$(inherited)",
			
 
				+					/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include,
			
 
				+					"\"$(SRCROOT)/../../../include\"",
			
 
				+				);
			
 
				+				PRODUCT_NAME = "$(TARGET_NAME)";
			
 
				+			};
			
 
				+			name = Debug;
			
 
				+		};
			
 
				+		6DE288391BE96E1B00F7B42C /* Release */ = {
			
 
				+			isa = XCBuildConfiguration;
			
 
				+			buildSettings = {
			
 
				+				EXECUTABLE_PREFIX = lib;
			
 
				+				HEADER_SEARCH_PATHS = (
			
 
				+					"$(inherited)",
			
 
				+					/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include,
			
 
				+					"\"$(SRCROOT)/../../../include\"",
			
 
				+				);
			
 
				+				PRODUCT_NAME = "$(TARGET_NAME)";
			
 
				+			};
			
 
				+			name = Release;
			
 
				+		};
			
 
				+/* End XCBuildConfiguration section */
			
 
				+
			
 
				+/* Begin XCConfigurationList section */
			
 
				+		6DE2882E1BE96E1B00F7B42C /* Build configuration list for PBXProject "Polycode3DPhsyics" */ = {
			
 
				+			isa = XCConfigurationList;
			
 
				+			buildConfigurations = (
			
 
				+				6DE288351BE96E1B00F7B42C /* Debug */,
			
 
				+				6DE288361BE96E1B00F7B42C /* Release */,
			
 
				+			);
			
 
				+			defaultConfigurationIsVisible = 0;
			
 
				+			defaultConfigurationName = Release;
			
 
				+		};
			
 
				+		6DE288371BE96E1B00F7B42C /* Build configuration list for PBXNativeTarget "Polycode3DPhsyics" */ = {
			
 
				+			isa = XCConfigurationList;
			
 
				+			buildConfigurations = (
			
 
				+				6DE288381BE96E1B00F7B42C /* Debug */,
			
 
				+				6DE288391BE96E1B00F7B42C /* Release */,
			
 
				+			);
			
 
				+			defaultConfigurationIsVisible = 0;
			
 
				+		};
			
 
				+/* End XCConfigurationList section */
			
 
				+	};
			
 
				+	rootObject = 6DE2882B1BE96E1B00F7B42C /* Project object */;
			
 
				+}
			
--- a/build/osx/PolycodeStudio/PolycodeStudio.xcodeproj/project.pbxproj
+++ b/build/osx/PolycodeStudio/PolycodeStudio.xcodeproj/project.pbxproj
@@ -7,6 +7,7 @@
 
				 	objects = {
			
 
				 
			
 
				 /* Begin PBXBuildFile section */
			
 
				+		6DE288291BE96D7B00F7B42C /* PolycodeView.mm in Sources */ = {isa = PBXBuildFile; fileRef = 6DE288281BE96D7B00F7B42C /* PolycodeView.mm */; };
			
 
				 		8A22D92B1BCC67DE009EF0A6 /* OpenGL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A22D92A1BCC67DE009EF0A6 /* OpenGL.framework */; };
			
 
				 		8A22D92D1BCC67E6009EF0A6 /* AudioUnit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A22D92C1BCC67E6009EF0A6 /* AudioUnit.framework */; };
			
 
				 		8A22D92F1BCC6809009EF0A6 /* CoreAudio.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A22D92E1BCC6809009EF0A6 /* CoreAudio.framework */; };
			
@@ -15,7 +16,6 @@
 
				 		8A36D2121B8E5751009897D0 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 8A36D2111B8E5751009897D0 /* main.m */; };
			
 
				 		8A36D2141B8E5751009897D0 /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A36D2131B8E5751009897D0 /* Images.xcassets */; };
			
 
				 		8A36D2171B8E5751009897D0 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 8A36D2151B8E5751009897D0 /* MainMenu.xib */; };
			
 
				-		8A36D22E1B8E5ACC009897D0 /* PolycodeView.mm in Sources */ = {isa = PBXBuildFile; fileRef = 8A36D22D1B8E5ACC009897D0 /* PolycodeView.mm */; };
			
 
				 		8A36D2531B8E5BA9009897D0 /* EditorGrid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8A36D2311B8E5BA9009897D0 /* EditorGrid.cpp */; };
			
 
				 		8A36D2541B8E5BA9009897D0 /* EntityEditorPropertyView.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8A36D2321B8E5BA9009897D0 /* EntityEditorPropertyView.cpp */; };
			
 
				 		8A36D2551B8E5BA9009897D0 /* EntityEditorSettingsView.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8A36D2331B8E5BA9009897D0 /* EntityEditorSettingsView.cpp */; };
			
@@ -65,6 +65,8 @@
 
				 /* End PBXBuildFile section */
			
 
				 
			
 
				 /* Begin PBXFileReference section */
			
 
				+		6DE288271BE96D7100F7B42C /* PolycodeView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolycodeView.h; path = ../../../../include/polycode/view/osx/PolycodeView.h; sourceTree = "<group>"; };
			
 
				+		6DE288281BE96D7B00F7B42C /* PolycodeView.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = PolycodeView.mm; path = ../../../../src/view/osx/PolycodeView.mm; sourceTree = "<group>"; };
			
 
				 		8A22D92A1BCC67DE009EF0A6 /* OpenGL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenGL.framework; path = System/Library/Frameworks/OpenGL.framework; sourceTree = SDKROOT; };
			
 
				 		8A22D92C1BCC67E6009EF0A6 /* AudioUnit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AudioUnit.framework; path = System/Library/Frameworks/AudioUnit.framework; sourceTree = SDKROOT; };
			
 
				 		8A22D92E1BCC6809009EF0A6 /* CoreAudio.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreAudio.framework; path = System/Library/Frameworks/CoreAudio.framework; sourceTree = SDKROOT; };
			
@@ -76,8 +78,6 @@
 
				 		8A36D2111B8E5751009897D0 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
			
 
				 		8A36D2131B8E5751009897D0 /* Images.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Images.xcassets; sourceTree = "<group>"; };
			
 
				 		8A36D2161B8E5751009897D0 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
			
 
				-		8A36D22C1B8E5ACC009897D0 /* PolycodeView.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PolycodeView.h; path = /Users/isafrin/Desktop/Workshop/PolycodeNoCmake/build/osx/TemplateApp/TemplateApp/../../../../include/polycode/view/osx/PolycodeView.h; sourceTree = "<absolute>"; };
			
 
				-		8A36D22D1B8E5ACC009897D0 /* PolycodeView.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = PolycodeView.mm; path = /Users/isafrin/Desktop/Workshop/PolycodeNoCmake/build/osx/TemplateApp/TemplateApp/../../../../src/view/osx/PolycodeView.mm; sourceTree = "<absolute>"; };
			
 
				 		8A36D2311B8E5BA9009897D0 /* EditorGrid.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp.preprocessed; fileEncoding = 4; name = EditorGrid.cpp; path = ../../../src/ide/EditorGrid.cpp; sourceTree = "<group>"; };
			
 
				 		8A36D2321B8E5BA9009897D0 /* EntityEditorPropertyView.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp.preprocessed; fileEncoding = 4; name = EntityEditorPropertyView.cpp; path = ../../../src/ide/EntityEditorPropertyView.cpp; sourceTree = "<group>"; };
			
 
				 		8A36D2331B8E5BA9009897D0 /* EntityEditorSettingsView.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.objcpp.preprocessed; fileEncoding = 4; name = EntityEditorSettingsView.cpp; path = ../../../src/ide/EntityEditorSettingsView.cpp; sourceTree = "<group>"; };
			
@@ -212,9 +212,9 @@
 
				 		8A36D20B1B8E5751009897D0 /* PolycodeStudio */ = {
			
 
				 			isa = PBXGroup;
			
 
				 			children = (
			
 
				+				6DE288271BE96D7100F7B42C /* PolycodeView.h */,
			
 
				+				6DE288281BE96D7B00F7B42C /* PolycodeView.mm */,
			
 
				 				8A36D3281B8E7096009897D0 /* default.pak */,
			
 
				-				8A36D22C1B8E5ACC009897D0 /* PolycodeView.h */,
			
 
				-				8A36D22D1B8E5ACC009897D0 /* PolycodeView.mm */,
			
 
				 				8A36D20E1B8E5751009897D0 /* AppDelegate.h */,
			
 
				 				8A36D20F1B8E5751009897D0 /* AppDelegate.m */,
			
 
				 				8A36D2131B8E5751009897D0 /* Images.xcassets */,
			
@@ -433,12 +433,12 @@
 
				 				8A36D2591B8E5BA9009897D0 /* NewFileWindow.cpp in Sources */,
			
 
				 				8A36D25C1B8E5BA9009897D0 /* PolycodeConsole.cpp in Sources */,
			
 
				 				8A36D2711B8E5BA9009897D0 /* TextureBrowser.cpp in Sources */,
			
 
				+				6DE288291BE96D7B00F7B42C /* PolycodeView.mm in Sources */,
			
 
				 				8A36D2741B8E5BA9009897D0 /* TransformGizmo.cpp in Sources */,
			
 
				 				8A36D2721B8E5BA9009897D0 /* ToolWindows.cpp in Sources */,
			
 
				 				8A36D2541B8E5BA9009897D0 /* EntityEditorPropertyView.cpp in Sources */,
			
 
				 				8A36D2101B8E5751009897D0 /* AppDelegate.m in Sources */,
			
 
				 				8A36D2681B8E5BA9009897D0 /* PolycodeProjectEditor.cpp in Sources */,
			
 
				-				8A36D22E1B8E5ACC009897D0 /* PolycodeView.mm in Sources */,
			
 
				 				8A36D2701B8E5BA9009897D0 /* SettingsWindow.cpp in Sources */,
			
 
				 				8A36D26A1B8E5BA9009897D0 /* PolycodeProps.cpp in Sources */,
			
 
				 				8A36D2601B8E5BA9009897D0 /* PolycodeFontEditor.cpp in Sources */,
			
--- a/build/osx/PolycodeStudio/PolycodeStudio/Base.lproj/MainMenu.xib
+++ b/build/osx/PolycodeStudio/PolycodeStudio/Base.lproj/MainMenu.xib
@@ -1,7 +1,7 @@
 
				 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
			
 
				-<document type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="3.0" toolsVersion="8191" systemVersion="15A284" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none">
			
 
				+<document type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="3.0" toolsVersion="7706" systemVersion="14F27" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none">
			
 
				     <dependencies>
			
 
				-        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="8191"/>
			
 
				+        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="7706"/>
			
 
				     </dependencies>
			
 
				     <objects>
			
 
				         <customObject id="-2" userLabel="File's Owner" customClass="NSApplication">
			
@@ -340,7 +340,6 @@
 
				             <connections>
			
 
				                 <outlet property="polycodeView" destination="533" id="VnI-xK-DQN"/>
			
 
				                 <outlet property="projectMenu" destination="559" id="564"/>
			
 
				-                <outlet property="sdfsdf" destination="597" id="613"/>
			
 
				                 <outlet property="window" destination="371" id="Z7F-BR-QJp"/>
			
 
				             </connections>
			
 
				         </customObject>
			
--- a/include/Bullet-C-Api.h
+++ b/include/Bullet-C-Api.h
@@ -0,0 +1,176 @@
 
				+/*

			
 
				+Bullet Continuous Collision Detection and Physics Library

			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/

			
 
				+

			
 
				+This software is provided 'as-is', without any express or implied warranty.

			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.

			
 
				+Permission is granted to anyone to use this software for any purpose, 

			
 
				+including commercial applications, and to alter it and redistribute it freely, 

			
 
				+subject to the following restrictions:

			
 
				+

			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.

			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.

			
 
				+3. This notice may not be removed or altered from any source distribution.

			
 
				+*/

			
 
				+

			
 
				+/*

			
 
				+	Draft high-level generic physics C-API. For low-level access, use the physics SDK native API's.

			
 
				+	Work in progress, functionality will be added on demand.

			
 
				+

			
 
				+	If possible, use the richer Bullet C++ API, by including "btBulletDynamicsCommon.h"

			
 
				+*/

			
 
				+

			
 
				+#ifndef BULLET_C_API_H

			
 
				+#define BULLET_C_API_H

			
 
				+

			
 
				+#define PL_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name

			
 
				+

			
 
				+#ifdef BT_USE_DOUBLE_PRECISION

			
 
				+typedef double	plReal;

			
 
				+#else

			
 
				+typedef float	plReal;

			
 
				+#endif

			
 
				+

			
 
				+typedef plReal	plVector3[3];

			
 
				+typedef plReal	plQuaternion[4];

			
 
				+

			
 
				+#ifdef __cplusplus

			
 
				+extern "C" { 

			
 
				+#endif

			
 
				+

			
 
				+/**	Particular physics SDK (C-API) */

			
 
				+	PL_DECLARE_HANDLE(plPhysicsSdkHandle);

			
 
				+

			
 
				+/** 	Dynamics world, belonging to some physics SDK (C-API)*/

			
 
				+	PL_DECLARE_HANDLE(plDynamicsWorldHandle);

			
 
				+

			
 
				+/** Rigid Body that can be part of a Dynamics World (C-API)*/	

			
 
				+	PL_DECLARE_HANDLE(plRigidBodyHandle);

			
 
				+

			
 
				+/** 	Collision Shape/Geometry, property of a Rigid Body (C-API)*/

			
 
				+	PL_DECLARE_HANDLE(plCollisionShapeHandle);

			
 
				+

			
 
				+/** Constraint for Rigid Bodies (C-API)*/

			
 
				+	PL_DECLARE_HANDLE(plConstraintHandle);

			
 
				+

			
 
				+/** Triangle Mesh interface (C-API)*/

			
 
				+	PL_DECLARE_HANDLE(plMeshInterfaceHandle);

			
 
				+

			
 
				+/** Broadphase Scene/Proxy Handles (C-API)*/

			
 
				+	PL_DECLARE_HANDLE(plCollisionBroadphaseHandle);

			
 
				+	PL_DECLARE_HANDLE(plBroadphaseProxyHandle);

			
 
				+	PL_DECLARE_HANDLE(plCollisionWorldHandle);

			
 
				+

			
 
				+/**

			
 
				+	Create and Delete a Physics SDK	

			
 
				+*/

			
 
				+

			
 
				+	extern	plPhysicsSdkHandle	plNewBulletSdk(void); //this could be also another sdk, like ODE, PhysX etc.

			
 
				+	extern	void		plDeletePhysicsSdk(plPhysicsSdkHandle	physicsSdk);

			
 
				+

			
 
				+/** Collision World, not strictly necessary, you can also just create a Dynamics World with Rigid Bodies which internally manages the Collision World with Collision Objects */

			
 
				+

			
 
				+	typedef void(*btBroadphaseCallback)(void* clientData, void* object1,void* object2);

			
 
				+

			
 
				+	extern plCollisionBroadphaseHandle	plCreateSapBroadphase(btBroadphaseCallback beginCallback,btBroadphaseCallback endCallback);

			
 
				+

			
 
				+	extern void	plDestroyBroadphase(plCollisionBroadphaseHandle bp);

			
 
				+

			
 
				+	extern 	plBroadphaseProxyHandle plCreateProxy(plCollisionBroadphaseHandle bp, void* clientData, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);

			
 
				+

			
 
				+	extern void plDestroyProxy(plCollisionBroadphaseHandle bp, plBroadphaseProxyHandle proxyHandle);

			
 
				+

			
 
				+	extern void plSetBoundingBox(plBroadphaseProxyHandle proxyHandle, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);

			
 
				+

			
 
				+/* todo: add pair cache support with queries like add/remove/find pair */

			
 
				+	

			
 
				+	extern plCollisionWorldHandle plCreateCollisionWorld(plPhysicsSdkHandle physicsSdk);

			
 
				+

			
 
				+/* todo: add/remove objects */

			
 
				+	

			
 
				+

			
 
				+/* Dynamics World */

			
 
				+

			
 
				+	extern  plDynamicsWorldHandle plCreateDynamicsWorld(plPhysicsSdkHandle physicsSdk); /* Returns a dynamics world handle for the given SDK handle. */

			
 
				+

			
 
				+	extern  void           plDeleteDynamicsWorld(plDynamicsWorldHandle world);

			
 
				+

			
 
				+	extern	void	plStepSimulation(plDynamicsWorldHandle,	plReal	timeStep); /* The world handle parameter is unnamed; timeStep is a plReal (units are not stated here). */

			
 
				+

			
 
				+	extern  void plAddRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object); /* Add/remove pair: both take a world handle and a rigid body handle. */

			
 
				+

			
 
				+	extern  void plRemoveRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object);

			
 
				+

			
 
				+

			
 
				+/* Rigid Body  */

			
 
				+

			
 
				+	extern  plRigidBodyHandle plCreateRigidBody(	void* user_data,  float mass, plCollisionShapeHandle cshape ); /* NOTE(review): mass is declared float, whereas the other scalar parameters in this API use plReal. */

			
 
				+

			
 
				+	extern  void plDeleteRigidBody(plRigidBodyHandle body); /* Takes the handle type returned by plCreateRigidBody. */

			
 
				+

			
 
				+

			
 
				+/* Collision Shape definition */

			
 
				+

			
 
				+	extern  plCollisionShapeHandle plNewSphereShape(plReal radius); /* Shape constructors: every plNew*Shape below returns a plCollisionShapeHandle. */

			
 
				+	extern  plCollisionShapeHandle plNewBoxShape(plReal x, plReal y, plReal z); /* Whether x/y/z are full or half extents is not stated here - TODO confirm. */

			
 
				+	extern  plCollisionShapeHandle plNewCapsuleShape(plReal radius, plReal height);	

			
 
				+	extern  plCollisionShapeHandle plNewConeShape(plReal radius, plReal height);

			
 
				+	extern  plCollisionShapeHandle plNewCylinderShape(plReal radius, plReal height);

			
 
				+	extern	plCollisionShapeHandle plNewCompoundShape(void);

			
 
				+	extern	void	plAddChildShape(plCollisionShapeHandle compoundShape,plCollisionShapeHandle childShape, plVector3 childPos,plQuaternion childOrn); /* Child placement is given as a position vector and an orientation quaternion. */

			
 
				+

			
 
				+	extern  void plDeleteShape(plCollisionShapeHandle shape); /* Accepts any plCollisionShapeHandle. */

			
 
				+

			
 
				+	/* Convex Meshes */

			
 
				+	extern  plCollisionShapeHandle plNewConvexHullShape(void); /* Takes no arguments; vertices are supplied separately through plAddVertex. */

			
 
				+	extern  void		plAddVertex(plCollisionShapeHandle convexHull, plReal x,plReal y,plReal z); /* Vertex is passed as three plReal scalars. */

			
 
				+/* Concave static triangle meshes */

			
 
				+	extern  plMeshInterfaceHandle		   plNewMeshInterface(void);

			
 
				+	extern  void		plAddTriangle(plMeshInterfaceHandle meshHandle, plVector3 v0,plVector3 v1,plVector3 v2); /* Triangle corners are passed as three plVector3 values. */

			
 
				+	extern  plCollisionShapeHandle plNewStaticTriangleMeshShape(plMeshInterfaceHandle); /* The mesh interface parameter is unnamed. */

			
 
				+

			
 
				+	extern  void plSetScaling(plCollisionShapeHandle shape, plVector3 scaling);

			
 
				+

			
 
				+/* SOLID has Response Callback/Table/Management */

			
 
				+/* PhysX has Triggers, User Callbacks and filtering */

			
 
				+/* ODE has the typedef void dNearCallback (void *data, dGeomID o1, dGeomID o2); */

			
 
				+

			
 
				+/*	typedef void plUpdatedPositionCallback(void* userData, plRigidBodyHandle	rbHandle, plVector3 pos); */

			
 
				+/*	typedef void plUpdatedOrientationCallback(void* userData, plRigidBodyHandle	rbHandle, plQuaternion orientation); */

			
 
				+

			
 
				+	/* get world transform */

			
 
				+	extern void	plGetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix); /* matrix is a caller-supplied plReal pointer; required element count is not stated here - TODO confirm. */

			
 
				+	extern void	plGetPosition(plRigidBodyHandle object,plVector3 position); /* position is non-const here (const in plSetPosition); presumably an out-parameter, which requires plVector3 to be an array/pointer typedef - TODO confirm. */

			
 
				+	extern void plGetOrientation(plRigidBodyHandle object,plQuaternion orientation); /* Same pattern as plGetPosition: non-const plQuaternion, presumably written by the callee - TODO confirm. */

			
 
				+

			
 
				+	/* set world transform (position/orientation) */

			
 
				+	extern  void plSetPosition(plRigidBodyHandle object, const plVector3 position);

			
 
				+	extern  void plSetOrientation(plRigidBodyHandle object, const plQuaternion orientation);

			
 
				+	extern	void plSetEuler(plReal yaw,plReal pitch,plReal roll, plQuaternion orient); /* NOTE(review): takes no rigid body handle; orient is non-const, so presumably the quaternion is written into it - confirm. */

			
 
				+	extern	void plSetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix); /* matrix is a non-const plReal* even though this is a setter. */

			
 
				+

			
 
				+	typedef struct plRayCastResult { /* Result record for plRayCast: two handles and two plVector3 fields. */

			
 
				+		plRigidBodyHandle		m_body;  /* rigid body handle */

			
 
				+		plCollisionShapeHandle	m_shape; 		/* collision shape handle */

			
 
				+		plVector3				m_positionWorld; 		/* position; world space per the field name */

			
 
				+		plVector3				m_normalWorld; /* normal; world space per the field name */

			
 
				+	} plRayCastResult;

			
 
				+

			
 
				+	extern  int plRayCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plRayCastResult res); /* Returns int (meaning not stated here). NOTE(review): res is a plRayCastResult struct passed by value, so anything the callee writes into it is not visible to the caller - confirm how results are meant to be returned. */

			
 
				+

			
 
				+	/* Sweep API */

			
 
				+

			
 
				+	/* extern  plRigidBodyHandle plObjectCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plVector3 hitpoint, plVector3 normal); */

			
 
				+

			
 
				+	/* Continuous Collision Detection API */

			
 
				+	

			
 
				+	// needed for source/blender/blenkernel/intern/collision.c

			
 
				+	double plNearestPoints(float p1[3], float p2[3], float p3[3], float q1[3], float q2[3], float q3[3], float *pa, float *pb, float normal[3]); /* Takes six 3-float inputs (p1..p3, q1..q3) plus pa, pb and normal, and returns a double. NOTE(review): the only prototype in this part of the header without `extern`, and it uses float/double rather than plReal. */

			
 
				+

			
 
				+#ifdef __cplusplus

			
 
				+}

			
 
				+#endif

			
 
				+

			
 
				+

			
 
				+#endif //BULLET_C_API_H

			
 
				+

			
--- a/include/Bullet2FileLoader/autogenerated/bullet2.h
+++ b/include/Bullet2FileLoader/autogenerated/bullet2.h
@@ -0,0 +1,1053 @@
 
				+/* Copyright (C) 2011 Erwin Coumans & Charlie C
			
 
				+*
			
 
				+* This software is provided 'as-is', without any express or implied
			
 
				+* warranty.  In no event will the authors be held liable for any damages
			
 
				+* arising from the use of this software.
			
 
				+*
			
 
				+* Permission is granted to anyone to use this software for any purpose,
			
 
				+* including commercial applications, and to alter it and redistribute it
			
 
				+* freely, subject to the following restrictions:
			
 
				+*
			
 
				+* 1. The origin of this software must not be misrepresented; you must not
			
 
				+*    claim that you wrote the original software. If you use this software
			
 
				+*    in a product, an acknowledgment in the product documentation would be
			
 
				+*    appreciated but is not required.
			
 
				+* 2. Altered source versions must be plainly marked as such, and must not be
			
 
				+*    misrepresented as being the original software.
			
 
				+* 3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+// Auto generated from Bullet/Extras/HeaderGenerator/bulletGenerate.py
			
 
				+#ifndef __BULLET_H__
			
 
				+#define __BULLET_H__
			
 
				+namespace Bullet3SerializeBullet2 {
			
 
				+
			
 
				+// Placeholder struct holding a single unused int; used further down as the pointee type of m_rbA/m_rbB in b3TypedConstraintData.
			
 
				+typedef struct bInvalidHandle {
			
 
				+	int unused;
			
 
				+}bInvalidHandle;
			
 
				+
			
 
				+    class PointerArray;
			
 
				+    class b3PhysicsSystem;
			
 
				+    class ListBase;
			
 
				+    class b3Vector3FloatData;
			
 
				+    class b3Vector3DoubleData;
			
 
				+    class b3Matrix3x3FloatData;
			
 
				+    class b3Matrix3x3DoubleData;
			
 
				+    class b3TransformFloatData;
			
 
				+    class b3TransformDoubleData;
			
 
				+    class b3BvhSubtreeInfoData;
			
 
				+    class b3OptimizedBvhNodeFloatData;
			
 
				+    class b3OptimizedBvhNodeDoubleData;
			
 
				+    class b3QuantizedBvhNodeData;
			
 
				+    class b3QuantizedBvhFloatData;
			
 
				+    class b3QuantizedBvhDoubleData;
			
 
				+    class b3CollisionShapeData;
			
 
				+    class b3StaticPlaneShapeData;
			
 
				+    class b3ConvexInternalShapeData;
			
 
				+    class b3PositionAndRadius;
			
 
				+    class b3MultiSphereShapeData;
			
 
				+    class b3IntIndexData;
			
 
				+    class b3ShortIntIndexData;
			
 
				+    class b3ShortIntIndexTripletData;
			
 
				+    class b3CharIndexTripletData;
			
 
				+    class b3MeshPartData;
			
 
				+    class b3StridingMeshInterfaceData;
			
 
				+    class b3TriangleMeshShapeData;
			
 
				+    class b3ScaledTriangleMeshShapeData;
			
 
				+    class b3CompoundShapeChildData;
			
 
				+    class b3CompoundShapeData;
			
 
				+    class b3CylinderShapeData;
			
 
				+    class b3CapsuleShapeData;
			
 
				+    class b3TriangleInfoData;
			
 
				+    class b3TriangleInfoMapData;
			
 
				+    class b3GImpactMeshShapeData;
			
 
				+    class b3ConvexHullShapeData;
			
 
				+    class b3CollisionObjectDoubleData;
			
 
				+    class b3CollisionObjectFloatData;
			
 
				+    class b3DynamicsWorldDoubleData;
			
 
				+    class b3DynamicsWorldFloatData;
			
 
				+    class b3RigidBodyFloatData;
			
 
				+    class b3RigidBodyDoubleData;
			
 
				+    class b3ConstraintInfo1;
			
 
				+    class b3TypedConstraintData;
			
 
				+    class b3Point2PointConstraintFloatData;
			
 
				+    class b3Point2PointConstraintDoubleData;
			
 
				+    class b3HingeConstraintDoubleData;
			
 
				+    class b3HingeConstraintFloatData;
			
 
				+    class b3ConeTwistConstraintData;
			
 
				+    class b3Generic6DofConstraintData;
			
 
				+    class b3Generic6DofSpringConstraintData;
			
 
				+    class b3SliderConstraintData;
			
 
				+    class b3ContactSolverInfoDoubleData;
			
 
				+    class b3ContactSolverInfoFloatData;
			
 
				+    class SoftBodyMaterialData;
			
 
				+    class SoftBodyNodeData;
			
 
				+    class SoftBodyLinkData;
			
 
				+    class SoftBodyFaceData;
			
 
				+    class SoftBodyTetraData;
			
 
				+    class SoftRigidAnchorData;
			
 
				+    class SoftBodyConfigData;
			
 
				+    class SoftBodyPoseData;
			
 
				+    class SoftBodyClusterData;
			
 
				+    class b3SoftBodyJointData;
			
 
				+    class b3SoftBodyFloatData;
			
 
				+// -------------------------------------------------- //
			
 
				+    class PointerArray // Plain record: two ints (m_size, m_capacity) and an untyped m_data pointer.
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_size;
			
 
				+        int m_capacity;
			
 
				+        void *m_data;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3PhysicsSystem // Groups three PointerArray members: collision shapes, collision objects and constraints.
			
 
				+    {
			
 
				+    public:
			
 
				+        PointerArray m_collisionShapes;
			
 
				+        PointerArray m_collisionObjects;
			
 
				+        PointerArray m_constraints;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class ListBase // Pair of untyped pointers, first and last; presumably the two ends of a list - confirm against the generator.
			
 
				+    {
			
 
				+    public:
			
 
				+        void *first;
			
 
				+        void *last;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Vector3FloatData // Vector record stored as an array of floats.
			
 
				+    {
			
 
				+    public:
			
 
				+        float m_floats[4]; // four elements, despite the Vector3 name
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Vector3DoubleData // Vector record stored as an array of doubles.
			
 
				+    {
			
 
				+    public:
			
 
				+        double m_floats[4]; // four doubles; the member keeps the m_floats name and has 4 elements despite the Vector3 name
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Matrix3x3FloatData // Matrix record made of three b3Vector3FloatData entries.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_el[3]; // each entry itself stores four floats
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Matrix3x3DoubleData // Matrix record made of three b3Vector3DoubleData entries.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3DoubleData m_el[3]; // each entry itself stores four doubles
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TransformFloatData // Transform record (float): a b3Matrix3x3FloatData basis followed by a b3Vector3FloatData origin.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Matrix3x3FloatData m_basis;
			
 
				+        b3Vector3FloatData m_origin;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TransformDoubleData // Transform record (double): a b3Matrix3x3DoubleData basis followed by a b3Vector3DoubleData origin.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Matrix3x3DoubleData m_basis;
			
 
				+        b3Vector3DoubleData m_origin;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3BvhSubtreeInfoData // Subtree record: root node index, subtree size, and AABB min/max stored as short[3] each.
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_rootNodeIndex;
			
 
				+        int m_subtreeSize;
			
 
				+        short m_quantizedAabbMin[3];
			
 
				+        short m_quantizedAabbMax[3];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3OptimizedBvhNodeFloatData // BVH node record (float): AABB min/max vectors, three int indices, then 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_aabbMinOrg;
			
 
				+        b3Vector3FloatData m_aabbMaxOrg;
			
 
				+        int m_escapeIndex;
			
 
				+        int m_subPart;
			
 
				+        int m_triangleIndex;
			
 
				+        char m_pad[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3OptimizedBvhNodeDoubleData // BVH node record (double): AABB min/max vectors, three int indices, then 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3DoubleData m_aabbMinOrg;
			
 
				+        b3Vector3DoubleData m_aabbMaxOrg;
			
 
				+        int m_escapeIndex;
			
 
				+        int m_subPart;
			
 
				+        int m_triangleIndex;
			
 
				+        char m_pad[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3QuantizedBvhNodeData // Quantized BVH node record: short[3] AABB min/max plus one int.
			
 
				+    {
			
 
				+    public:
			
 
				+        short m_quantizedAabbMin[3];
			
 
				+        short m_quantizedAabbMax[3];
			
 
				+        int m_escapeIndexOrTriangleIndex; // one int serving as either index, per its name
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3QuantizedBvhFloatData // Quantized BVH record (float): three vectors, four int fields, three node/subtree pointers, then two more ints.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_bvhAabbMin;
			
 
				+        b3Vector3FloatData m_bvhAabbMax;
			
 
				+        b3Vector3FloatData m_bvhQuantization;
			
 
				+        int m_curNodeIndex;
			
 
				+        int m_useQuantization;
			
 
				+        int m_numContiguousLeafNodes;
			
 
				+        int m_numQuantizedContiguousNodes;
			
 
				+        b3OptimizedBvhNodeFloatData *m_contiguousNodesPtr;
			
 
				+        b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
			
 
				+        b3BvhSubtreeInfoData *m_subTreeInfoPtr; // NOTE(review): sits before m_traversalMode here, but is the last member of b3QuantizedBvhDoubleData
			
 
				+        int m_traversalMode;
			
 
				+        int m_numSubtreeHeaders;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3QuantizedBvhDoubleData // Quantized BVH record (double): three vectors, four int fields, two node pointers, two ints, then the subtree pointer.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3DoubleData m_bvhAabbMin;
			
 
				+        b3Vector3DoubleData m_bvhAabbMax;
			
 
				+        b3Vector3DoubleData m_bvhQuantization;
			
 
				+        int m_curNodeIndex;
			
 
				+        int m_useQuantization;
			
 
				+        int m_numContiguousLeafNodes;
			
 
				+        int m_numQuantizedContiguousNodes;
			
 
				+        b3OptimizedBvhNodeDoubleData *m_contiguousNodesPtr;
			
 
				+        b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr;
			
 
				+        int m_traversalMode;
			
 
				+        int m_numSubtreeHeaders;
			
 
				+        b3BvhSubtreeInfoData *m_subTreeInfoPtr; // NOTE(review): last member here, but placed before m_traversalMode in b3QuantizedBvhFloatData
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CollisionShapeData // Common shape header (name pointer, int shape type, 4 pad bytes); embedded as a member by other shape records in this header.
			
 
				+    {
			
 
				+    public:
			
 
				+        char *m_name;
			
 
				+        int m_shapeType;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3StaticPlaneShapeData // Static plane shape record: shape header, local scaling, plane normal, float plane constant, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionShapeData m_collisionShapeData;
			
 
				+        b3Vector3FloatData m_localScaling;
			
 
				+        b3Vector3FloatData m_planeNormal;
			
 
				+        float m_planeConstant;
			
 
				+        char m_pad[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ConvexInternalShapeData // Convex shape record: shape header, local scaling, implicit shape dimensions, float collision margin, int padding.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionShapeData m_collisionShapeData;
			
 
				+        b3Vector3FloatData m_localScaling;
			
 
				+        b3Vector3FloatData m_implicitShapeDimensions;
			
 
				+        float m_collisionMargin;
			
 
				+        int m_padding; // an int here, whereas most other pad members in this header are char arrays
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3PositionAndRadius // A b3Vector3FloatData position plus a float radius; pointed to by b3MultiSphereShapeData.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_pos;
			
 
				+        float m_radius;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3MultiSphereShapeData // Multi-sphere shape record: convex shape data, pointer to b3PositionAndRadius entries, int array size, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ConvexInternalShapeData m_convexInternalShapeData;
			
 
				+        b3PositionAndRadius *m_localPositionArrayPtr;
			
 
				+        int m_localPositionArraySize;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3IntIndexData // Single int value; pointed to by b3MeshPartData::m_indices32.
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_value;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ShortIntIndexData // Single short value followed by 2 pad bytes; pointed to by b3MeshPartData::m_indices16.
			
 
				+    {
			
 
				+    public:
			
 
				+        short m_value;
			
 
				+        char m_pad[2];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ShortIntIndexTripletData // Three short values followed by 2 pad bytes; pointed to by b3MeshPartData::m_3indices16.
			
 
				+    {
			
 
				+    public:
			
 
				+        short m_values[3];
			
 
				+        char m_pad[2];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CharIndexTripletData // Three char values followed by one pad byte; pointed to by b3MeshPartData::m_3indices8.
			
 
				+    {
			
 
				+    public:
			
 
				+        char m_values[3];
			
 
				+        char m_pad;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3MeshPartData // Mesh part record: two vertex pointers (float, double), four index pointers, then triangle and vertex counts.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData *m_vertices3f;
			
 
				+        b3Vector3DoubleData *m_vertices3d;
			
 
				+        b3IntIndexData *m_indices32;
			
 
				+        b3ShortIntIndexTripletData *m_3indices16;
			
 
				+        b3CharIndexTripletData *m_3indices8;
			
 
				+        b3ShortIntIndexData *m_indices16;
			
 
				+        int m_numTriangles;
			
 
				+        int m_numVertices;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3StridingMeshInterfaceData // Mesh interface record: pointer to b3MeshPartData, scaling vector, int part count, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3MeshPartData *m_meshPartsPtr;
			
 
				+        b3Vector3FloatData m_scaling;
			
 
				+        int m_numMeshParts;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TriangleMeshShapeData // Triangle mesh shape record: shape header, embedded mesh interface, three pointers (float BVH, double BVH, triangle info map), float margin, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionShapeData m_collisionShapeData;
			
 
				+        b3StridingMeshInterfaceData m_meshInterface;
			
 
				+        b3QuantizedBvhFloatData *m_quantizedFloatBvh;
			
 
				+        b3QuantizedBvhDoubleData *m_quantizedDoubleBvh;
			
 
				+        b3TriangleInfoMapData *m_triangleInfoMap;
			
 
				+        float m_collisionMargin;
			
 
				+        char m_pad3[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ScaledTriangleMeshShapeData // Embeds a b3TriangleMeshShapeData followed by a local scaling vector.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TriangleMeshShapeData m_trimeshShapeData;
			
 
				+        b3Vector3FloatData m_localScaling;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CompoundShapeChildData // Compound child entry: float transform, pointer to a b3CollisionShapeData, int shape type, float margin.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TransformFloatData m_transform;
			
 
				+        b3CollisionShapeData *m_childShape;
			
 
				+        int m_childShapeType;
			
 
				+        float m_childMargin;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CompoundShapeData // Compound shape record: shape header, pointer to b3CompoundShapeChildData, int child count, float margin.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionShapeData m_collisionShapeData;
			
 
				+        b3CompoundShapeChildData *m_childShapePtr;
			
 
				+        int m_numChildShapes;
			
 
				+        float m_collisionMargin;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CylinderShapeData // Cylinder shape record: convex shape data, int m_upAxis, 4 pad bytes (same members as b3CapsuleShapeData).
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ConvexInternalShapeData m_convexInternalShapeData;
			
 
				+        int m_upAxis;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CapsuleShapeData // Capsule shape record: convex shape data, int m_upAxis, 4 pad bytes (same members as b3CylinderShapeData).
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ConvexInternalShapeData m_convexInternalShapeData;
			
 
				+        int m_upAxis;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TriangleInfoData // Per-triangle record: int flags plus three float edge angles (V0V1, V1V2, V2V0).
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_flags;
			
 
				+        float m_edgeV0V1Angle;
			
 
				+        float m_edgeV1V2Angle;
			
 
				+        float m_edgeV2V0Angle;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TriangleInfoMapData // Triangle info map record: four array pointers, five float epsilons/thresholds, four int sizes/counts, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        int *m_hashTablePtr;
			
 
				+        int *m_nextPtr;
			
 
				+        b3TriangleInfoData *m_valueArrayPtr;
			
 
				+        int *m_keyArrayPtr;
			
 
				+        float m_convexEpsilon;
			
 
				+        float m_planarEpsilon;
			
 
				+        float m_equalVertexThreshold;
			
 
				+        float m_edgeDistanceThreshold;
			
 
				+        float m_zeroAreaThreshold;
			
 
				+        int m_nextSize;
			
 
				+        int m_hashTableSize;
			
 
				+        int m_numValues;
			
 
				+        int m_numKeys;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3GImpactMeshShapeData // GImpact mesh shape record: shape header, embedded mesh interface, local scaling, float margin, int sub-type.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionShapeData m_collisionShapeData;
			
 
				+        b3StridingMeshInterfaceData m_meshInterface;
			
 
				+        b3Vector3FloatData m_localScaling;
			
 
				+        float m_collisionMargin;
			
 
				+        int m_gimpactSubType;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ConvexHullShapeData // Convex hull shape record: convex shape data, float and double point pointers, int point count, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ConvexInternalShapeData m_convexInternalShapeData;
			
 
				+        b3Vector3FloatData *m_unscaledPointsFloatPtr;
			
 
				+        b3Vector3DoubleData *m_unscaledPointsDoublePtr;
			
 
				+        int m_numUnscaledPoints;
			
 
				+        char m_padding3[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CollisionObjectDoubleData // Collision object record (double): four pointers, two transforms, three vectors, eight double scalars, seven int fields, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        void *m_broadphaseHandle;
			
 
				+        void *m_collisionShape;
			
 
				+        b3CollisionShapeData *m_rootCollisionShape;
			
 
				+        char *m_name;
			
 
				+        b3TransformDoubleData m_worldTransform;
			
 
				+        b3TransformDoubleData m_interpolationWorldTransform;
			
 
				+        b3Vector3DoubleData m_interpolationLinearVelocity;
			
 
				+        b3Vector3DoubleData m_interpolationAngularVelocity;
			
 
				+        b3Vector3DoubleData m_anisotropicFriction;
			
 
				+        double m_contactProcessingThreshold;
			
 
				+        double m_deactivationTime;
			
 
				+        double m_friction;
			
 
				+        double m_rollingFriction;
			
 
				+        double m_restitution;
			
 
				+        double m_hitFraction;
			
 
				+        double m_ccdSweptSphereRadius;
			
 
				+        double m_ccdMotionThreshold;
			
 
				+        int m_hasAnisotropicFriction;
			
 
				+        int m_collisionFlags;
			
 
				+        int m_islandTag1;
			
 
				+        int m_companionId;
			
 
				+        int m_activationState1;
			
 
				+        int m_internalType;
			
 
				+        int m_checkCollideWith;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3CollisionObjectFloatData // Collision object record (float): same member names and order as b3CollisionObjectDoubleData, with float transforms, vectors and scalars.
			
 
				+    {
			
 
				+    public:
			
 
				+        void *m_broadphaseHandle;
			
 
				+        void *m_collisionShape;
			
 
				+        b3CollisionShapeData *m_rootCollisionShape;
			
 
				+        char *m_name;
			
 
				+        b3TransformFloatData m_worldTransform;
			
 
				+        b3TransformFloatData m_interpolationWorldTransform;
			
 
				+        b3Vector3FloatData m_interpolationLinearVelocity;
			
 
				+        b3Vector3FloatData m_interpolationAngularVelocity;
			
 
				+        b3Vector3FloatData m_anisotropicFriction;
			
 
				+        float m_contactProcessingThreshold;
			
 
				+        float m_deactivationTime;
			
 
				+        float m_friction;
			
 
				+        float m_rollingFriction;
			
 
				+        float m_restitution;
			
 
				+        float m_hitFraction;
			
 
				+        float m_ccdSweptSphereRadius;
			
 
				+        float m_ccdMotionThreshold;
			
 
				+        int m_hasAnisotropicFriction;
			
 
				+        int m_collisionFlags;
			
 
				+        int m_islandTag1;
			
 
				+        int m_companionId;
			
 
				+        int m_activationState1;
			
 
				+        int m_internalType;
			
 
				+        int m_checkCollideWith;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3RigidBodyFloatData // Rigid body record (float): embedded collision object data, one matrix, nine vectors, nine float scalars, one int flag.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionObjectFloatData m_collisionObjectData;
			
 
				+        b3Matrix3x3FloatData m_invInertiaTensorWorld;
			
 
				+        b3Vector3FloatData m_linearVelocity;
			
 
				+        b3Vector3FloatData m_angularVelocity;
			
 
				+        b3Vector3FloatData m_angularFactor;
			
 
				+        b3Vector3FloatData m_linearFactor;
			
 
				+        b3Vector3FloatData m_gravity;
			
 
				+        b3Vector3FloatData m_gravity_acceleration;
			
 
				+        b3Vector3FloatData m_invInertiaLocal;
			
 
				+        b3Vector3FloatData m_totalForce;
			
 
				+        b3Vector3FloatData m_totalTorque;
			
 
				+        float m_inverseMass;
			
 
				+        float m_linearDamping;
			
 
				+        float m_angularDamping;
			
 
				+        float m_additionalDampingFactor;
			
 
				+        float m_additionalLinearDampingThresholdSqr;
			
 
				+        float m_additionalAngularDampingThresholdSqr;
			
 
				+        float m_additionalAngularDampingFactor;
			
 
				+        float m_linearSleepingThreshold;
			
 
				+        float m_angularSleepingThreshold;
			
 
				+        int m_additionalDamping; // last member; unlike b3RigidBodyDoubleData, no pad field follows
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3RigidBodyDoubleData // Rigid body record (double): embedded collision object data, one matrix, nine vectors, nine double scalars, one int flag, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionObjectDoubleData m_collisionObjectData;
			
 
				+        b3Matrix3x3DoubleData m_invInertiaTensorWorld;
			
 
				+        b3Vector3DoubleData m_linearVelocity;
			
 
				+        b3Vector3DoubleData m_angularVelocity;
			
 
				+        b3Vector3DoubleData m_angularFactor;
			
 
				+        b3Vector3DoubleData m_linearFactor;
			
 
				+        b3Vector3DoubleData m_gravity;
			
 
				+        b3Vector3DoubleData m_gravity_acceleration;
			
 
				+        b3Vector3DoubleData m_invInertiaLocal;
			
 
				+        b3Vector3DoubleData m_totalForce;
			
 
				+        b3Vector3DoubleData m_totalTorque;
			
 
				+        double m_inverseMass;
			
 
				+        double m_linearDamping;
			
 
				+        double m_angularDamping;
			
 
				+        double m_additionalDampingFactor;
			
 
				+        double m_additionalLinearDampingThresholdSqr;
			
 
				+        double m_additionalAngularDampingThresholdSqr;
			
 
				+        double m_additionalAngularDampingFactor;
			
 
				+        double m_linearSleepingThreshold;
			
 
				+        double m_angularSleepingThreshold;
			
 
				+        int m_additionalDamping;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ConstraintInfo1 // Two ints: m_numConstraintRows and nub (the meaning of nub is not stated here).
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_numConstraintRows;
			
 
				+        int nub;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3TypedConstraintData // Common constraint header; embedded as m_typeConstraintData by the constraint records in this header.
			
 
				+    {
			
 
				+    public:
			
 
				+        bInvalidHandle *m_rbA; // typed with the bInvalidHandle placeholder struct
			
 
				+        bInvalidHandle *m_rbB; // typed with the bInvalidHandle placeholder struct
			
 
				+        char *m_name;
			
 
				+        int m_objectType;
			
 
				+        int m_userConstraintType;
			
 
				+        int m_userConstraintId;
			
 
				+        int m_needsFeedback;
			
 
				+        float m_appliedImpulse;
			
 
				+        float m_dbgDrawSize;
			
 
				+        int m_disableCollisionsBetweenLinkedBodies;
			
 
				+        int m_overrideNumSolverIterations;
			
 
				+        float m_breakingImpulseThreshold;
			
 
				+        int m_isEnabled;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Point2PointConstraintFloatData // Point-to-point constraint record (float): constraint header plus two pivot vectors (in A, in B).
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3Vector3FloatData m_pivotInA;
			
 
				+        b3Vector3FloatData m_pivotInB;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3Point2PointConstraintDoubleData // Point-to-point constraint record (double): constraint header plus two pivot vectors (in A, in B).
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3Vector3DoubleData m_pivotInA;
			
 
				+        b3Vector3DoubleData m_pivotInB;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3HingeConstraintDoubleData // Hinge constraint record with double-precision frames: constraint header, two transforms, three int flags, seven scalars.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3TransformDoubleData m_rbAFrame;
			
 
				+        b3TransformDoubleData m_rbBFrame;
			
 
				+        int m_useReferenceFrameA;
			
 
				+        int m_angularOnly;
			
 
				+        int m_enableAngularMotor;
			
 
				+        float m_motorTargetVelocity; // NOTE(review): this and the six scalars below are float even in the Double variant
			
 
				+        float m_maxMotorImpulse;
			
 
				+        float m_lowerLimit;
			
 
				+        float m_upperLimit;
			
 
				+        float m_limitSoftness;
			
 
				+        float m_biasFactor;
			
 
				+        float m_relaxationFactor;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3HingeConstraintFloatData // Hinge constraint record (float): constraint header, two float transforms, three int flags, seven float scalars.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3TransformFloatData m_rbAFrame;
			
 
				+        b3TransformFloatData m_rbBFrame;
			
 
				+        int m_useReferenceFrameA;
			
 
				+        int m_angularOnly;
			
 
				+        int m_enableAngularMotor;
			
 
				+        float m_motorTargetVelocity;
			
 
				+        float m_maxMotorImpulse;
			
 
				+        float m_lowerLimit;
			
 
				+        float m_upperLimit;
			
 
				+        float m_limitSoftness;
			
 
				+        float m_biasFactor;
			
 
				+        float m_relaxationFactor;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    class b3ConeTwistConstraintData // Cone-twist constraint record: constraint header, two float transforms, seven float scalars, 4 pad bytes.
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3TransformFloatData m_rbAFrame;
			
 
				+        b3TransformFloatData m_rbBFrame;
			
 
				+        float m_swingSpan1;
			
 
				+        float m_swingSpan2;
			
 
				+        float m_twistSpan;
			
 
				+        float m_limitSoftness;
			
 
				+        float m_biasFactor;
			
 
				+        float m_relaxationFactor;
			
 
				+        float m_damping;
			
 
				+        char m_pad[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a generic 6-DOF constraint: two float frames, four b3Vector3FloatData
			
 
				+    // limits (linear/angular, upper/lower) and two int flags. Also embedded by value in
			
 
				+    // b3Generic6DofSpringConstraintData.
			
 
				+    class b3Generic6DofConstraintData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3TransformFloatData m_rbAFrame;
			
 
				+        b3TransformFloatData m_rbBFrame;
			
 
				+        b3Vector3FloatData m_linearUpperLimit;
			
 
				+        b3Vector3FloatData m_linearLowerLimit;
			
 
				+        b3Vector3FloatData m_angularUpperLimit;
			
 
				+        b3Vector3FloatData m_angularLowerLimit;
			
 
				+        int m_useLinearReferenceFrameA;
			
 
				+        int m_useOffsetForConstraintFrame;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record that embeds a b3Generic6DofConstraintData by value as its first member and
			
 
				+    // adds four 6-element spring arrays (presumably one slot per degree of freedom - TODO confirm).
			
 
				+    class b3Generic6DofSpringConstraintData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Generic6DofConstraintData m_6dofData;
			
 
				+        int m_springEnabled[6];
			
 
				+        float m_equilibriumPoint[6];
			
 
				+        float m_springStiffness[6];
			
 
				+        float m_springDamping[6];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a slider constraint: two float frames, four scalar float limits
			
 
				+    // (linear/angular, upper/lower) and two int flags.
			
 
				+    class b3SliderConstraintData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TypedConstraintData m_typeConstraintData;
			
 
				+        b3TransformFloatData m_rbAFrame;
			
 
				+        b3TransformFloatData m_rbBFrame;
			
 
				+        float m_linearUpperLimit;
			
 
				+        float m_linearLowerLimit;
			
 
				+        float m_angularUpperLimit;
			
 
				+        float m_angularLowerLimit;
			
 
				+        int m_useLinearReferenceFrameA;
			
 
				+        int m_useOffsetForConstraintFrame;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record of contact-solver settings: sixteen double parameters, then five int
			
 
				+    // settings, then a 4-byte m_padding array. Embedded by value in b3DynamicsWorldDoubleData.
			
 
				+    class b3ContactSolverInfoDoubleData
			
 
				+    {
			
 
				+    public:
			
 
				+        double m_tau;
			
 
				+        double m_damping;
			
 
				+        double m_friction;
			
 
				+        double m_timeStep;
			
 
				+        double m_restitution;
			
 
				+        double m_maxErrorReduction;
			
 
				+        double m_sor;
			
 
				+        double m_erp;
			
 
				+        double m_erp2;
			
 
				+        double m_globalCfm;
			
 
				+        double m_splitImpulsePenetrationThreshold;
			
 
				+        double m_splitImpulseTurnErp;
			
 
				+        double m_linearSlop;
			
 
				+        double m_warmstartingFactor;
			
 
				+        double m_maxGyroscopicForce;
			
 
				+        double m_singleAxisRollingFrictionThreshold;
			
 
				+        int m_numIterations;
			
 
				+        int m_solverMode;
			
 
				+        int m_restingContactRestitutionThreshold;
			
 
				+        int m_minimumSolverBatchSize;
			
 
				+        int m_splitImpulse;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record of contact-solver settings: sixteen float parameters, then five int
			
 
				+    // settings, then a 4-byte m_padding array. Same member names and order as
			
 
				+    // b3ContactSolverInfoDoubleData; embedded by value in b3DynamicsWorldFloatData.
			
 
				+    class b3ContactSolverInfoFloatData
			
 
				+    {
			
 
				+    public:
			
 
				+        float m_tau;
			
 
				+        float m_damping;
			
 
				+        float m_friction;
			
 
				+        float m_timeStep;
			
 
				+        float m_restitution;
			
 
				+        float m_maxErrorReduction;
			
 
				+        float m_sor;
			
 
				+        float m_erp;
			
 
				+        float m_erp2;
			
 
				+        float m_globalCfm;
			
 
				+        float m_splitImpulsePenetrationThreshold;
			
 
				+        float m_splitImpulseTurnErp;
			
 
				+        float m_linearSlop;
			
 
				+        float m_warmstartingFactor;
			
 
				+        float m_maxGyroscopicForce;
			
 
				+        float m_singleAxisRollingFrictionThreshold;
			
 
				+        int m_numIterations;
			
 
				+        int m_solverMode;
			
 
				+        int m_restingContactRestitutionThreshold;
			
 
				+        int m_minimumSolverBatchSize;
			
 
				+        int m_splitImpulse;
			
 
				+        char m_padding[4];
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+	// -------------------------------------------------- //
			
 
				+    // Plain-data record pairing the double-precision solver settings with a double-precision gravity vector.
			
 
				+    class b3DynamicsWorldDoubleData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ContactSolverInfoDoubleData m_solverInfo;
			
 
				+        b3Vector3DoubleData m_gravity;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record pairing the single-precision solver settings with a single-precision gravity vector.
			
 
				+    class b3DynamicsWorldFloatData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3ContactSolverInfoFloatData m_solverInfo;
			
 
				+        b3Vector3FloatData m_gravity;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a soft-body material: three float stiffness values and an int flags word.
			
 
				+    // Referenced by pointer from the node, link, face and tetra records below.
			
 
				+    class SoftBodyMaterialData
			
 
				+    {
			
 
				+    public:
			
 
				+        float m_linearStiffness;
			
 
				+        float m_angularStiffness;
			
 
				+        float m_volumeStiffness;
			
 
				+        int m_flags;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for one soft-body node: a material pointer, five b3Vector3FloatData
			
 
				+    // vectors, two floats and two ints (the last one named m_pad, presumably padding - TODO confirm).
			
 
				+    class SoftBodyNodeData
			
 
				+    {
			
 
				+    public:
			
 
				+        SoftBodyMaterialData *m_material;
			
 
				+        b3Vector3FloatData m_position;
			
 
				+        b3Vector3FloatData m_previousPosition;
			
 
				+        b3Vector3FloatData m_velocity;
			
 
				+        b3Vector3FloatData m_accumulatedForce;
			
 
				+        b3Vector3FloatData m_normal;
			
 
				+        float m_inverseMass;
			
 
				+        float m_area;
			
 
				+        int m_attach;
			
 
				+        int m_pad;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for one soft-body link: a material pointer, two node indices, a rest
			
 
				+    // length and an int m_bbending (presumably a boolean flag - TODO confirm).
			
 
				+    class SoftBodyLinkData
			
 
				+    {
			
 
				+    public:
			
 
				+        SoftBodyMaterialData *m_material;
			
 
				+        int m_nodeIndices[2];
			
 
				+        float m_restLength;
			
 
				+        int m_bbending;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for one soft-body face: a normal vector, a material pointer, three
			
 
				+    // node indices and a rest area.
			
 
				+    class SoftBodyFaceData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_normal;
			
 
				+        SoftBodyMaterialData *m_material;
			
 
				+        int m_nodeIndices[3];
			
 
				+        float m_restArea;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for one soft-body tetrahedron: four b3Vector3FloatData values (m_c0),
			
 
				+    // a material pointer, four node indices, three floats and a trailing int m_pad.
			
 
				+    class SoftBodyTetraData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Vector3FloatData m_c0[4];
			
 
				+        SoftBodyMaterialData *m_material;
			
 
				+        int m_nodeIndices[4];
			
 
				+        float m_restVolume;
			
 
				+        float m_c1;
			
 
				+        float m_c2;
			
 
				+        int m_pad;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a soft-body/rigid-body anchor: a 3x3 matrix, two vectors, a
			
 
				+    // bInvalidHandle pointer for the rigid body, a node index and a float.
			
 
				+    // NOTE(review): m_rigidBody presumably gets resolved to a real object after loading - confirm.
			
 
				+    class SoftRigidAnchorData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Matrix3x3FloatData m_c0;
			
 
				+        b3Vector3FloatData m_c1;
			
 
				+        b3Vector3FloatData m_localFrame;
			
 
				+        bInvalidHandle *m_rigidBody;
			
 
				+        int m_nodeIndex;
			
 
				+        float m_c2;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record of soft-body configuration: an int m_aeroModel, twenty float
			
 
				+    // coefficients, then five int settings. Embedded by value in b3SoftBodyFloatData.
			
 
				+    class SoftBodyConfigData
			
 
				+    {
			
 
				+    public:
			
 
				+        int m_aeroModel;
			
 
				+        float m_baumgarte;
			
 
				+        float m_damping;
			
 
				+        float m_drag;
			
 
				+        float m_lift;
			
 
				+        float m_pressure;
			
 
				+        float m_volume;
			
 
				+        float m_dynamicFriction;
			
 
				+        float m_poseMatch;
			
 
				+        float m_rigidContactHardness;
			
 
				+        float m_kineticContactHardness;
			
 
				+        float m_softContactHardness;
			
 
				+        float m_anchorHardness;
			
 
				+        float m_softRigidClusterHardness;
			
 
				+        float m_softKineticClusterHardness;
			
 
				+        float m_softSoftClusterHardness;
			
 
				+        float m_softRigidClusterImpulseSplit;
			
 
				+        float m_softKineticClusterImpulseSplit;
			
 
				+        float m_softSoftClusterImpulseSplit;
			
 
				+        float m_maxVolume;
			
 
				+        float m_timeScale;
			
 
				+        int m_velocityIterations;
			
 
				+        int m_positionIterations;
			
 
				+        int m_driftIterations;
			
 
				+        int m_clusterIterations;
			
 
				+        int m_collisionFlags;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a soft-body pose: three 3x3 matrices, a vector m_com, pointers to
			
 
				+    // positions and weights with their int counts, two int fields, a rest volume and m_pad.
			
 
				+    // NOTE(review): the count member is spelled m_numWeigts (sic); renaming it would break any
			
 
				+    // code that refers to the member by name, so the spelling is kept.
			
 
				+    class SoftBodyPoseData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3Matrix3x3FloatData m_rot;
			
 
				+        b3Matrix3x3FloatData m_scale;
			
 
				+        b3Matrix3x3FloatData m_aqq;
			
 
				+        b3Vector3FloatData m_com;
			
 
				+        b3Vector3FloatData *m_positions;
			
 
				+        float *m_weights;
			
 
				+        int m_numPositions;
			
 
				+        int m_numWeigts;
			
 
				+        int m_bvolume;
			
 
				+        int m_bframe;
			
 
				+        float m_restVolume;
			
 
				+        int m_pad;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for one soft-body cluster: a transform, two 3x3 matrices, several
			
 
				+    // vectors (two of them 2-element arrays), three pointers with matching int counts
			
 
				+    // (m_numFrameRefs, m_numNodes, m_numMasses), then scalar float/int settings.
			
 
				+    // NOTE(review): each pointer presumably addresses an array of the corresponding count - confirm.
			
 
				+    class SoftBodyClusterData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3TransformFloatData m_framexform;
			
 
				+        b3Matrix3x3FloatData m_locii;
			
 
				+        b3Matrix3x3FloatData m_invwi;
			
 
				+        b3Vector3FloatData m_com;
			
 
				+        b3Vector3FloatData m_vimpulses[2];
			
 
				+        b3Vector3FloatData m_dimpulses[2];
			
 
				+        b3Vector3FloatData m_lv;
			
 
				+        b3Vector3FloatData m_av;
			
 
				+        b3Vector3FloatData *m_framerefs;
			
 
				+        int *m_nodeIndices;
			
 
				+        float *m_masses;
			
 
				+        int m_numFrameRefs;
			
 
				+        int m_numNodes;
			
 
				+        int m_numMasses;
			
 
				+        float m_idmass;
			
 
				+        float m_imass;
			
 
				+        int m_nvimpulses;
			
 
				+        int m_ndimpulses;
			
 
				+        float m_ndamping;
			
 
				+        float m_ldamping;
			
 
				+        float m_adamping;
			
 
				+        float m_matching;
			
 
				+        float m_maxSelfCollisionImpulse;
			
 
				+        float m_selfCollisionImpulseFactor;
			
 
				+        int m_containsAnchor;
			
 
				+        int m_collide;
			
 
				+        int m_clusterIndex;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Plain-data record for a soft-body joint between two bodies held as untyped void pointers.
			
 
				+    // NOTE(review): m_bodyAtype / m_bodyBtype presumably tell how to interpret m_bodyA / m_bodyB - confirm.
			
 
				+    class b3SoftBodyJointData
			
 
				+    {
			
 
				+    public:
			
 
				+        void *m_bodyA;
			
 
				+        void *m_bodyB;
			
 
				+        b3Vector3FloatData m_refs[2];
			
 
				+        float m_cfm;
			
 
				+        float m_erp;
			
 
				+        float m_split;
			
 
				+        int m_delete;
			
 
				+        b3Vector3FloatData m_relPosition[2];
			
 
				+        int m_bodyAtype;
			
 
				+        int m_bodyBtype;
			
 
				+        int m_jointType;
			
 
				+        int m_pad;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+// -------------------------------------------------- //
			
 
				+    // Top-level plain-data record for a soft body: an embedded b3CollisionObjectFloatData,
			
 
				+    // pointers to the pose/material/node/link/face/tetra/anchor/cluster/joint records, eight
			
 
				+    // int counts (there is no count for m_pose) and an embedded SoftBodyConfigData.
			
 
				+    // Note that m_materials is a pointer to pointers, unlike the other record pointers.
			
 
				+    class b3SoftBodyFloatData
			
 
				+    {
			
 
				+    public:
			
 
				+        b3CollisionObjectFloatData m_collisionObjectData;
			
 
				+        SoftBodyPoseData *m_pose;
			
 
				+        SoftBodyMaterialData **m_materials;
			
 
				+        SoftBodyNodeData *m_nodes;
			
 
				+        SoftBodyLinkData *m_links;
			
 
				+        SoftBodyFaceData *m_faces;
			
 
				+        SoftBodyTetraData *m_tetrahedra;
			
 
				+        SoftRigidAnchorData *m_anchors;
			
 
				+        SoftBodyClusterData *m_clusters;
			
 
				+        b3SoftBodyJointData *m_joints;
			
 
				+        int m_numMaterials;
			
 
				+        int m_numNodes;
			
 
				+        int m_numLinks;
			
 
				+        int m_numFaces;
			
 
				+        int m_numTetrahedra;
			
 
				+        int m_numAnchors;
			
 
				+        int m_numClusters;
			
 
				+        int m_numJoints;
			
 
				+        SoftBodyConfigData m_config;
			
 
				+    };
			
 
				+
			
 
				+
			
 
				+}
			
 
				+#endif//__BULLET_H__
			
--- a/include/Bullet2FileLoader/b3BulletFile.h
+++ b/include/Bullet2FileLoader/b3BulletFile.h
@@ -0,0 +1,83 @@
 
				+/*
			
 
				+bParse
			
 
				+Copyright (c) 2006-2010 Charlie C & Erwin Coumans  http://gamekit.googlecode.com
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_BULLET_FILE_H
			
 
				+#define B3_BULLET_FILE_H
			
 
				+
			
 
				+
			
 
				+#include "b3File.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "b3Defines.h"
			
 
				+
			
 
				+#include "Bullet3Serialize/Bullet2FileLoader/b3Serializer.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+namespace bParse {
			
 
				+
			
 
				+	// ----------------------------------------------------- //
			
 
				+	// bFile subclass that exposes the parsed content as public per-category arrays of
			
 
				+	// bStructHandle pointers (soft bodies, rigid bodies, collision objects, shapes, ...).
			
 
				+	// Only declarations are given here; the definitions live outside this header.
			
 
				+	class b3BulletFile : public bFile
			
 
				+	{
			
 
				+		
			
 
				+
			
 
				+	protected:
			
 
				+	
			
 
				+		char*	m_DnaCopy;
			
 
				+				
			
 
				+	public:
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_softBodies;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_rigidBodies;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_collisionObjects;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_collisionShapes;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_constraints;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_bvhs;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_triangleInfoMaps;
			
 
				+
			
 
				+		b3AlignedObjectArray<bStructHandle*>	m_dynamicsWorldInfo;
			
 
				+
			
 
				+		b3AlignedObjectArray<char*>				m_dataBlocks;
			
 
				+		b3BulletFile();
			
 
				+
			
 
				+		b3BulletFile(const char* fileName);
			
 
				+
			
 
				+		b3BulletFile(char *memoryBuffer, int len);
			
 
				+
			
 
				+		virtual ~b3BulletFile();
			
 
				+
			
 
				+		virtual	void	addDataBlock(char* dataBlock);
			
 
				+	
			
 
				+
			
 
				+		// Marked experimental by the original authors; fixupPointers defaults to false.
			
 
				+		virtual int		write(const char* fileName, bool fixupPointers=false);
			
 
				+
			
 
				+		virtual	void	parse(int verboseMode);
			
 
				+
			
 
				+		virtual	void parseData();
			
 
				+
			
 
				+		virtual	void	writeDNA(FILE* fp);
			
 
				+
			
 
				+		void	addStruct(const char* structType,void* data, int len, void* oldPtr, int code);
			
 
				+
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+#endif //B3_BULLET_FILE_H
			
--- a/include/Bullet2FileLoader/b3Chunk.h
+++ b/include/Bullet2FileLoader/b3Chunk.h
@@ -0,0 +1,92 @@
 
				+/*
			
 
				+bParse
			
 
				+Copyright (c) 2006-2009 Charlie C & Erwin Coumans  http://gamekit.googlecode.com
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef __BCHUNK_H__
			
 
				+#define __BCHUNK_H__
			
 
				+
			
 
				+// Select the type behind b3Long64: __int64 on _WIN32 without MinGW, int64_t (from <stdint.h>)
			
 
				+// on MinGW, and long long everywhere else.
			
 
				+#if defined (_WIN32) && ! defined (__MINGW32__)
			
 
				+	#define b3Long64 __int64
			
 
				+#elif defined (__MINGW32__)	
			
 
				+	#include <stdint.h>
			
 
				+	#define b3Long64 int64_t
			
 
				+#else
			
 
				+	#define b3Long64 long long
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+namespace bParse {
			
 
				+
			
 
				+
			
 
				+	// ----------------------------------------------------- //
			
 
				+	// Chunk header variant whose middle slot is a single int (m_uniqueInt) inside an anonymous union.
			
 
				+	// Presumably the header layout of files written with 4-byte pointers - TODO confirm.
			
 
				+	// The empty constructor leaves every member uninitialized.
			
 
				+	class bChunkPtr4
			
 
				+	{
			
 
				+	public:
			
 
				+		bChunkPtr4(){}
			
 
				+		int code;
			
 
				+		int len;
			
 
				+		union
			
 
				+		{
			
 
				+			int m_uniqueInt;
			
 
				+		};
			
 
				+		int dna_nr;
			
 
				+		int nr;
			
 
				+	};
			
 
				+
			
 
				+	// ----------------------------------------------------- //
			
 
				+	// Chunk header variant whose middle slot is an anonymous union of a b3Long64 (oldPrev) and
			
 
				+	// two ints (m_uniqueInts) sharing the same storage.
			
 
				+	// Presumably the header layout of files written with 8-byte pointers - TODO confirm.
			
 
				+	// The empty constructor leaves every member uninitialized.
			
 
				+	class bChunkPtr8
			
 
				+	{
			
 
				+	public:
			
 
				+		bChunkPtr8(){}
			
 
				+		int code,  len;
			
 
				+		union
			
 
				+		{
			
 
				+			b3Long64 oldPrev;
			
 
				+			int	m_uniqueInts[2];
			
 
				+		};
			
 
				+		int dna_nr, nr;
			
 
				+	};
			
 
				+
			
 
				+	// ----------------------------------------------------- //
			
 
				+	// Chunk header variant that stores the old address as a native void pointer, so its size
			
 
				+	// follows the pointer width of the build. CHUNK_HEADER_LEN is defined as sizeof(bChunkInd).
			
 
				+	// The empty constructor leaves every member uninitialized.
			
 
				+	class bChunkInd
			
 
				+	{
			
 
				+	public:
			
 
				+		bChunkInd(){}
			
 
				+		int code, len;
			
 
				+		void *oldPtr;
			
 
				+		int dna_nr, nr;
			
 
				+	};
			
 
				+
			
 
				+
			
 
				+	// ----------------------------------------------------- //
			
 
				+	// Collection of static helper declarations for chunk handling; no instance state.
			
 
				+	// The definitions are not part of this header.
			
 
				+	class ChunkUtils
			
 
				+	{
			
 
				+	public:
			
 
				+		
			
 
				+		// file chunk offset
			
 
				+		static int getOffset(int flags);
			
 
				+
			
 
				+		// endian utils
			
 
				+		static short swapShort(short sht);
			
 
				+		static int swapInt(int inte);
			
 
				+		static b3Long64 swapLong64(b3Long64 lng);
			
 
				+
			
 
				+	};
			
 
				+
			
 
				+
			
 
				+	// Size in bytes of the native chunk header (bChunkInd) for this build.
			
 
				+	const int CHUNK_HEADER_LEN = ((sizeof(bChunkInd)));
			
 
				+	// True when this build uses 8-byte pointers.
			
 
				+	const bool VOID_IS_8 = ((sizeof(void*)==8));
			
 
				+}
			
 
				+
			
 
				+#endif//__BCHUNK_H__
			
--- a/include/Bullet2FileLoader/b3Common.h
+++ b/include/Bullet2FileLoader/b3Common.h
@@ -0,0 +1,39 @@
 
				+/*
			
 
				+bParse
			
 
				+Copyright (c) 2006-2009 Charlie C & Erwin Coumans  http://gamekit.googlecode.com
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef __BCOMMON_H__
			
 
				+#define __BCOMMON_H__
			
 
				+
			
 
				+
			
 
				+#include <assert.h>
			
 
				+//#include "bLog.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3HashMap.h"
			
 
				+
			
 
				+// Shared forward declarations and handle typedefs for the bParse file loader.
			
 
				+namespace bParse {
			
 
				+
			
 
				+	class bMain;
			
 
				+	class bFileData;
			
 
				+	class bFile;
			
 
				+	class bDNA;
			
 
				+
			
 
				+	// Handles use this dummy struct type rather than void*, because deleting through a void* is undefined.
			
 
				+	typedef struct bStructHandle {int unused;}bStructHandle;
			
 
				+	typedef b3AlignedObjectArray<bStructHandle*>	bListBasePtr;
			
 
				+	typedef b3HashMap<b3HashPtr, bStructHandle*> bPtrMap;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif//__BCOMMON_H__
			
--- a/include/Bullet2FileLoader/b3DNA.h
+++ b/include/Bullet2FileLoader/b3DNA.h
@@ -0,0 +1,110 @@
 
				+/*
			
 
				+bParse
			
 
				+Copyright (c) 2006-2009 Charlie C & Erwin Coumans  http://gamekit.googlecode.com
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef __BDNA_H__
			
 
				+#define __BDNA_H__
			
 
				+
			
 
				+
			
 
				+#include "b3Common.h"
			
 
				+
			
 
				+namespace bParse {
			
 
				+
			
 
				+	// Per-name record kept by bDNA: the name string, whether it denotes a pointer, and two
			
 
				+	// dimensions. bDNA multiplies m_dim0 by m_dim1 to obtain the element count.
			
 
				+	struct	bNameInfo
			
 
				+	{
			
 
				+		char*	m_name;
			
 
				+		bool	m_isPointer;
			
 
				+		int		m_dim0;
			
 
				+		int		m_dim1;
			
 
				+	};
			
 
				+
			
 
				+	// Holds the type description ("DNA") tables: names, types, struct definitions and type
			
 
				+	// lengths, plus lookup maps and per-struct comparison flags. Apart from the three inline
			
 
				+	// accessors below, the member functions are only declared here.
			
 
				+	class bDNA
			
 
				+	{
			
 
				+	public:
			
 
				+		bDNA();
			
 
				+		~bDNA();
			
 
				+
			
 
				+		void init(char *data, int len, bool swap=false);
			
 
				+
			
 
				+		int getArraySize(char* str);
			
 
				+		// Element count of name index `name`: m_dim0 * m_dim1. The index is not validated here.
			
 
				+		int getArraySizeNew(short name)
			
 
				+		{
			
 
				+			const bNameInfo& nameInfo = m_Names[name];
			
 
				+			return nameInfo.m_dim0*nameInfo.m_dim1;
			
 
				+		}
			
 
				+		// Byte size of a member: element count times mPtrLen for pointer names, otherwise
			
 
				+		// element count times mTlens[type]. Neither index is validated here.
			
 
				+		int getElementSize(short type, short name)
			
 
				+		{
			
 
				+			const bNameInfo& nameInfo = m_Names[name];
			
 
				+			int size = nameInfo.m_isPointer ? mPtrLen*nameInfo.m_dim0*nameInfo.m_dim1 : mTlens[type]*nameInfo.m_dim0*nameInfo.m_dim1;
			
 
				+			return size;
			
 
				+		}
			
 
				+
			
 
				+		// Number of entries in the name table.
			
 
				+		int	getNumNames() const
			
 
				+		{
			
 
				+			return m_Names.size();
			
 
				+		}
			
 
				+
			
 
				+		char *getName(int ind);
			
 
				+		char *getType(int ind);
			
 
				+		short *getStruct(int ind);
			
 
				+		short getLength(int ind);
			
 
				+		int getReverseType(short type);
			
 
				+		int getReverseType(const char *type);
			
 
				+
			
 
				+
			
 
				+		int getNumStructs();
			
 
				+
			
 
				+		//
			
 
				+		bool lessThan(bDNA* other);
			
 
				+
			
 
				+		void initCmpFlags(bDNA *memDNA);
			
 
				+		bool flagNotEqual(int dna_nr);
			
 
				+		bool flagEqual(int dna_nr);
			
 
				+		bool flagNone(int dna_nr);
			
 
				+
			
 
				+
			
 
				+		int getPointerSize();
			
 
				+
			
 
				+		void	dumpTypeDefinitions();
			
 
				+
			
 
				+	
			
 
				+	private:
			
 
				+		// Values for mCMPFlags - presumably one per struct, set by initCmpFlags; TODO confirm.
			
 
				+		enum FileDNAFlags
			
 
				+		{
			
 
				+			FDF_NONE=0,
			
 
				+			FDF_STRUCT_NEQU,
			
 
				+			FDF_STRUCT_EQU
			
 
				+		};
			
 
				+
			
 
				+		void initRecurseCmpFlags(int i);
			
 
				+
			
 
				+		b3AlignedObjectArray<int>			mCMPFlags;
			
 
				+
			
 
				+		b3AlignedObjectArray<bNameInfo>			m_Names;
			
 
				+		b3AlignedObjectArray<char*>			mTypes;
			
 
				+		b3AlignedObjectArray<short*>			mStructs;
			
 
				+		b3AlignedObjectArray<short>			mTlens;
			
 
				+		b3HashMap<b3HashInt, int>			mStructReverse;
			
 
				+		b3HashMap<b3HashString,int>	mTypeLookup;
			
 
				+
			
 
				+		int							mPtrLen;
			
 
				+		
			
 
				+
			
 
				+
			
 
				+
			
 
				+	};
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif//__BDNA_H__
			
--- a/include/Bullet2FileLoader/b3Defines.h
+++ b/include/Bullet2FileLoader/b3Defines.h
@@ -0,0 +1,136 @@
 
				+/* Copyright (C) 2006-2009 Charlie C & Erwin Coumans http://gamekit.googlecode.com
			
 
				+*
			
 
				+* This software is provided 'as-is', without any express or implied
			
 
				+* warranty.  In no event will the authors be held liable for any damages
			
 
				+* arising from the use of this software.
			
 
				+*
			
 
				+* Permission is granted to anyone to use this software for any purpose,
			
 
				+* including commercial applications, and to alter it and redistribute it
			
 
				+* freely, subject to the following restrictions:
			
 
				+*
			
 
				+* 1. The origin of this software must not be misrepresented; you must not
			
 
				+*    claim that you wrote the original software. If you use this software
			
 
				+*    in a product, an acknowledgment in the product documentation would be
			
 
				+*    appreciated but is not required.
			
 
				+* 2. Altered source versions must be plainly marked as such, and must not be
			
 
				+*    misrepresented as being the original software.
			
 
				+* 3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+#ifndef __B_DEFINES_H__
			
 
				+#define __B_DEFINES_H__
			
 
				+
			
 
				+
			
 
				+// MISC defines, see BKE_global.h, BKE_utildefines.h
			
 
				+#define B3_SIZEOFBLENDERHEADER 12
			
 
				+
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// B3_MAKE_ID packs four character codes into one int. When one of the listed platform
			
 
				+// macros (including __BIG_ENDIAN__) is defined, `a` goes into the most significant byte;
			
 
				+// otherwise `d` does.
			
 
				+#if defined(__sgi) || defined (__sparc) || defined (__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
			
 
				+#	define B3_MAKE_ID(a,b,c,d) ( (int)(a)<<24 | (int)(b)<<16 | (c)<<8 | (d) )
			
 
				+#else
			
 
				+#	define B3_MAKE_ID(a,b,c,d) ( (int)(d)<<24 | (int)(c)<<16 | (b)<<8 | (a) )
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// B3_MAKE_ID2 packs two character codes into the low 16 bits of an int, using the same
			
 
				+// platform test as B3_MAKE_ID: `c` is the high byte in the first branch, `d` in the second.
			
 
				+#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
			
 
				+#	define B3_MAKE_ID2(c, d) ( (c)<<8 | (d) )
			
 
				+#else
			
 
				+#	define B3_MAKE_ID2(c, d) ( (d)<<8 | (c) )
			
 
				+#endif
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Two-character block codes built with B3_MAKE_ID2.
			
 
				+// Note: B3_ID_PO and B3_ID_AC are both defined from ('A', 'C') and therefore have the same value.
			
 
				+#define B3_ID_SCE		B3_MAKE_ID2('S', 'C')
			
 
				+#define B3_ID_LI		B3_MAKE_ID2('L', 'I')
			
 
				+#define B3_ID_OB		B3_MAKE_ID2('O', 'B')
			
 
				+#define B3_ID_ME		B3_MAKE_ID2('M', 'E')
			
 
				+#define B3_ID_CU		B3_MAKE_ID2('C', 'U')
			
 
				+#define B3_ID_MB		B3_MAKE_ID2('M', 'B')
			
 
				+#define B3_ID_MA		B3_MAKE_ID2('M', 'A')
			
 
				+#define B3_ID_TE		B3_MAKE_ID2('T', 'E')
			
 
				+#define B3_ID_IM		B3_MAKE_ID2('I', 'M')
			
 
				+#define B3_ID_IK		B3_MAKE_ID2('I', 'K')
			
 
				+#define B3_ID_WV		B3_MAKE_ID2('W', 'V')
			
 
				+#define B3_ID_LT		B3_MAKE_ID2('L', 'T')
			
 
				+#define B3_ID_SE		B3_MAKE_ID2('S', 'E')
			
 
				+#define B3_ID_LF		B3_MAKE_ID2('L', 'F')
			
 
				+#define B3_ID_LA		B3_MAKE_ID2('L', 'A')
			
 
				+#define B3_ID_CA		B3_MAKE_ID2('C', 'A')
			
 
				+#define B3_ID_IP		B3_MAKE_ID2('I', 'P')
			
 
				+#define B3_ID_KE		B3_MAKE_ID2('K', 'E')
			
 
				+#define B3_ID_WO		B3_MAKE_ID2('W', 'O')
			
 
				+#define B3_ID_SCR		B3_MAKE_ID2('S', 'R')
			
 
				+#define B3_ID_VF		B3_MAKE_ID2('V', 'F')
			
 
				+#define B3_ID_TXT		B3_MAKE_ID2('T', 'X')
			
 
				+#define B3_ID_SO		B3_MAKE_ID2('S', 'O')
			
 
				+#define B3_ID_SAMPLE	B3_MAKE_ID2('S', 'A')
			
 
				+#define B3_ID_GR		B3_MAKE_ID2('G', 'R')
			
 
				+#define B3_ID_ID		B3_MAKE_ID2('I', 'D')
			
 
				+#define B3_ID_AR		B3_MAKE_ID2('A', 'R')
			
 
				+#define B3_ID_AC		B3_MAKE_ID2('A', 'C')
			
 
				+#define B3_ID_SCRIPT	B3_MAKE_ID2('P', 'Y')
			
 
				+#define B3_ID_FLUIDSIM	B3_MAKE_ID2('F', 'S')
			
 
				+#define B3_ID_NT		B3_MAKE_ID2('N', 'T')
			
 
				+#define B3_ID_BR		B3_MAKE_ID2('B', 'R')
			
 
				+
			
 
				+
			
 
				+#define B3_ID_SEQ		B3_MAKE_ID2('S', 'Q')
			
 
				+#define B3_ID_CO		B3_MAKE_ID2('C', 'O')
			
 
				+#define B3_ID_PO		B3_MAKE_ID2('A', 'C')
			
 
				+#define B3_ID_NLA		B3_MAKE_ID2('N', 'L')
			
 
				+
			
 
				+#define B3_ID_VS		B3_MAKE_ID2('V', 'S')
			
 
				+#define B3_ID_VN		B3_MAKE_ID2('V', 'N')
			
 
				+
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Four-character codes built with B3_MAKE_ID (first group).
			
 
				+#define B3_FORM B3_MAKE_ID('F','O','R','M')
			
 
				+#define B3_DDG1 B3_MAKE_ID('3','D','G','1')
			
 
				+#define B3_DDG2 B3_MAKE_ID('3','D','G','2')
			
 
				+#define B3_DDG3 B3_MAKE_ID('3','D','G','3')
			
 
				+#define B3_DDG4 B3_MAKE_ID('3','D','G','4')
			
 
				+#define B3_GOUR B3_MAKE_ID('G','O','U','R')
			
 
				+#define B3_BLEN B3_MAKE_ID('B','L','E','N')
			
 
				+#define B3_DER_ B3_MAKE_ID('D','E','R','_')
			
 
				+#define B3_V100 B3_MAKE_ID('V','1','0','0')
			
 
				+#define B3_DATA B3_MAKE_ID('D','A','T','A')
			
 
				+#define B3_GLOB B3_MAKE_ID('G','L','O','B')
			
 
				+#define B3_IMAG B3_MAKE_ID('I','M','A','G')
			
 
				+#define B3_TEST B3_MAKE_ID('T','E','S','T')
			
 
				+#define B3_USER B3_MAKE_ID('U','S','E','R')
			
 
				+
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Four-character codes built with B3_MAKE_ID (second group; presumably the tags used
			
 
				+// inside the DNA block and for the end-of-file chunk - TODO confirm against the parser).
			
 
				+#define B3_DNA1 B3_MAKE_ID('D','N','A','1')
			
 
				+#define B3_REND B3_MAKE_ID('R','E','N','D')
			
 
				+#define B3_ENDB B3_MAKE_ID('E','N','D','B')
			
 
				+#define B3_NAME B3_MAKE_ID('N','A','M','E')
			
 
				+#define B3_SDNA B3_MAKE_ID('S','D','N','A')
			
 
				+#define B3_TYPE B3_MAKE_ID('T','Y','P','E')
			
 
				+#define B3_TLEN B3_MAKE_ID('T','L','E','N')
			
 
				+#define B3_STRC B3_MAKE_ID('S','T','R','C')
			
 
				+
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Reverses, in place, the first four bytes of the object `a` (bytes 0<->3 and 1<->2).
			
 
				+// Expands to a brace-enclosed block that declares the locals s_i and p_i; `a` must be an lvalue.
			
 
				+#define B3_SWITCH_INT(a) { \
			
 
				+    char s_i, *p_i; \
			
 
				+    p_i= (char *)&(a); \
			
 
				+    s_i=p_i[0]; p_i[0]=p_i[3]; p_i[3]=s_i; \
			
 
				+    s_i=p_i[1]; p_i[1]=p_i[2]; p_i[2]=s_i; }
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Swaps, in place, the first two bytes of the object `a`.
			
 
				+// Expands to a brace-enclosed block that declares the locals s_i and p_i; `a` must be an lvalue.
			
 
				+#define B3_SWITCH_SHORT(a)	{ \
			
 
				+    char s_i, *p_i; \
			
 
				+	p_i= (char *)&(a); \
			
 
				+	s_i=p_i[0]; p_i[0]=p_i[1]; p_i[1]=s_i; }
			
 
				+
			
 
				+// ------------------------------------------------------------
			
 
				+// Reverses, in place, the first eight bytes of the object `a` (bytes 0<->7, 1<->6, 2<->5, 3<->4).
			
 
				+// Expands to a brace-enclosed block that declares the locals s_i and p_i; `a` must be an lvalue.
			
 
				+#define B3_SWITCH_LONGINT(a) { \
			
 
				+    char s_i, *p_i; \
			
 
				+    p_i= (char *)&(a);  \
			
 
				+    s_i=p_i[0]; p_i[0]=p_i[7]; p_i[7]=s_i; \
			
 
				+    s_i=p_i[1]; p_i[1]=p_i[6]; p_i[6]=s_i; \
			
 
				+    s_i=p_i[2]; p_i[2]=p_i[5]; p_i[5]=s_i; \
			
 
				+    s_i=p_i[3]; p_i[3]=p_i[4]; p_i[4]=s_i; }
			
 
				+
			
 
				+#endif//__B_DEFINES_H__
			
--- a/include/Bullet2FileLoader/b3File.h
+++ b/include/Bullet2FileLoader/b3File.h
@@ -0,0 +1,165 @@
 
				+/*
			
 
				+bParse
			
 
				+Copyright (c) 2006-2009 Charlie C & Erwin Coumans  http://gamekit.googlecode.com
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef __BFILE_H__
			
 
				+#define __BFILE_H__
			
 
				+
			
 
				+#include "b3Common.h"
			
 
				+#include "b3Chunk.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+namespace bParse {
			
 
				+
			
 
				+	// bFileFlags: FD_INVALID is 0; every other value is a distinct power of two (1..128). NOTE(review): presumably the bits kept in bFile::mFlags -- confirm in the implementation file.
			
 
				+	enum bFileFlags
			
 
				+	{
			
 
				+		FD_INVALID   =0,
			
 
				+		FD_OK        =1,
			
 
				+		FD_VOID_IS_8 =2,
			
 
				+		FD_ENDIAN_SWAP      =4,
			
 
				+		FD_FILE_64   =8,
			
 
				+		FD_BITS_VARIES    =16,
			
 
				+		FD_VERSION_VARIES = 32,
			
 
				+		FD_DOUBLE_PRECISION =64,
			
 
				+		FD_BROKEN_DNA = 128
			
 
				+	};
			
 
				+
			
 
				+	enum bFileVerboseMode // Values are distinct powers of two. NOTE(review): presumably combined into the int verboseMode taken by bFile::parse() -- confirm.
			
 
				+	{
			
 
				+		FD_VERBOSE_EXPORT_XML = 1,
			
 
				+		FD_VERBOSE_DUMP_DNA_TYPE_DEFINITIONS = 2,
			
 
				+		FD_VERBOSE_DUMP_CHUNKS = 4,
			
 
				+		FD_VERBOSE_DUMP_FILE_INFO=8,
			
 
				+	};
			
 
				+	// ----------------------------------------------------- //
			
 
				+	class bFile
			
 
				+	{
			
 
				+	protected:
			
 
				+		
			
 
				+		char				m_headerString[7];
			
 
				+
			
 
				+		bool				mOwnsBuffer;
			
 
				+		char*				mFileBuffer;
			
 
				+		int					mFileLen;
			
 
				+		int					mVersion;
			
 
				+
			
 
				+
			
 
				+		bPtrMap				mLibPointers;
			
 
				+
			
 
				+		int					mDataStart;
			
 
				+		bDNA*				mFileDNA;
			
 
				+		bDNA*				mMemoryDNA;
			
 
				+
			
 
				+		b3AlignedObjectArray<char*>	m_pointerFixupArray;
			
 
				+		b3AlignedObjectArray<char*>	m_pointerPtrFixupArray;
			
 
				+		
			
 
				+		b3AlignedObjectArray<bChunkInd>	m_chunks;
			
 
				+        b3HashMap<b3HashPtr, bChunkInd> m_chunkPtrPtrMap;
			
 
				+
			
 
				+        // 
			
 
				+	
			
 
				+		bPtrMap				mDataPointers;
			
 
				+
			
 
				+		
			
 
				+		int					mFlags;
			
 
				+
			
 
				+		// ////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+			// buffer offset util
			
 
				+		int getNextBlock(bChunkInd *dataChunk,  const char *dataPtr, const int flags);
			
 
				+		void safeSwapPtr(char *dst, const char *src);
			
 
				+
			
 
				+		virtual	void parseHeader();
			
 
				+		
			
 
				+		virtual	void parseData() = 0;
			
 
				+
			
 
				+		void resolvePointersMismatch();
			
 
				+		void resolvePointersChunk(const bChunkInd& dataChunk, int verboseMode);
			
 
				+
			
 
				+		int resolvePointersStructRecursive(char *strcPtr, int old_dna, int verboseMode, int recursion);
			
 
				+		//void swapPtr(char *dst, char *src);
			
 
				+
			
 
				+		void parseStruct(char *strcPtr, char *dtPtr, int old_dna, int new_dna, bool fixupPointers);
			
 
				+		void getMatchingFileDNA(short* old, const char* lookupName, const char* lookupType, char *strcData, char *data, bool fixupPointers);
			
 
				+		char* getFileElement(short *firstStruct, char *lookupName, char *lookupType, char *data, short **foundPos);
			
 
				+
			
 
				+
			
 
				+		void swap(char *head, class bChunkInd& ch, bool ignoreEndianFlag);
			
 
				+		void swapData(char *data, short type, int arraySize, bool ignoreEndianFlag);
			
 
				+		void swapStruct(int dna_nr, char *data, bool ignoreEndianFlag);
			
 
				+		void swapLen(char *dataPtr);
			
 
				+		void swapDNA(char* ptr);
			
 
				+
			
 
				+
			
 
				+		char* readStruct(char *head, class bChunkInd& chunk);
			
 
				+		char *getAsString(int code);
			
 
				+
			
 
				+		void	parseInternal(int verboseMode, char* memDna,int memDnaLength);
			
 
				+
			
 
				+	public:
			
 
				+		bFile(const char *filename, const char headerString[7]);
			
 
				+		
			
 
				+		//todo: make memoryBuffer const char
			
 
				+		//bFile( const char *memoryBuffer, int len);
			
 
				+		bFile( char *memoryBuffer, int len, const char headerString[7]);
			
 
				+		virtual ~bFile();
			
 
				+
			
 
				+		bDNA*				getFileDNA()
			
 
				+		{
			
 
				+			return mFileDNA;
			
 
				+		}
			
 
				+
			
 
				+		virtual	void	addDataBlock(char* dataBlock) = 0;
			
 
				+
			
 
				+		int	getFlags() const
			
 
				+		{
			
 
				+			return mFlags;
			
 
				+		}
			
 
				+
			
 
				+		bPtrMap&		getLibPointers()
			
 
				+		{
			
 
				+			return mLibPointers;
			
 
				+		}
			
 
				+		
			
 
				+		void* findLibPointer(void *ptr);
			
 
				+
			
 
				+		bool ok();
			
 
				+
			
 
				+		virtual	void parse(int verboseMode) = 0;
			
 
				+
			
 
				+		virtual	int	write(const char* fileName, bool fixupPointers=false) = 0;
			
 
				+
			
 
				+		virtual	void	writeChunks(FILE* fp, bool fixupPointers );
			
 
				+
			
 
				+		virtual	void	writeDNA(FILE* fp) = 0;
			
 
				+
			
 
				+		void	updateOldPointers();
			
 
				+		void	resolvePointers(int verboseMode);
			
 
				+
			
 
				+		void	dumpChunks(bDNA* dna);
			
 
				+		
			
 
				+		int		getVersion() const
			
 
				+		{
			
 
				+			return mVersion;
			
 
				+		}
			
 
				+		//pre-swap the endianness, so that data loaded on a target with different endianness doesn't need to be swapped
			
 
				+		void preSwap();
			
 
				+		void writeFile(const char* fileName);
			
 
				+
			
 
				+	};
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif//__BFILE_H__
			
--- a/include/Bullet2FileLoader/b3Serializer.h
+++ b/include/Bullet2FileLoader/b3Serializer.h
@@ -0,0 +1,639 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SERIALIZER_H
			
 
				+#define B3_SERIALIZER_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h" // has definitions like B3_FORCE_INLINE
			
 
				+#include "Bullet3Common/b3StackAlloc.h"
			
 
				+#include "Bullet3Common/b3HashMap.h"
			
 
				+
			
 
				+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
			
 
				+#include <memory.h>
			
 
				+#endif
			
 
				+#include <string.h>
			
 
				+
			
 
				+
			
 
				+
			
 
				+extern char b3s_bulletDNAstr[];
			
 
				+extern int b3s_bulletDNAlen;
			
 
				+extern char b3s_bulletDNAstr64[];
			
 
				+extern int b3s_bulletDNAlen64;
			
 
				+
			
 
				+B3_FORCE_INLINE	int b3StrLen(const char* str) // Returns the number of chars before the terminating 0 of str, or 0 when str is a null pointer.
			
 
				+{
			
 
				+    if (!str) 
			
 
				+		return(0);
			
 
				+	int len = 0;
			
 
				+    
			
 
				+	while (*str != 0) // walk to the terminator, counting one per char
			
 
				+	{
			
 
				+        str++;
			
 
				+        len++;
			
 
				+    }
			
 
				+
			
 
				+    return len;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+class b3Chunk // Header that b3DefaultSerializer::allocate() places directly in front of each chunk's payload bytes.
			
 
				+{
			
 
				+public:
			
 
				+	int		m_chunkCode; // 0 after allocate(); set to the caller's chunkCode by finalizeChunk()
			
 
				+	int		m_length; // payload size in bytes (size*numElements in allocate())
			
 
				+	void	*m_oldPtr; // points at the payload after allocate(); replaced by the unique pointer in finalizeChunk()
			
 
				+	int		m_dna_nr; // result of getReverseType(structType), assigned in finalizeChunk(); -1 when the type is unknown
			
 
				+	int		m_number; // element count passed to allocate()
			
 
				+};
			
 
				+
			
 
				+enum	b3SerializationFlags // Bit values for the int handled by setSerializationFlags()/getSerializationFlags().
			
 
				+{
			
 
				+	B3_SERIALIZE_NO_BVH = 1, // not referenced in this header
			
 
				+	B3_SERIALIZE_NO_TRIANGLEINFOMAP = 2, // not referenced in this header
			
 
				+	B3_SERIALIZE_NO_DUPLICATE_ASSERT = 4 // when set, finalizeChunk() skips its assert that oldPtr was not finalized before
			
 
				+};
			
 
				+
			
 
				+class	b3Serializer
			
 
				+{
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	virtual ~b3Serializer() {}
			
 
				+
			
 
				+	virtual	const unsigned char*		getBufferPointer() const = 0;
			
 
				+
			
 
				+	virtual	int		getCurrentBufferSize() const = 0;
			
 
				+
			
 
				+	virtual	b3Chunk*	allocate(size_t size, int numElements) = 0;
			
 
				+
			
 
				+	virtual	void	finalizeChunk(b3Chunk* chunk, const char* structType, int chunkCode,void* oldPtr)= 0;
			
 
				+
			
 
				+	virtual	 void*	findPointer(void* oldPtr)  = 0;
			
 
				+
			
 
				+	virtual	void*	getUniquePointer(void*oldPtr) = 0;
			
 
				+
			
 
				+	virtual	void	startSerialization() = 0;
			
 
				+	
			
 
				+	virtual	void	finishSerialization() = 0;
			
 
				+
			
 
				+	virtual	const char*	findNameForPointer(const void* ptr) const = 0;
			
 
				+
			
 
				+	virtual	void	registerNameForPointer(const void* ptr, const char* name) = 0;
			
 
				+
			
 
				+	virtual void	serializeName(const char* ptr) = 0;
			
 
				+
			
 
				+	virtual int		getSerializationFlags() const = 0;
			
 
				+
			
 
				+	virtual void	setSerializationFlags(int flags) = 0;
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#define B3_HEADER_LENGTH 12
			
 
				+#if defined(__sgi) || defined (__sparc) || defined (__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
			
 
				+#	define B3_MAKE_ID(a,b,c,d) ( (int)(a)<<24 | (int)(b)<<16 | (c)<<8 | (d) )
			
 
				+#else
			
 
				+#	define B3_MAKE_ID(a,b,c,d) ( (int)(d)<<24 | (int)(c)<<16 | (b)<<8 | (a) )
			
 
				+#endif
			
 
				+
			
 
				+#define B3_SOFTBODY_CODE		B3_MAKE_ID('S','B','D','Y')
			
 
				+#define B3_COLLISIONOBJECT_CODE B3_MAKE_ID('C','O','B','J')
			
 
				+#define B3_RIGIDBODY_CODE		B3_MAKE_ID('R','B','D','Y')
			
 
				+#define B3_CONSTRAINT_CODE		B3_MAKE_ID('C','O','N','S')
			
 
				+#define B3_BOXSHAPE_CODE		B3_MAKE_ID('B','O','X','S')
			
 
				+#define B3_QUANTIZED_BVH_CODE	B3_MAKE_ID('Q','B','V','H')
			
 
				+#define B3_TRIANLGE_INFO_MAP	B3_MAKE_ID('T','M','A','P')
			
 
				+#define B3_SHAPE_CODE			B3_MAKE_ID('S','H','A','P')
			
 
				+#define B3_ARRAY_CODE			B3_MAKE_ID('A','R','A','Y')
			
 
				+#define B3_SBMATERIAL_CODE		B3_MAKE_ID('S','B','M','T')
			
 
				+#define B3_SBNODE_CODE			B3_MAKE_ID('S','B','N','D')
			
 
				+#define B3_DYNAMICSWORLD_CODE	B3_MAKE_ID('D','W','L','D')
			
 
				+#define B3_DNA_CODE				B3_MAKE_ID('D','N','A','1')
			
 
				+
			
 
				+
			
 
				+struct	b3PointerUid // Overlays a pointer with two ints so an integer id can be handed out as a void* (see getUniquePointer()).
			
 
				+{
			
 
				+	union
			
 
				+	{
			
 
				+		void*	m_ptr; // the id read back as a pointer value
			
 
				+		int		m_uniqueIds[2]; // getUniquePointer() writes the same id into both slots
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+///The b3DefaultSerializer is the main Bullet serialization class.
			
 
				+///The constructor takes an optional argument for backwards compatibility, it is recommended to leave this empty/zero.
			
 
				+class b3DefaultSerializer	:	public b3Serializer
			
 
				+{
			
 
				+
			
 
				+
			
 
				+	b3AlignedObjectArray<char*>			mTypes;
			
 
				+	b3AlignedObjectArray<short*>			mStructs;
			
 
				+	b3AlignedObjectArray<short>			mTlens;
			
 
				+	b3HashMap<b3HashInt, int>			mStructReverse;
			
 
				+	b3HashMap<b3HashString,int>	mTypeLookup;
			
 
				+
			
 
				+	
			
 
				+	b3HashMap<b3HashPtr,void*>	m_chunkP;
			
 
				+	
			
 
				+	b3HashMap<b3HashPtr,const char*>	m_nameMap;
			
 
				+
			
 
				+	b3HashMap<b3HashPtr,b3PointerUid>	m_uniquePointers;
			
 
				+	int	m_uniqueIdGenerator;
			
 
				+
			
 
				+	int					m_totalSize;
			
 
				+	unsigned char*		m_buffer;
			
 
				+	int					m_currentSize;
			
 
				+	void*				m_dna;
			
 
				+	int					m_dnaLength;
			
 
				+
			
 
				+	int					m_serializationFlags;
			
 
				+
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Chunk*>	m_chunkPtrs;
			
 
				+	
			
 
				+protected:
			
 
				+
			
 
				+	virtual	void*	findPointer(void* oldPtr) // Returns the value stored in m_chunkP for oldPtr, or 0 when there is no entry or the stored value is null.
			
 
				+	{
			
 
				+		void** ptr = m_chunkP.find(oldPtr);
			
 
				+		if (ptr && *ptr)
			
 
				+			return *ptr;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+
			
 
				+
			
 
				+		void	writeDNA() // Copies the m_dnaLength bytes at m_dna into a new chunk and finalizes it as type "DNA1" with code B3_DNA_CODE.
			
 
				+		{
			
 
				+			b3Chunk* dnaChunk = allocate(m_dnaLength,1);
			
 
				+			memcpy(dnaChunk->m_oldPtr,m_dna,m_dnaLength); // m_oldPtr still points at the chunk payload here; finalizeChunk() overwrites it next
			
 
				+			finalizeChunk(dnaChunk,"DNA1",B3_DNA_CODE, m_dna);
			
 
				+		}
			
 
				+
			
 
				+		int getReverseType(const char *type) const // Looks type up in mTypeLookup (filled by initDNA() with struct indices); returns the stored index, or -1 when absent.
			
 
				+		{
			
 
				+
			
 
				+			b3HashString key(type);
			
 
				+			const int* valuePtr = mTypeLookup.find(key);
			
 
				+			if (valuePtr)
			
 
				+				return *valuePtr;
			
 
				+			
			
 
				+			return -1;
			
 
				+		}
			
 
				+
			
 
				+		void initDNA(const char* bdnaOrg,int dnalen)
			
 
				+		{
			
 
				+			///was already initialized
			
 
				+			if (m_dna)
			
 
				+				return;
			
 
				+
			
 
				+			int littleEndian= 1;
			
 
				+			littleEndian= ((char*)&littleEndian)[0];
			
 
				+			
			
 
				+
			
 
				+			m_dna = b3AlignedAlloc(dnalen,16);
			
 
				+			memcpy(m_dna,bdnaOrg,dnalen);
			
 
				+			m_dnaLength = dnalen;
			
 
				+
			
 
				+			int *intPtr=0;
			
 
				+			short *shtPtr=0;
			
 
				+			char *cp = 0;int dataLen =0;
			
 
				+			intPtr = (int*)m_dna;
			
 
				+
			
 
				+			/*
			
 
				+				SDNA (4 bytes) (magic number)
			
 
				+				NAME (4 bytes)
			
 
				+				<nr> (4 bytes) amount of names (int)
			
 
				+				<string>
			
 
				+				<string>
			
 
				+			*/
			
 
				+
			
 
				+			if (strncmp((const char*)m_dna, "SDNA", 4)==0)
			
 
				+			{
			
 
				+				// skip ++ NAME
			
 
				+				intPtr++; intPtr++;
			
 
				+			}
			
 
				+
			
 
				+			// Parse names
			
 
				+			if (!littleEndian)
			
 
				+				*intPtr = b3SwapEndian(*intPtr);
			
 
				+				
			
 
				+			dataLen = *intPtr;
			
 
				+			
			
 
				+			intPtr++;
			
 
				+
			
 
				+			cp = (char*)intPtr;
			
 
				+			int i;
			
 
				+			for ( i=0; i<dataLen; i++)
			
 
				+			{
			
 
				+				
			
 
				+				while (*cp)cp++;
			
 
				+				cp++;
			
 
				+			}
			
 
				+			cp = b3AlignPointer(cp,4);
			
 
				+
			
 
				+			/*
			
 
				+				TYPE (4 bytes)
			
 
				+				<nr> amount of types (int)
			
 
				+				<string>
			
 
				+				<string>
			
 
				+			*/
			
 
				+
			
 
				+			intPtr = (int*)cp;
			
 
				+			b3Assert(strncmp(cp, "TYPE", 4)==0); intPtr++;
			
 
				+
			
 
				+			if (!littleEndian)
			
 
				+				*intPtr =  b3SwapEndian(*intPtr);
			
 
				+			
			
 
				+			dataLen = *intPtr;
			
 
				+			intPtr++;
			
 
				+
			
 
				+			
			
 
				+			cp = (char*)intPtr;
			
 
				+			for (i=0; i<dataLen; i++)
			
 
				+			{
			
 
				+				mTypes.push_back(cp);
			
 
				+				while (*cp)cp++;
			
 
				+				cp++;
			
 
				+			}
			
 
				+
			
 
				+			cp = b3AlignPointer(cp,4);
			
 
				+
			
 
				+
			
 
				+			/*
			
 
				+				TLEN (4 bytes)
			
 
				+				<len> (short) the lengths of types
			
 
				+				<len>
			
 
				+			*/
			
 
				+
			
 
				+			// Parse type lens
			
 
				+			intPtr = (int*)cp;
			
 
				+			b3Assert(strncmp(cp, "TLEN", 4)==0); intPtr++;
			
 
				+
			
 
				+			dataLen = (int)mTypes.size();
			
 
				+
			
 
				+			shtPtr = (short*)intPtr;
			
 
				+			for (i=0; i<dataLen; i++, shtPtr++)
			
 
				+			{
			
 
				+				if (!littleEndian)
			
 
				+					shtPtr[0] = b3SwapEndian(shtPtr[0]);
			
 
				+				mTlens.push_back(shtPtr[0]);
			
 
				+			}
			
 
				+
			
 
				+			if (dataLen & 1) shtPtr++;
			
 
				+
			
 
				+			/*
			
 
				+				STRC (4 bytes)
			
 
				+				<nr> amount of structs (int)
			
 
				+				<typenr>
			
 
				+				<nr_of_elems>
			
 
				+				<typenr>
			
 
				+				<namenr>
			
 
				+				<typenr>
			
 
				+				<namenr>
			
 
				+			*/
			
 
				+
			
 
				+			intPtr = (int*)shtPtr;
			
 
				+			cp = (char*)intPtr;
			
 
				+			b3Assert(strncmp(cp, "STRC", 4)==0); intPtr++;
			
 
				+
			
 
				+			if (!littleEndian)
			
 
				+				*intPtr = b3SwapEndian(*intPtr);
			
 
				+			dataLen = *intPtr ; 
			
 
				+			intPtr++;
			
 
				+
			
 
				+
			
 
				+			shtPtr = (short*)intPtr;
			
 
				+			for (i=0; i<dataLen; i++)
			
 
				+			{
			
 
				+				mStructs.push_back (shtPtr);
			
 
				+				
			
 
				+				if (!littleEndian)
			
 
				+				{
			
 
				+					shtPtr[0]= b3SwapEndian(shtPtr[0]);
			
 
				+					shtPtr[1]= b3SwapEndian(shtPtr[1]);
			
 
				+
			
 
				+					int len = shtPtr[1];
			
 
				+					shtPtr+= 2;
			
 
				+
			
 
				+					for (int a=0; a<len; a++, shtPtr+=2)
			
 
				+					{
			
 
				+							shtPtr[0]= b3SwapEndian(shtPtr[0]);
			
 
				+							shtPtr[1]= b3SwapEndian(shtPtr[1]);
			
 
				+					}
			
 
				+
			
 
				+				} else
			
 
				+				{
			
 
				+					shtPtr+= (2*shtPtr[1])+2;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			// build reverse lookups
			
 
				+			for (i=0; i<(int)mStructs.size(); i++)
			
 
				+			{
			
 
				+				short *strc = mStructs.at(i);
			
 
				+				mStructReverse.insert(strc[0], i);
			
 
				+				mTypeLookup.insert(b3HashString(mTypes[strc[0]]),i);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+public:	
			
 
				+	
			
 
				+
			
 
				+	
			
 
				+
			
 
				+		/// Creates a serializer and loads the DNA description matching the pointer size of this build.
			
 
				+		/// totalSize: when non-zero, a buffer of that many bytes is allocated here and every chunk is carved
			
 
				+		/// out of it; when zero, chunks are allocated one by one and merged by finishSerialization().
			
 
				+		b3DefaultSerializer(int totalSize=0)
			
 
				+			:m_uniqueIdGenerator(0), // defined value even if getUniquePointer() runs before startSerialization()
			
 
				+			m_totalSize(totalSize),
			
 
				+			m_currentSize(0),
			
 
				+			m_dna(0),
			
 
				+			m_dnaLength(0),
			
 
				+			m_serializationFlags(0)
			
 
				+		{
			
 
				+			m_buffer = m_totalSize?(unsigned char*)b3AlignedAlloc(totalSize,16):0;
			
 
				+			
			
 
				+			// 64-bit builds use the 64-bit DNA table, all others the 32-bit one
			
 
				+			const bool VOID_IS_8 = ((sizeof(void*)==8));
			
 
				+
			
 
				+#ifdef B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
			
 
				+			if (VOID_IS_8)
			
 
				+			{
			
 
				+#if _WIN64
			
 
				+				initDNA((const char*)b3s_bulletDNAstr64,b3s_bulletDNAlen64);
			
 
				+#else
			
 
				+				b3Assert(0);
			
 
				+#endif
			
 
				+			} else
			
 
				+			{
			
 
				+#ifndef _WIN64
			
 
				+				initDNA((const char*)b3s_bulletDNAstr,b3s_bulletDNAlen);
			
 
				+#else
			
 
				+				b3Assert(0);
			
 
				+#endif
			
 
				+			}
			
 
				+	
			
 
				+#else //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
			
 
				+			if (VOID_IS_8)
			
 
				+			{
			
 
				+				initDNA((const char*)b3s_bulletDNAstr64,b3s_bulletDNAlen64);
			
 
				+			} else
			
 
				+			{
			
 
				+				initDNA((const char*)b3s_bulletDNAstr,b3s_bulletDNAlen);
			
 
				+			}
			
 
				+#endif //B3_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
			
 
				+	
			
 
				+		}
			
 
				+
			
 
				+		/// Releases the output buffer, the private DNA copy and any chunks the serializer still owns.
			
 
				+		virtual ~b3DefaultSerializer() 
			
 
				+		{
			
 
				+			// Without a pre-allocated buffer each chunk comes from its own b3AlignedAlloc() call in
			
 
				+			// internalAlloc(). finishSerialization() frees them and empties m_chunkPtrs, so whatever is
			
 
				+			// still listed here belongs to a serialization that was never finished and would leak.
			
 
				+			if (!m_totalSize)
			
 
				+			{
			
 
				+				for (int i=0;i<m_chunkPtrs.size();i++)
			
 
				+					b3AlignedFree(m_chunkPtrs[i]);
			
 
				+			}
			
 
				+			if (m_buffer)
			
 
				+				b3AlignedFree(m_buffer);
			
 
				+			if (m_dna)
			
 
				+				b3AlignedFree(m_dna);
			
 
				+		}
			
 
				+
			
 
				+		void	writeHeader(unsigned char* buffer) const // Writes the 12-byte file header into buffer[0..11]; B3_HEADER_LENGTH is 12.
			
 
				+		{
			
 
				+			
			
 
				+
			
 
				+#ifdef  B3_USE_DOUBLE_PRECISION
			
 
				+			memcpy(buffer, "BULLETd", 7); // bytes 0-6: magic, last letter 'd' in double-precision builds
			
 
				+#else
			
 
				+			memcpy(buffer, "BULLETf", 7); // bytes 0-6: magic, last letter 'f' otherwise
			
 
				+#endif //B3_USE_DOUBLE_PRECISION
			
 
				+	
			
 
				+			int littleEndian= 1;
			
 
				+			littleEndian= ((char*)&littleEndian)[0]; // first byte of the int 1: non-zero only on a little-endian host
			
 
				+
			
 
				+			if (sizeof(void*)==8) // byte 7: '-' for 8-byte pointers, '_' otherwise
			
 
				+			{
			
 
				+				buffer[7] = '-';
			
 
				+			} else
			
 
				+			{
			
 
				+				buffer[7] = '_';
			
 
				+			}
			
 
				+
			
 
				+			if (littleEndian) // byte 8: 'v' on little-endian hosts, 'V' on big-endian hosts
			
 
				+			{
			
 
				+				buffer[8]='v';				
			
 
				+			} else
			
 
				+			{
			
 
				+				buffer[8]='V';
			
 
				+			}
			
 
				+
			
 
				+
			
 
				+			buffer[9] = '2'; // bytes 9-11: the digits "281" (NOTE(review): presumably the format version -- confirm)
			
 
				+			buffer[10] = '8';
			
 
				+			buffer[11] = '1';
			
 
				+
			
 
				+		}
			
 
				+
			
 
				+		virtual	void	startSerialization() // Resets the unique-id counter to 1 and, when a buffer was pre-allocated, writes the file header at its current position.
			
 
				+		{
			
 
				+			m_uniqueIdGenerator= 1;
			
 
				+			if (m_totalSize) // without a pre-allocated buffer the header is written later, by finishSerialization()
			
 
				+			{
			
 
				+				unsigned char* buffer = internalAlloc(B3_HEADER_LENGTH);
			
 
				+				writeHeader(buffer);
			
 
				+			}
			
 
				+			
			
 
				+		}
			
 
				+
			
 
				+		virtual	void	finishSerialization() // Appends the DNA chunk, builds the contiguous output buffer when none was pre-allocated, then clears the lookup tables.
			
 
				+		{
			
 
				+			writeDNA();
			
 
				+
			
 
				+			//if we didn't pre-allocate a buffer, we need to create a contiguous buffer now
			
 
				+			int mysize = 0; // running byte count; written below but never read in this function
			
 
				+			if (!m_totalSize)
			
 
				+			{
			
 
				+				if (m_buffer)
			
 
				+					b3AlignedFree(m_buffer);
			
 
				+
			
 
				+				m_currentSize += B3_HEADER_LENGTH; // chunk bytes counted by internalAlloc() plus the header
			
 
				+				m_buffer = (unsigned char*)b3AlignedAlloc(m_currentSize,16);
			
 
				+
			
 
				+				unsigned char* currentPtr = m_buffer;
			
 
				+				writeHeader(m_buffer);
			
 
				+				currentPtr += B3_HEADER_LENGTH;
			
 
				+				mysize+=B3_HEADER_LENGTH;
			
 
				+				for (int i=0;i<	m_chunkPtrs.size();i++) // copy each chunk (header + payload) behind the previous one, then free its own allocation
			
 
				+				{
			
 
				+					int curLength = sizeof(b3Chunk)+m_chunkPtrs[i]->m_length;
			
 
				+					memcpy(currentPtr,m_chunkPtrs[i], curLength);
			
 
				+					b3AlignedFree(m_chunkPtrs[i]);
			
 
				+					currentPtr+=curLength;
			
 
				+					mysize+=curLength;
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				+			mTypes.clear(); // NOTE(review): m_dna stays non-null, so initDNA() would return early and not refill these tables -- confirm the serializer is not meant to be reused
			
 
				+			mStructs.clear();
			
 
				+			mTlens.clear();
			
 
				+			mStructReverse.clear();
			
 
				+			mTypeLookup.clear();
			
 
				+			m_chunkP.clear();
			
 
				+			m_nameMap.clear();
			
 
				+			m_uniquePointers.clear();
			
 
				+			m_chunkPtrs.clear(); // the chunk pointers were freed above (or live inside m_buffer when it was pre-allocated)
			
 
				+		}
			
 
				+
			
 
				+		virtual	void*	getUniquePointer(void*oldPtr) // Returns the id-as-pointer recorded for oldPtr, creating a new one on first use; returns 0 for a null oldPtr.
			
 
				+		{
			
 
				+			if (!oldPtr)
			
 
				+				return 0;
			
 
				+
			
 
				+			b3PointerUid* uptr = (b3PointerUid*)m_uniquePointers.find(oldPtr);
			
 
				+			if (uptr) // already seen: hand back the same value as before
			
 
				+			{
			
 
				+				return uptr->m_ptr;
			
 
				+			}
			
 
				+			m_uniqueIdGenerator++; // incremented before use, so the first id after startSerialization() is 2
			
 
				+			
			
 
				+			b3PointerUid uid;
			
 
				+			uid.m_uniqueIds[0] = m_uniqueIdGenerator; // both ints get the id, so every byte that m_ptr overlays is written
			
 
				+			uid.m_uniqueIds[1] = m_uniqueIdGenerator;
			
 
				+			m_uniquePointers.insert(oldPtr,uid);
			
 
				+			return uid.m_ptr;
			
 
				+
			
 
				+		}
			
 
				+
			
 
				+		virtual	const unsigned char*		getBufferPointer() const // Returns m_buffer: the pre-allocated buffer, or the one built by finishSerialization(); 0 if neither exists yet.
			
 
				+		{
			
 
				+			return m_buffer;
			
 
				+		}
			
 
				+
			
 
				+		virtual	int					getCurrentBufferSize() const // Returns m_currentSize: bytes handed out by internalAlloc(), plus B3_HEADER_LENGTH once finishSerialization() has run without a pre-allocated buffer.
			
 
				+		{
			
 
				+			return	m_currentSize;
			
 
				+		}
			
 
				+
			
 
				+		virtual	void	finalizeChunk(b3Chunk* chunk, const char* structType, int chunkCode,void* oldPtr) // Fills in the chunk's type index, code and unique pointer, and records oldPtr -> unique pointer in m_chunkP.
			
 
				+		{
			
 
				+			if (!(m_serializationFlags&B3_SERIALIZE_NO_DUPLICATE_ASSERT)) // unless disabled by flag, assert that oldPtr has not been finalized before
			
 
				+			{
			
 
				+				b3Assert(!findPointer(oldPtr));
			
 
				+			}
			
 
				+
			
 
				+			chunk->m_dna_nr = getReverseType(structType); // -1 when structType is not in the DNA type table
			
 
				+			
			
 
				+			chunk->m_chunkCode = chunkCode;
			
 
				+			
			
 
				+			void* uniquePtr = getUniquePointer(oldPtr);
			
 
				+			
			
 
				+			m_chunkP.insert(oldPtr,uniquePtr);//chunk->m_oldPtr);
			
 
				+			chunk->m_oldPtr = uniquePtr;//oldPtr;
			
 
				+			
			
 
				+		}
			
 
				+
			
 
				+		
			
 
				+		virtual unsigned char* internalAlloc(size_t size) // Returns size bytes: the next slice of the pre-allocated buffer, or a fresh 16-byte-aligned allocation when there is none.
			
 
				+		{
			
 
				+			unsigned char* ptr = 0;
			
 
				+
			
 
				+			if (m_totalSize)
			
 
				+			{
			
 
				+				ptr = m_buffer+m_currentSize;
			
 
				+				m_currentSize += int(size);
			
 
				+				b3Assert(m_currentSize<m_totalSize); // NOTE(review): the bound is checked only by this assert, after the pointer was formed, and '<' also rejects a slice ending exactly at m_totalSize -- confirm intended
			
 
				+			} else
			
 
				+			{
			
 
				+				ptr = (unsigned char*)b3AlignedAlloc(size,16);
			
 
				+				m_currentSize += int(size); // keeps the total that finishSerialization() uses to size the merged buffer
			
 
				+			}
			
 
				+			return ptr;
			
 
				+		}
			
 
				+
			
 
				+		
			
 
				+
			
 
				+		virtual	b3Chunk*	allocate(size_t size, int numElements) // Reserves a b3Chunk header followed by size*numElements payload bytes, registers the chunk in m_chunkPtrs and returns it.
			
 
				+		{
			
 
				+
			
 
				+			unsigned char* ptr = internalAlloc(int(size)*numElements+sizeof(b3Chunk));
			
 
				+
			
 
				+			unsigned char* data = ptr + sizeof(b3Chunk); // payload starts right after the header
			
 
				+			
			
 
				+			b3Chunk* chunk = (b3Chunk*)ptr;
			
 
				+			chunk->m_chunkCode = 0;
			
 
				+			chunk->m_oldPtr = data; // callers write the payload through m_oldPtr until finalizeChunk() replaces it
			
 
				+			chunk->m_length = int(size)*numElements;
			
 
				+			chunk->m_number = numElements; // m_dna_nr is not set here; finalizeChunk() assigns it
			
 
				+			
			
 
				+			m_chunkPtrs.push_back(chunk);
			
 
				+			
			
 
				+
			
 
				+			return chunk;
			
 
				+		}
			
 
				+
			
 
				+		virtual	const char*	findNameForPointer(const void* ptr) const // Returns the name registered for ptr in m_nameMap, or 0 when there is no entry or the stored name is null.
			
 
				+		{
			
 
				+			const char*const * namePtr = m_nameMap.find(ptr);
			
 
				+			if (namePtr && *namePtr)
			
 
				+				return *namePtr;
			
 
				+			return 0;
			
 
				+
			
 
				+		}
			
 
				+
			
 
				+		virtual	void	registerNameForPointer(const void* ptr, const char* name) // Stores the name pointer itself under ptr in m_nameMap; the characters are not copied here.
			
 
				+		{
			
 
				+			m_nameMap.insert(ptr,name);
			
 
				+		}
			
 
				+
			
 
				+		/// Serializes the zero-terminated string name as a B3_ARRAY_CODE chunk of type "char".
			
 
				+		/// Nothing is written for a null or empty name, or for a name pointer findPointer() already knows.
			
 
				+		virtual void	serializeName(const char* name)
			
 
				+		{
			
 
				+			if (name)
			
 
				+			{
			
 
				+				//don't serialize name twice
			
 
				+				if (findPointer((void*)name))
			
 
				+					return;
			
 
				+
			
 
				+				int len = b3StrLen(name);
			
 
				+				if (len)
			
 
				+				{
			
 
				+
			
 
				+					// room for the terminator, rounded up to a multiple of 4 bytes
			
 
				+					int newLen = len+1;
			
 
				+					int padding = ((newLen+3)&~3)-newLen;
			
 
				+					newLen += padding;
			
 
				+
			
 
				+					//serialize name string now
			
 
				+					b3Chunk* chunk = allocate(sizeof(char),newLen);
			
 
				+					char* destinationName = (char*)chunk->m_oldPtr;
			
 
				+					for (int i=0;i<len;i++)
			
 
				+					{
			
 
				+						destinationName[i] = name[i];
			
 
				+					}
			
 
				+					// zero the terminator AND the alignment padding, so that no uninitialized
			
 
				+					// bytes from the allocation end up in the serialized output
			
 
				+					for (int i=len;i<newLen;i++)
			
 
				+					{
			
 
				+						destinationName[i] = 0;
			
 
				+					}
			
 
				+					finalizeChunk(chunk,"char",B3_ARRAY_CODE,(void*)name);
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		virtual int		getSerializationFlags() const // Returns the current flag word (0 after construction).
			
 
				+		{
			
 
				+			return m_serializationFlags;
			
 
				+		}
			
 
				+
			
 
				+		virtual void	setSerializationFlags(int flags) // Replaces the whole flag word with flags; finalizeChunk() tests it against B3_SERIALIZE_NO_DUPLICATE_ASSERT.
			
 
				+		{
			
 
				+			m_serializationFlags = flags;
			
 
				+		}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_SERIALIZER_H
			
 
				+
			
--- a/include/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h
@@ -0,0 +1,40 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_BROADPHASE_CALLBACK_H
			
 
				+#define B3_BROADPHASE_CALLBACK_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+struct b3BroadphaseProxy;
			
 
				+
			
 
				+
			
 
				+struct	b3BroadphaseAabbCallback // Abstract callback interface: implementers override process().
			
 
				+{
			
 
				+	virtual ~b3BroadphaseAabbCallback() {}
			
 
				+	virtual bool	process(const b3BroadphaseProxy* proxy) = 0; // NOTE(review): the meaning of the bool result is not visible in this header -- confirm with the callers
			
 
				+};
			
 
				+
			
 
				+
			
 
				+struct	b3BroadphaseRayCallback : public b3BroadphaseAabbCallback // Adds ray data to the callback; process() stays pure virtual, so this struct is still abstract.
			
 
				+{
			
 
				+	///added some cached data to accelerate ray-AABB tests
			
 
				+	b3Vector3		m_rayDirectionInverse; // NOTE(review): presumably the per-component reciprocal of the ray direction -- confirm where it is filled in
			
 
				+	unsigned int	m_signs[3]; // NOTE(review): presumably one sign flag per axis of the direction -- confirm
			
 
				+	b3Scalar		m_lambda_max; // NOTE(review): presumably the largest ray parameter still of interest -- confirm
			
 
				+
			
 
				+	virtual ~b3BroadphaseRayCallback() {}
			
 
				+};
			
 
				+
			
 
				+#endif //B3_BROADPHASE_CALLBACK_H
			
--- a/include/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
@@ -0,0 +1,1270 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+///b3DynamicBvh implementation by Nathanael Presson
			
 
				+
			
 
				+#ifndef B3_DYNAMIC_BOUNDING_VOLUME_TREE_H
			
 
				+#define B3_DYNAMIC_BOUNDING_VOLUME_TREE_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Geometry/b3AabbUtil.h"
			
 
				+
			
 
				+//
			
 
				+// Compile time configuration
			
 
				+//
			
 
				+
			
 
				+
			
 
				+// Implementation profiles
			
 
				+#define B3_DBVT_IMPL_GENERIC		0	// Generic implementation	
			
 
				+#define B3_DBVT_IMPL_SSE			1	// SSE
			
 
				+
			
 
				+// Template implementation of ICollide
			
 
				+#ifdef _WIN32
			
 
				+#if (defined (_MSC_VER) && _MSC_VER >= 1400)
			
 
				+#define	B3_DBVT_USE_TEMPLATE		1
			
 
				+#else
			
 
				+#define	B3_DBVT_USE_TEMPLATE		0
			
 
				+#endif
			
 
				+#else
			
 
				+#define	B3_DBVT_USE_TEMPLATE		0
			
 
				+#endif
			
 
				+
			
 
				+// Use only intrinsics instead of inline asm
			
 
				+#define B3_DBVT_USE_INTRINSIC_SSE	1
			
 
				+
			
 
				+// Use memmove for collideOCL
			
 
				+#define B3_DBVT_USE_MEMMOVE		1
			
 
				+
			
 
				+// Enable benchmarking code
			
 
				+#define	B3_DBVT_ENABLE_BENCHMARK	0
			
 
				+
			
 
				+// Inlining
			
 
				+#define B3_DBVT_INLINE				B3_FORCE_INLINE
			
 
				+
			
 
				+// Specific methods implementation
			
 
				+
			
 
				+// SSE gives errors on MSVC 7.1
			
 
				+#if defined (B3_USE_SSE) //&& defined (_WIN32)
			
 
				+#define B3_DBVT_SELECT_IMPL		B3_DBVT_IMPL_SSE
			
 
				+#define B3_DBVT_MERGE_IMPL			B3_DBVT_IMPL_SSE
			
 
				+#define B3_DBVT_INT0_IMPL			B3_DBVT_IMPL_SSE
			
 
				+#else
			
 
				+#define B3_DBVT_SELECT_IMPL		B3_DBVT_IMPL_GENERIC
			
 
				+#define B3_DBVT_MERGE_IMPL			B3_DBVT_IMPL_GENERIC
			
 
				+#define B3_DBVT_INT0_IMPL			B3_DBVT_IMPL_GENERIC
			
 
				+#endif
			
 
				+
			
 
				+#if	(B3_DBVT_SELECT_IMPL==B3_DBVT_IMPL_SSE)||	\
			
 
				+	(B3_DBVT_MERGE_IMPL==B3_DBVT_IMPL_SSE)||	\
			
 
				+	(B3_DBVT_INT0_IMPL==B3_DBVT_IMPL_SSE)
			
 
				+#include <emmintrin.h>
			
 
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Auto config and checks
			
 
				+//
			
 
				+
			
 
				+#if B3_DBVT_USE_TEMPLATE
			
 
				+#define	B3_DBVT_VIRTUAL
			
 
				+#define B3_DBVT_VIRTUAL_DTOR(a)
			
 
				+#define B3_DBVT_PREFIX					template <typename T>
			
 
				+#define B3_DBVT_IPOLICY				T& policy
			
 
				+#define B3_DBVT_CHECKTYPE				static const ICollide&	typechecker=*(T*)1;(void)typechecker;
			
 
				+#else
			
 
				+#define	B3_DBVT_VIRTUAL_DTOR(a)		virtual ~a() {}
			
 
				+#define B3_DBVT_VIRTUAL				virtual
			
 
				+#define B3_DBVT_PREFIX
			
 
				+#define B3_DBVT_IPOLICY				ICollide& policy
			
 
				+#define B3_DBVT_CHECKTYPE
			
 
				+#endif
			
 
				+
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
			
 
				+#include <memory.h>
			
 
				+#endif
			
 
				+#include <string.h>
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_USE_TEMPLATE
			
 
				+#error "B3_DBVT_USE_TEMPLATE undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_USE_MEMMOVE
			
 
				+#error "B3_DBVT_USE_MEMMOVE undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_ENABLE_BENCHMARK
			
 
				+#error "B3_DBVT_ENABLE_BENCHMARK undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_SELECT_IMPL
			
 
				+#error "B3_DBVT_SELECT_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_MERGE_IMPL
			
 
				+#error "B3_DBVT_MERGE_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_DBVT_INT0_IMPL
			
 
				+#error "B3_DBVT_INT0_IMPL undefined"
			
 
				+#endif
			
 
				+
			
 
				+//
			
 
				+// Default volumes
			
 
				+//
			
 
				+
			
 
				+/* b3DbvtAabbMm: axis-aligned bounding box stored as a minimum corner (mi) and a maximum corner (mx). */
				+struct	b3DbvtAabbMm
				+{
				+	B3_DBVT_INLINE b3Vector3			Center() const	{ return((mi+mx)/2); }	// midpoint of the two corners
				+	B3_DBVT_INLINE b3Vector3			Lengths() const	{ return(mx-mi); }	// full size along each axis
				+	B3_DBVT_INLINE b3Vector3			Extents() const	{ return((mx-mi)/2); }	// half size along each axis
				+	B3_DBVT_INLINE const b3Vector3&	Mins() const	{ return(mi); }	// read-only minimum corner
				+	B3_DBVT_INLINE const b3Vector3&	Maxs() const	{ return(mx); }	// read-only maximum corner
				+	static inline b3DbvtAabbMm		FromCE(const b3Vector3& c,const b3Vector3& e);	// from center c and half-extents e
				+	static inline b3DbvtAabbMm		FromCR(const b3Vector3& c,b3Scalar r);	// from center c and the same half-extent r on every axis
				+	static inline b3DbvtAabbMm		FromMM(const b3Vector3& mi,const b3Vector3& mx);	// from explicit min/max corners
				+	static inline b3DbvtAabbMm		FromPoints(const b3Vector3* pts,int n);	// bounds of n points; pts[0] is read unconditionally
				+	static inline b3DbvtAabbMm		FromPoints(const b3Vector3** ppts,int n);	// same, through an array of pointers to points
				+	B3_DBVT_INLINE void				Expand(const b3Vector3& e);	// mi-=e and mx+=e
				+	B3_DBVT_INLINE void				SignedExpand(const b3Vector3& e);	// per axis: a positive component is added to mx, any other to mi
				+	B3_DBVT_INLINE bool				Contain(const b3DbvtAabbMm& a) const;	// true when a lies inside this box (boundaries inclusive)
				+	B3_DBVT_INLINE int					Classify(const b3Vector3& n,b3Scalar o,int s) const;	// -1/0/+1 side test against dot(n,p)+o; s selects which corners are tested
				+	B3_DBVT_INLINE b3Scalar			ProjectMinimum(const b3Vector3& v,unsigned signs) const;	// dot(v,corner), the corner being chosen by the low three bits of signs
				+	B3_DBVT_INLINE friend bool			b3Intersect(	const b3DbvtAabbMm& a,
				+		const b3DbvtAabbMm& b);	// box/box overlap test
				+	
				+	B3_DBVT_INLINE friend bool			b3Intersect(	const b3DbvtAabbMm& a,
				+		const b3Vector3& b);	// point-in-box test
				+
				+	B3_DBVT_INLINE friend b3Scalar		b3Proximity(	const b3DbvtAabbMm& a,
				+		const b3DbvtAabbMm& b);	// sum over x,y,z of |(a.mi+a.mx)-(b.mi+b.mx)|
				+	B3_DBVT_INLINE friend int			b3Select(		const b3DbvtAabbMm& o,
				+		const b3DbvtAabbMm& a,
				+		const b3DbvtAabbMm& b);	// 0 when a is strictly nearer to o than b by b3Proximity, otherwise 1
				+	B3_DBVT_INLINE friend void			b3Merge(		const b3DbvtAabbMm& a,
				+		const b3DbvtAabbMm& b,
				+		b3DbvtAabbMm& r);	// r receives the box enclosing both a and b
				+	B3_DBVT_INLINE friend bool			b3NotEqual(	const b3DbvtAabbMm& a,
				+		const b3DbvtAabbMm& b);	// true when any x/y/z corner component differs
				+    
				+    B3_DBVT_INLINE b3Vector3&	tMins()	{ return(mi); }	// mutable access to the minimum corner
				+	B3_DBVT_INLINE b3Vector3&	tMaxs()	{ return(mx); }	// mutable access to the maximum corner
				+    
				+private:
				+	B3_DBVT_INLINE void				AddSpan(const b3Vector3& d,b3Scalar& smi,b3Scalar& smx) const;	// adds this box's min/max projection onto d to smi/smx
				+private:
				+	b3Vector3	mi,mx;	// minimum and maximum corners
				+};
			
 
				+
			
 
				+// Types: b3DbvtVolume is the bounding-volume type stored in tree nodes; it is an alias of b3DbvtAabbMm.
				+typedef	b3DbvtAabbMm	b3DbvtVolume;
			
 
				+
			
 
				+/* b3DbvtNode: one tree node; leaves and internal nodes share this layout and are told apart by childs[1]. */
				+struct	b3DbvtNode
				+{
				+	b3DbvtVolume	volume;	// bounding volume of this node
				+	b3DbvtNode*		parent;	// parent link; NOTE(review): presumably null for the root -- confirm in the implementation file
				+	B3_DBVT_INLINE bool	isleaf() const		{ return(childs[1]==0); }	// a node without a second child is a leaf
				+	B3_DBVT_INLINE bool	isinternal() const	{ return(!isleaf()); }
				+	union
				+	{
				+		b3DbvtNode*	childs[2];	// the two children of an internal node
				+		void*	data;	// payload pointer; shares storage with childs, so it is only meaningful on leaves
				+		int		dataAsInt;	// integer view of the same storage
				+	};
				+};
			
 
				+
			
 
				+///The b3DynamicBvh class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
				+///This b3DynamicBvh is used for soft body collision detection and for the b3DynamicBvhBroadphase. It has a fast insert, remove and update of nodes.
				+///Unlike the b3QuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
				+struct	b3DynamicBvh
				+{
				+	/* Stack elements used by the explicit-stack traversals */ 
				+	struct	sStkNN	// pair of nodes (used by collideTT / collideTTpersistentStack)
				+	{
				+		const b3DbvtNode*	a;
				+		const b3DbvtNode*	b;
				+		sStkNN() {}	// leaves a and b uninitialized
				+		sStkNN(const b3DbvtNode* na,const b3DbvtNode* nb) : a(na),b(nb) {}
				+	};
				+	struct	sStkNP	// node plus a mask
				+	{
				+		const b3DbvtNode*	node;
				+		int			mask;	// stored as int although the constructor takes unsigned
				+		sStkNP(const b3DbvtNode* n,unsigned m) : node(n),mask(m) {}
				+	};
				+	struct	sStkNPS	// node plus a mask plus a scalar value (the value is what nearest() compares)
				+	{
				+		const b3DbvtNode*	node;
				+		int			mask;
				+		b3Scalar	value;
				+		sStkNPS() {}	// leaves all members uninitialized
				+		sStkNPS(const b3DbvtNode* n,unsigned m,b3Scalar v) : node(n),mask(m),value(v) {}
				+	};
				+	struct	sStkCLN	// node plus a mutable parent pointer; NOTE(review): presumably used by clone() -- confirm in the implementation file
				+	{
				+		const b3DbvtNode*	node;
				+		b3DbvtNode*		parent;
				+		sStkCLN(const b3DbvtNode* n,b3DbvtNode* p) : node(n),parent(p) {}
				+	};
				+	// Policies/Interfaces
				+
				+	/* ICollide: callback policy for the traversals; every default below is a no-op or returns true */ 
				+	struct	ICollide
				+	{		
				+		B3_DBVT_VIRTUAL_DTOR(ICollide)
				+			B3_DBVT_VIRTUAL void	Process(const b3DbvtNode*,const b3DbvtNode*)		{}	// node pair (called for leaf pairs by collideTT)
				+		B3_DBVT_VIRTUAL void	Process(const b3DbvtNode*)					{}	// single node (called by enumNodes / enumLeaves)
				+		B3_DBVT_VIRTUAL void	Process(const b3DbvtNode* n,b3Scalar)			{ Process(n); }	// default ignores the scalar and forwards to Process(n)
				+		B3_DBVT_VIRTUAL bool	Descent(const b3DbvtNode*)					{ return(true); }
				+		B3_DBVT_VIRTUAL bool	AllLeaves(const b3DbvtNode*)					{ return(true); }
				+	};
				+	/* IWriter: pure-virtual sink passed to write() */ 
				+	struct	IWriter
				+	{
				+		virtual ~IWriter() {}
				+		virtual void		Prepare(const b3DbvtNode* root,int numnodes)=0;
				+		virtual void		WriteNode(const b3DbvtNode*,int index,int parent,int child0,int child1)=0;
				+		virtual void		WriteLeaf(const b3DbvtNode*,int index,int parent)=0;
				+	};
				+	/* IClone: optional hook passed to clone(); the default CloneLeaf does nothing */ 
				+	struct	IClone
				+	{
				+		virtual ~IClone()	{}
				+		virtual void		CloneLeaf(b3DbvtNode*) {}
				+	};
				+
				+	// Constants
				+	enum	{
				+		B3_SIMPLE_STACKSIZE	=	64,
				+		B3_DOUBLE_STACKSIZE	=	B3_SIMPLE_STACKSIZE*2	// initial size of the node-pair stacks in collideTT*
				+	};
				+
				+	// Fields
				+	b3DbvtNode*		m_root;	// root node; null when the tree is empty (see empty())
				+	b3DbvtNode*		m_free;	// NOTE(review): presumably a spare node kept for reuse -- confirm in the implementation file
				+	int				m_lkhd;	// NOTE(review): presumably the default lookahead used by update() -- confirm
				+	int				m_leaves;	// NOTE(review): presumably the current number of leaves -- confirm
				+	unsigned		m_opath;	// NOTE(review): purpose not visible in this header -- confirm
				+
				+	
				+	b3AlignedObjectArray<sStkNN>	m_stkStack;	// reusable stack for collideTTpersistentStack
				+	mutable b3AlignedObjectArray<const b3DbvtNode*>	m_rayTestStack;	// mutable so const methods can use it; presumably the persistent stack of rayTestInternal -- confirm
				+
				+
				+	// Methods (those without a body here are defined outside this struct)
				+	b3DynamicBvh();
				+	~b3DynamicBvh();
				+	void			clear();
				+	bool			empty() const { return(0==m_root); }	// true when there is no root node
				+	void			optimizeBottomUp();
				+	void			optimizeTopDown(int bu_treshold=128);
				+	void			optimizeIncremental(int passes);
				+	b3DbvtNode*		insert(const b3DbvtVolume& box,void* data);
				+	void			update(b3DbvtNode* leaf,int lookahead=-1);
				+	void			update(b3DbvtNode* leaf,b3DbvtVolume& volume);
				+	bool			update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector3& velocity,b3Scalar margin);
				+	bool			update(b3DbvtNode* leaf,b3DbvtVolume& volume,const b3Vector3& velocity);
				+	bool			update(b3DbvtNode* leaf,b3DbvtVolume& volume,b3Scalar margin);	
				+	void			remove(b3DbvtNode* leaf);
				+	void			write(IWriter* iwriter) const;
				+	void			clone(b3DynamicBvh& dest,IClone* iclone=0) const;
				+	static int		maxdepth(const b3DbvtNode* node);
				+	static int		countLeaves(const b3DbvtNode* node);
				+	static void		extractLeaves(const b3DbvtNode* node,b3AlignedObjectArray<const b3DbvtNode*>& leaves);
				+#if B3_DBVT_ENABLE_BENCHMARK
				+	static void		benchmark();
				+#else
				+	static void		benchmark(){}	// no-op when benchmarking is compiled out
				+#endif
				+	// B3_DBVT_IPOLICY must support ICollide policy/interface
				+	B3_DBVT_PREFIX
				+		static void		enumNodes(	const b3DbvtNode* root,
				+		B3_DBVT_IPOLICY);	// calls policy.Process on every node, parents before children
				+	B3_DBVT_PREFIX
				+		static void		enumLeaves(	const b3DbvtNode* root,
				+		B3_DBVT_IPOLICY);	// calls policy.Process on every leaf
				+	B3_DBVT_PREFIX
				+		void		collideTT(	const b3DbvtNode* root0,
				+		const b3DbvtNode* root1,
				+		B3_DBVT_IPOLICY);	// tree/tree traversal using a local stack
				+
				+	B3_DBVT_PREFIX
				+		void		collideTTpersistentStack(	const b3DbvtNode* root0,
				+		  const b3DbvtNode* root1,
				+		  B3_DBVT_IPOLICY);	// tree/tree traversal using m_stkStack
				+#if 0
				+	B3_DBVT_PREFIX
				+		void		collideTT(	const b3DbvtNode* root0,
				+		const b3DbvtNode* root1,
				+		const b3Transform& xform,
				+		B3_DBVT_IPOLICY);
				+	B3_DBVT_PREFIX
				+		void		collideTT(	const b3DbvtNode* root0,
				+		const b3Transform& xform0,
				+		const b3DbvtNode* root1,
				+		const b3Transform& xform1,
				+		B3_DBVT_IPOLICY);
				+#endif
				+
				+	B3_DBVT_PREFIX
				+		void		collideTV(	const b3DbvtNode* root,
				+		const b3DbvtVolume& volume,
				+		B3_DBVT_IPOLICY) const;
				+	///rayTest is a re-entrant ray test, and can be called in parallel as long as the b3AlignedAlloc is thread-safe (uses locking etc)
				+	///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time
				+	B3_DBVT_PREFIX
				+		static void		rayTest(	const b3DbvtNode* root,
				+		const b3Vector3& rayFrom,
				+		const b3Vector3& rayTo,
				+		B3_DBVT_IPOLICY);
				+	///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections
				+	///rayTestInternal is used by b3DynamicBvhBroadphase to accelerate world ray casts
				+	B3_DBVT_PREFIX
				+		void		rayTestInternal(	const b3DbvtNode* root,
				+								const b3Vector3& rayFrom,
				+								const b3Vector3& rayTo,
				+								const b3Vector3& rayDirectionInverse,
				+								unsigned int signs[3],
				+								b3Scalar lambda_max,
				+								const b3Vector3& aabbMin,
				+								const b3Vector3& aabbMax,
				+								B3_DBVT_IPOLICY) const;
				+
				+	B3_DBVT_PREFIX
				+		static void		collideKDOP(const b3DbvtNode* root,
				+		const b3Vector3* normals,
				+		const b3Scalar* offsets,
				+		int count,
				+		B3_DBVT_IPOLICY);
				+	B3_DBVT_PREFIX
				+		static void		collideOCL(	const b3DbvtNode* root,
				+		const b3Vector3* normals,
				+		const b3Scalar* offsets,
				+		const b3Vector3& sortaxis,
				+		int count,								
				+		B3_DBVT_IPOLICY,
				+		bool fullsort=true);
				+	B3_DBVT_PREFIX
				+		static void		collideTU(	const b3DbvtNode* root,
				+		B3_DBVT_IPOLICY);
				+	// Helpers: nearest() is a binary search, allocate() hands out a slot in a pooled array
				+	static B3_DBVT_INLINE int	nearest(const int* i,const b3DynamicBvh::sStkNPS* a,b3Scalar v,int l,int h)	// i holds indices into a; searches positions [l,h)
				+	{
				+		int	m=0;
				+		while(l<h)
				+		{
				+			m=(l+h)>>1;
				+			if(a[i[m]].value>=v) l=m+1; else h=m;	// entries with value>=v are skipped, so the search assumes values are ordered from large to small
				+		}
				+		return(h);	// first position whose value is < v, or the initial h when there is none
				+	}
				+	static B3_DBVT_INLINE int	allocate(	b3AlignedObjectArray<int>& ifree,
				+		b3AlignedObjectArray<sStkNPS>& stock,
				+		const sStkNPS& value)	// stores value in stock and returns the index it was stored at
				+	{
				+		int	i;
				+		if(ifree.size()>0)
				+		{ i=ifree[ifree.size()-1];ifree.pop_back();stock[i]=value; }	// reuse the most recently freed slot
				+		else
				+		{ i=stock.size();stock.push_back(value); }	// no free slot: append a new one
				+		return(i); 
				+	}
				+	// Copy construction is private with an empty body, so code outside the struct cannot copy a tree.
				+private:
				+	b3DynamicBvh(const b3DynamicBvh&)	{}	
				+};
			
 
				+
			
 
				+//
			
 
				+// Inline definitions: b3DbvtAabbMm members and friend functions
			
 
				+//
			
 
				+
			
 
				+// Builds the box centered at c whose half-size along each axis is given by e.
				+inline b3DbvtAabbMm			b3DbvtAabbMm::FromCE(const b3Vector3& c,const b3Vector3& e)
				+{
				+	b3DbvtAabbMm result;
				+	result.mi = c - e;	// minimum corner
				+	result.mx = c + e;	// maximum corner
				+	return result;
				+}
			
 
				+
			
 
				+// Builds the box centered at c with the same half-size r on all three axes.
				+inline b3DbvtAabbMm			b3DbvtAabbMm::FromCR(const b3Vector3& c,b3Scalar r)
				+{
				+	const b3Vector3 halfSize = b3MakeVector3(r,r,r);
				+	return FromCE(c,halfSize);
				+}
			
 
				+
			
 
				+// Builds a box directly from its minimum (mi) and maximum (mx) corners; the corners are stored as given.
				+inline b3DbvtAabbMm			b3DbvtAabbMm::FromMM(const b3Vector3& mi,const b3Vector3& mx)
				+{
				+	b3DbvtAabbMm result;
				+	result.mi = mi;
				+	result.mx = mx;
				+	return result;
				+}
			
 
				+
			
 
				+// Builds the tightest box around the n points in pts. pts[0] is always read, so the array must hold at least one point.
				+inline b3DbvtAabbMm			b3DbvtAabbMm::FromPoints(const b3Vector3* pts,int n)
				+{
				+	b3DbvtAabbMm result;
				+	// Start with a degenerate box at the first point, then grow it over the rest.
				+	result.mi = result.mx = pts[0];
				+	int idx = 1;
				+	while(idx < n)
				+	{
				+		const b3Vector3& pt = pts[idx];
				+		result.mi.setMin(pt);
				+		result.mx.setMax(pt);
				+		++idx;
				+	}
				+	return result;
				+}
			
 
				+
			
 
				+// Builds the tightest box around n points given as an array of pointers. *ppts[0] is always read, so at least one valid pointer is required.
				+inline b3DbvtAabbMm			b3DbvtAabbMm::FromPoints(const b3Vector3** ppts,int n)
				+{
				+	b3DbvtAabbMm result;
				+	// Start with a degenerate box at the first point, then grow it over the rest.
				+	result.mi = result.mx = *ppts[0];
				+	int idx = 1;
				+	while(idx < n)
				+	{
				+		const b3Vector3& pt = *ppts[idx];
				+		result.mi.setMin(pt);
				+		result.mx.setMax(pt);
				+		++idx;
				+	}
				+	return result;
				+}
			
 
				+
			
 
				+// Grows the box symmetrically: the minimum corner moves by -e and the maximum corner by +e.
				+B3_DBVT_INLINE void		b3DbvtAabbMm::Expand(const b3Vector3& e)
				+{
				+	mi -= e;
				+	mx += e;
				+}
			
 
				+
			
 
				+// Grows the box in the direction of e only: on each axis a positive component is added to the
				+// maximum corner, while a zero or negative component is added to the minimum corner.
				+B3_DBVT_INLINE void		b3DbvtAabbMm::SignedExpand(const b3Vector3& e)
				+{
				+	// X axis
				+	if(e.x>0)
				+	{
				+		mx.setX(mx.x+e[0]);
				+	}
				+	else
				+	{
				+		mi.setX(mi.x+e[0]);
				+	}
				+	// Y axis
				+	if(e.y>0)
				+	{
				+		mx.setY(mx.y+e[1]);
				+	}
				+	else
				+	{
				+		mi.setY(mi.y+e[1]);
				+	}
				+	// Z axis
				+	if(e.z>0)
				+	{
				+		mx.setZ(mx.z+e[2]);
				+	}
				+	else
				+	{
				+		mi.setZ(mi.z+e[2]);
				+	}
				+}
			
 
				+
			
 
				+// Returns true when box a lies entirely inside this box; touching boundaries count as inside.
				+B3_DBVT_INLINE bool		b3DbvtAabbMm::Contain(const b3DbvtAabbMm& a) const
				+{
				+	const bool	minsCovered=(mi.x<=a.mi.x)&&(mi.y<=a.mi.y)&&(mi.z<=a.mi.z);
				+	const bool	maxsCovered=(mx.x>=a.mx.x)&&(mx.y>=a.mx.y)&&(mx.z>=a.mx.z);
				+	return(minsCovered&&maxsCovered);
				+}
			
 
				+
			
 
				+// Classifies this box against the plane dot(n,p)+o=0 using two opposite corners selected by s.
				+//   s  : 3-bit selector. Bit 0 (value 1) picks mx.x for corner px, bit 1 (value 2) picks mx.y,
				+//        bit 2 (value 4) picks mx.z; a clear bit picks the matching mi component. Corner pi is
				+//        the diagonally opposite corner. Only the low three bits of s are used, so a value
				+//        outside 0..7 no longer leaves px/pi uninitialized (the old switch had no default case).
				+//   returns -1 when dot(n,px)+o < 0, +1 when dot(n,pi)+o >= 0, and 0 otherwise.
				+B3_DBVT_INLINE int		b3DbvtAabbMm::Classify(const b3Vector3& n,b3Scalar o,int s) const
				+{
				+	const bool	xmax=(s&1)!=0;
				+	const bool	ymax=(s&2)!=0;
				+	const bool	zmax=(s&4)!=0;
				+	const b3Vector3	px=b3MakeVector3(	xmax?mx.x:mi.x,
				+		ymax?mx.y:mi.y,
				+		zmax?mx.z:mi.z);
				+	const b3Vector3	pi=b3MakeVector3(	xmax?mi.x:mx.x,
				+		ymax?mi.y:mx.y,
				+		zmax?mi.z:mx.z);
				+	if((b3Dot(n,px)+o)<0)		return(-1);
				+	if((b3Dot(n,pi)+o)>=0)	return(+1);
				+	return(0);
				+}
			
 
				+
			
 
				+// Returns dot(p,v), where p is the box corner chosen by the low three bits of signs:
				+// a set bit takes that axis (bit 0 = x, bit 1 = y, bit 2 = z) from mi, a clear bit takes it from mx.
				+B3_DBVT_INLINE b3Scalar	b3DbvtAabbMm::ProjectMinimum(const b3Vector3& v,unsigned signs) const
				+{
				+	const b3Vector3&	cornerX=(signs&1u)?mi:mx;
				+	const b3Vector3&	cornerY=(signs&2u)?mi:mx;
				+	const b3Vector3&	cornerZ=(signs&4u)?mi:mx;
				+	const b3Vector3		corner=b3MakeVector3(cornerX.x,cornerY.y,cornerZ.z);
				+	return(b3Dot(corner,v));
				+}
			
 
				+
			
 
				+// Adds the projection interval of this box along direction d to smi (lower end) and smx (upper end).
				+// For a negative component of d the roles of the min and max corners swap on that axis.
				+B3_DBVT_INLINE void		b3DbvtAabbMm::AddSpan(const b3Vector3& d,b3Scalar& smi,b3Scalar& smx) const
				+{
				+	for(int axis=0;axis<3;++axis)
				+	{
				+		const bool	negative=(d[axis]<0);
				+		smi+=(negative?mx[axis]:mi[axis])*d[axis];
				+		smx+=(negative?mi[axis]:mx[axis])*d[axis];
				+	}
				+}
			
 
				+
			
 
				+// Box/box overlap test: true when a and b overlap or touch on all three axes.
				+B3_DBVT_INLINE bool		b3Intersect(	const b3DbvtAabbMm& a,
				+								  const b3DbvtAabbMm& b)
				+{
				+#if	B3_DBVT_INT0_IMPL == B3_DBVT_IMPL_SSE
				+	const __m128	rt(_mm_or_ps(	_mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),	// lanes where b.mx < a.mi ...
				+		_mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));	// ... or a.mx < b.mi, i.e. the boxes are separated on that axis
				+#if defined (_WIN32)
				+	const __int32*	pu((const __int32*)&rt);
				+#else
				+    const int*	pu((const int*)&rt);
				+#endif
				+	return((pu[0]|pu[1]|pu[2])==0);	// only the x/y/z lanes are inspected; the fourth lane is ignored
				+#else
				+	return(	(a.mi.x<=b.mx.x)&&
				+		(a.mx.x>=b.mi.x)&&
				+		(a.mi.y<=b.mx.y)&&
				+		(a.mx.y>=b.mi.y)&&
				+		(a.mi.z<=b.mx.z)&&		
				+		(a.mx.z>=b.mi.z));
				+#endif
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Point-in-box test: true when point b lies inside box a; points on the boundary count as inside.
				+B3_DBVT_INLINE bool		b3Intersect(	const b3DbvtAabbMm& a,
				+								  const b3Vector3& b)
				+{
				+	const bool	insideX=(b.x>=a.mi.x)&&(b.x<=a.mx.x);
				+	const bool	insideY=(b.y>=a.mi.y)&&(b.y<=a.mx.y);
				+	const bool	insideZ=(b.z>=a.mi.z)&&(b.z<=a.mx.z);
				+	return(insideX&&insideY&&insideZ);
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+//////////////////////////////////////
			
 
				+
			
 
				+
			
 
				+// Distance measure between two boxes: the sum over x, y and z of |(a.mi+a.mx)-(b.mi+b.mx)|.
				+// The corner sums are not halved, so this is twice the Manhattan distance between the box centers.
				+B3_DBVT_INLINE b3Scalar	b3Proximity(	const b3DbvtAabbMm& a,
				+								  const b3DbvtAabbMm& b)
				+{
				+	const b3Vector3	sumA=a.mi+a.mx;
				+	const b3Vector3	sumB=b.mi+b.mx;
				+	const b3Vector3	delta=sumA-sumB;
				+	return(b3Fabs(delta.x)+b3Fabs(delta.y)+b3Fabs(delta.z));
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Picks which of a and b is nearer to o by the b3Proximity measure: returns 0 when a is strictly nearer, otherwise 1.
				+B3_DBVT_INLINE int			b3Select(	const b3DbvtAabbMm& o,
				+							   const b3DbvtAabbMm& a,
				+							   const b3DbvtAabbMm& b)
				+{
				+#if	B3_DBVT_SELECT_IMPL == B3_DBVT_IMPL_SSE
				+    // NOTE(review): the two masks below differ in the fourth lane (Win32 keeps it, other platforms zero it), so only on Win32 does that lane contribute to the sums -- confirm this is intended.
				+#if defined (_WIN32)
				+	static B3_ATTRIBUTE_ALIGNED16(const unsigned __int32)	mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};	// clears the sign bit of every lane (absolute value)
				+#else
				+    static B3_ATTRIBUTE_ALIGNED16(const unsigned int)	mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x00000000 /*0x7fffffff*/};	// absolute value of x/y/z, fourth lane forced to zero
				+#endif
				+	///@todo: the intrinsic version is 11% slower
				+#if B3_DBVT_USE_INTRINSIC_SSE
				+
				+	union b3SSEUnion ///NOTE: if we use more intrinsics, move b3SSEUnion into the LinearMath directory
				+	{
				+	   __m128		ssereg;
				+	   float		floats[4];
				+	   int			ints[4];
				+	};
				+	// omi = o.mi+o.mx; ami/bmi = |(corner sum of a or b) - omi| per lane, then reduced to one sum in lane 0
				+	__m128	omi(_mm_load_ps(o.mi));
				+	omi=_mm_add_ps(omi,_mm_load_ps(o.mx));
				+	__m128	ami(_mm_load_ps(a.mi));
				+	ami=_mm_add_ps(ami,_mm_load_ps(a.mx));
				+	ami=_mm_sub_ps(ami,omi);
				+	ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask));
				+	__m128	bmi(_mm_load_ps(b.mi));
				+	bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx));
				+	bmi=_mm_sub_ps(bmi,omi);
				+	bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask));
				+	__m128	t0(_mm_movehl_ps(ami,ami));	// upper two lanes moved down ...
				+	ami=_mm_add_ps(ami,t0);	// ... and added onto the lower two
				+	ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1));	// lane 0 += lane 1: lane 0 now holds the sum of all four lanes
				+	__m128 t1(_mm_movehl_ps(bmi,bmi));
				+	bmi=_mm_add_ps(bmi,t1);
				+	bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1));
				+	
				+	b3SSEUnion tmp;
				+	tmp.ssereg = _mm_cmple_ss(bmi,ami);	// lane 0 is all ones when b's sum <= a's sum
				+	return tmp.ints[0]&1;	// 1 selects b, 0 selects a
				+
				+#else
				+	B3_ATTRIBUTE_ALIGNED16(__int32	r[1]);
				+	__asm
				+	{
				+		mov		eax,o
				+			mov		ecx,a
				+			mov		edx,b
				+			movaps	xmm0,[eax]
				+		movaps	xmm5,mask
				+			addps	xmm0,[eax+16]	
				+		movaps	xmm1,[ecx]
				+		movaps	xmm2,[edx]
				+		addps	xmm1,[ecx+16]
				+		addps	xmm2,[edx+16]
				+		subps	xmm1,xmm0
				+			subps	xmm2,xmm0
				+			andps	xmm1,xmm5
				+			andps	xmm2,xmm5
				+			movhlps	xmm3,xmm1
				+			movhlps	xmm4,xmm2
				+			addps	xmm1,xmm3
				+			addps	xmm2,xmm4
				+			pshufd	xmm3,xmm1,1
				+			pshufd	xmm4,xmm2,1
				+			addss	xmm1,xmm3
				+			addss	xmm2,xmm4
				+			cmpless	xmm2,xmm1
				+			movss	r,xmm2
				+	}
				+	return(r[0]&1);
				+#endif
				+#else
				+	return(b3Proximity(o,a)<b3Proximity(o,b)?0:1);	// generic path
				+#endif
				+}
			
 
				+
			
 
				+// Writes into r the smallest box enclosing both a and b (per-axis minimum of the mins, maximum of the maxs).
				+B3_DBVT_INLINE void		b3Merge(	const b3DbvtAabbMm& a,
				+							  const b3DbvtAabbMm& b,
				+							  b3DbvtAabbMm& r)
				+{
				+#if B3_DBVT_MERGE_IMPL==B3_DBVT_IMPL_SSE
				+	__m128	ami(_mm_load_ps(a.mi));	// all inputs are loaded before r is stored, so r may alias a or b
				+	__m128	amx(_mm_load_ps(a.mx));
				+	__m128	bmi(_mm_load_ps(b.mi));
				+	__m128	bmx(_mm_load_ps(b.mx));
				+	ami=_mm_min_ps(ami,bmi);
				+	amx=_mm_max_ps(amx,bmx);
				+	_mm_store_ps(r.mi,ami);
				+	_mm_store_ps(r.mx,amx);
				+#else
				+	for(int i=0;i<3;++i)	// generic path: x, y and z only
				+	{
				+		if(a.mi[i]<b.mi[i]) r.mi[i]=a.mi[i]; else r.mi[i]=b.mi[i];
				+		if(a.mx[i]>b.mx[i]) r.mx[i]=a.mx[i]; else r.mx[i]=b.mx[i];
				+	}
				+#endif
				+}
			
 
				+
			
 
				+// Returns true when the two boxes differ in at least one x/y/z component of either corner.
				+B3_DBVT_INLINE bool		b3NotEqual(	const b3DbvtAabbMm& a,
				+								 const b3DbvtAabbMm& b)
				+{
				+	const bool	sameMins=(a.mi.x==b.mi.x)&&(a.mi.y==b.mi.y)&&(a.mi.z==b.mi.z);
				+	const bool	sameMaxs=(a.mx.x==b.mx.x)&&(a.mx.y==b.mx.y)&&(a.mx.z==b.mx.z);
				+	return(!(sameMins&&sameMaxs));
				+}
			
 
				+
			
 
				+//
			
 
				+// Inline definitions: b3DynamicBvh traversal methods
			
 
				+//
			
 
				+
			
 
				+// Visits every node of the subtree at root, calling policy.Process(node) on a node before its children.
				+// root is dereferenced without a null check.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::enumNodes(	const b3DbvtNode* root,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	policy.Process(root);
				+	if(!root->isinternal())
				+	{
				+		return;	// leaf: nothing below to visit
				+	}
				+	enumNodes(root->childs[0],policy);
				+	enumNodes(root->childs[1],policy);
				+}
			
 
				+
			
 
				+// Visits the subtree at root and calls policy.Process(node) on leaves only, first child before second.
				+// root is dereferenced without a null check.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::enumLeaves(	const b3DbvtNode* root,
				+								   B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+	if(!root->isinternal())
				+	{
				+		policy.Process(root);
				+		return;
				+	}
				+	enumLeaves(root->childs[0],policy);
				+	enumLeaves(root->childs[1],policy);
				+}
			
 
				+
			
 
				+// Tree/tree traversal with a local explicit stack: calls policy.Process(a,b) for every pair of leaves whose volumes intersect. Does nothing when either root is null.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::collideTT(	const b3DbvtNode* root0,
				+								  const b3DbvtNode* root1,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+		if(root0&&root1)
				+		{
				+			int								depth=1;	// number of pending pairs on the stack
				+			int								treshold=B3_DOUBLE_STACKSIZE-4;	// an iteration pushes at most four pairs, so growing past size-4 keeps the pushes in bounds
				+			b3AlignedObjectArray<sStkNN>	stkStack;
				+			stkStack.resize(B3_DOUBLE_STACKSIZE);
				+			stkStack[0]=sStkNN(root0,root1);
				+			do	{		
				+				sStkNN	p=stkStack[--depth];	// pop by value: the resize below may reallocate the array
				+				if(depth>treshold)
				+				{
				+					stkStack.resize(stkStack.size()*2);
				+					treshold=stkStack.size()-4;
				+				}
				+				if(p.a==p.b)	// same node on both sides: expand without an intersection test
				+				{
				+					if(p.a->isinternal())	// a leaf paired with itself is dropped, never reported
				+					{
				+						stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]);
				+						stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]);
				+						stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]);
				+					}
				+				}
				+				else if(b3Intersect(p.a->volume,p.b->volume))	// non-overlapping pairs are pruned here
				+				{
				+					if(p.a->isinternal())
				+					{
				+						if(p.b->isinternal())
				+						{
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
				+						}
				+						else
				+						{
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
				+						}
				+					}
				+					else
				+					{
				+						if(p.b->isinternal())
				+						{
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
				+						}
				+						else
				+						{
				+							policy.Process(p.a,p.b);	// both are leaves and their volumes intersect
				+						}
				+					}
				+				}
				+			} while(depth);
				+		}
				+}
			
 
				+
			
 
				+
			
 
				+// Tree/tree traversal that reports intersecting leaf pairs through policy.Process(a,b), using the member array m_stkStack as its stack instead of a local one; because it writes to that member it must not run concurrently on the same object.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::collideTTpersistentStack(	const b3DbvtNode* root0,
				+								  const b3DbvtNode* root1,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+		if(root0&&root1)
				+		{
				+			int								depth=1;	// number of pending pairs on the stack
				+			int								treshold=B3_DOUBLE_STACKSIZE-4;	// an iteration pushes at most four pairs
				+			
				+			m_stkStack.resize(B3_DOUBLE_STACKSIZE);	// the size is reset on every call, even if an earlier call grew the array
				+			m_stkStack[0]=sStkNN(root0,root1);
				+			do	{		
				+				sStkNN	p=m_stkStack[--depth];	// pop by value: the resize below may reallocate the array
				+				if(depth>treshold)
				+				{
				+					m_stkStack.resize(m_stkStack.size()*2);
				+					treshold=m_stkStack.size()-4;
				+				}
				+				if(p.a==p.b)	// same node on both sides: expand without an intersection test
				+				{
				+					if(p.a->isinternal())	// a leaf paired with itself is dropped, never reported
				+					{
				+						m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]);
				+						m_stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]);
				+						m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]);
				+					}
				+				}
				+				else if(b3Intersect(p.a->volume,p.b->volume))	// non-overlapping pairs are pruned here
				+				{
				+					if(p.a->isinternal())
				+					{
				+						if(p.b->isinternal())
				+						{
				+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
				+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
				+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
				+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
				+						}
				+						else
				+						{
				+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b);
				+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
				+						}
				+					}
				+					else
				+					{
				+						if(p.b->isinternal())
				+						{
				+							m_stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);
				+							m_stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
				+						}
				+						else
				+						{
				+							policy.Process(p.a,p.b);	// both are leaves and their volumes intersect
				+						}
				+					}
				+				}
				+			} while(depth);
				+		}
				+}
			
 
				+
			
 
				+#if 0
			
 
				+// Disabled code (inside #if 0): tree/tree traversal whose volume test takes a transform xform as a third argument; unlike the enabled overload it has no special case for p.a==p.b.
				+B3_DBVT_PREFIX
				+inline void		b3DynamicBvh::collideTT(	const b3DbvtNode* root0,
				+								  const b3DbvtNode* root1,
				+								  const b3Transform& xform,
				+								  B3_DBVT_IPOLICY)
				+{
				+	B3_DBVT_CHECKTYPE
				+		if(root0&&root1)
				+		{
				+			int								depth=1;	// number of pending pairs on the stack
				+			int								treshold=B3_DOUBLE_STACKSIZE-4;	// an iteration pushes at most four pairs
				+			b3AlignedObjectArray<sStkNN>	stkStack;
				+			stkStack.resize(B3_DOUBLE_STACKSIZE);
				+			stkStack[0]=sStkNN(root0,root1);
				+			do	{
				+				sStkNN	p=stkStack[--depth];
				+				if(b3Intersect(p.a->volume,p.b->volume,xform))	// three-argument overload; it is not declared in the visible part of this header
				+				{
				+					if(depth>treshold)	// grow only when this iteration may push
				+					{
				+						stkStack.resize(stkStack.size()*2);
				+						treshold=stkStack.size()-4;
				+					}
				+					if(p.a->isinternal())
				+					{
				+						if(p.b->isinternal())
				+						{					
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
				+						}
				+						else
				+						{
				+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b);
				+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
				+						}
				+					}
				+					else
				+					{
				+						if(p.b->isinternal())
				+						{
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);
				+							stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
				+						}
				+						else
				+						{
				+							policy.Process(p.a,p.b);	// both are leaves and the transformed volumes intersect
				+						}
				+					}
				+				}
				+			} while(depth);
				+		}
				+}
			
 
				+// collideTT overload (also inside the #if 0 region): builds xform0.inverse()*xform1 and forwards to the single-transform collideTT.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::collideTT(	const b3DbvtNode* root0,
			
 
				+								  const b3Transform& xform0,
			
 
				+								  const b3DbvtNode* root1,
			
 
				+								  const b3Transform& xform1,
			
 
				+								  B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	const b3Transform	xform=xform0.inverse()*xform1;
			
 
				+	collideTT(root0,root1,xform,policy);
			
 
				+}
			
 
				+#endif 
			
 
				+
			
 
				+// collideTV: depth-first walk from root; every leaf whose volume intersects vol (per b3Intersect) is passed to policy.Process. No-op when root is null.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::collideTV(	const b3DbvtNode* root,
			
 
				+								  const b3DbvtVolume& vol,
			
 
				+								  B3_DBVT_IPOLICY) const
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+		if(root)
			
 
				+		{
			
 
				+			B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)		volume(vol); // local copy of vol used for all intersection tests
			
 
				+			b3AlignedObjectArray<const b3DbvtNode*>	stack;
			
 
				+			stack.resize(0);
			
 
				+			stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			stack.push_back(root);
			
 
				+			do	{
			
 
				+				const b3DbvtNode*	n=stack[stack.size()-1];
			
 
				+				stack.pop_back();
			
 
				+				if(b3Intersect(n->volume,volume))
			
 
				+				{
			
 
				+					if(n->isinternal())
			
 
				+					{
			
 
				+						stack.push_back(n->childs[0]);
			
 
				+						stack.push_back(n->childs[1]);
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						policy.Process(n);
			
 
				+					}
			
 
				+				}
			
 
				+			} while(stack.size()>0);
			
 
				+		}
			
 
				+}
			
 
				+// rayTestInternal: ray walk that takes rayDirectionInverse, signs and lambda_max from the caller. Each node is tested with b3RayAabb2 against bounds (Mins()-aabbMax, Maxs()-aabbMin); hit leaves go to policy.Process. rayTo is unused. Traversal storage is the member m_rayTestStack.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::rayTestInternal(	const b3DbvtNode* root,
			
 
				+								const b3Vector3& rayFrom,
			
 
				+								const b3Vector3& rayTo,
			
 
				+								const b3Vector3& rayDirectionInverse,
			
 
				+								unsigned int signs[3],
			
 
				+								b3Scalar lambda_max,
			
 
				+								const b3Vector3& aabbMin,
			
 
				+								const b3Vector3& aabbMax,
			
 
				+								B3_DBVT_IPOLICY) const
			
 
				+{
			
 
				+        (void) rayTo;
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+	if(root)
			
 
				+	{
			
 
				+		b3Vector3 resultNormal;
			
 
				+
			
 
				+		int								depth=1; // number of pending nodes on the stack
			
 
				+		int								treshold=B3_DOUBLE_STACKSIZE-2; // growth trigger: keeps room for the two child pushes
			
 
				+		b3AlignedObjectArray<const b3DbvtNode*>&	stack = m_rayTestStack;
			
 
				+		stack.resize(B3_DOUBLE_STACKSIZE);
			
 
				+		stack[0]=root;
			
 
				+		b3Vector3 bounds[2];
			
 
				+		do	
			
 
				+		{
			
 
				+			const b3DbvtNode*	node=stack[--depth];
			
 
				+			bounds[0] = node->volume.Mins()-aabbMax;
			
 
				+			bounds[1] = node->volume.Maxs()-aabbMin;
			
 
				+			b3Scalar tmin=1.f,lambda_min=0.f;
			
 
				+			unsigned int result1=false;
			
 
				+			result1 = b3RayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
			
 
				+			if(result1)
			
 
				+			{
			
 
				+				if(node->isinternal())
			
 
				+				{
			
 
				+					if(depth>treshold)
			
 
				+					{
			
 
				+						stack.resize(stack.size()*2);
			
 
				+						treshold=stack.size()-2;
			
 
				+					}
			
 
				+					stack[depth++]=node->childs[0];
			
 
				+					stack[depth++]=node->childs[1];
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					policy.Process(node);
			
 
				+				}
			
 
				+			}
			
 
				+		} while(depth);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// rayTest: computes the normalized direction, its per-axis inverse, sign flags and lambda_max from rayFrom/rayTo, then walks the tree from root with a local stack; leaves whose volume passes b3RayAabb2 go to policy.Process. No-op when root is null.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::rayTest(	const b3DbvtNode* root,
			
 
				+								const b3Vector3& rayFrom,
			
 
				+								const b3Vector3& rayTo,
			
 
				+								B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+		if(root)
			
 
				+		{
			
 
				+			b3Vector3 rayDir = (rayTo-rayFrom);
			
 
				+			rayDir.normalize ();
			
 
				+
			
 
				+			///what about division by zero? --> just set rayDirectionInverse[i] to B3_LARGE_FLOAT when rayDir[i] is zero
			
 
				+			b3Vector3 rayDirectionInverse;
			
 
				+			rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0];
			
 
				+			rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1];
			
 
				+			rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2];
			
 
				+			unsigned int signs[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};
			
 
				+
			
 
				+			b3Scalar lambda_max = rayDir.dot(rayTo-rayFrom); // rayDir was normalized above, so this is the projection of the segment onto its own direction
			
 
				+
			
 
				+			b3Vector3 resultNormal;
			
 
				+
			
 
				+			b3AlignedObjectArray<const b3DbvtNode*>	stack;
			
 
				+
			
 
				+			int								depth=1; // number of pending nodes on the stack
			
 
				+			int								treshold=B3_DOUBLE_STACKSIZE-2; // growth trigger: keeps room for the two child pushes
			
 
				+
			
 
				+			stack.resize(B3_DOUBLE_STACKSIZE);
			
 
				+			stack[0]=root;
			
 
				+			b3Vector3 bounds[2];
			
 
				+			do	{
			
 
				+				const b3DbvtNode*	node=stack[--depth];
			
 
				+
			
 
				+				bounds[0] = node->volume.Mins();
			
 
				+				bounds[1] = node->volume.Maxs();
			
 
				+				
			
 
				+				b3Scalar tmin=1.f,lambda_min=0.f;
			
 
				+				unsigned int result1 = b3RayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
			
 
				+
			
 
				+#ifdef COMPARE_BTRAY_AABB2
			
 
				+				b3Scalar param=1.f;
			
 
				+				bool result2 = b3RayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal);
			
 
				+				b3Assert(result1 == result2);
			
 
				+#endif //COMPARE_BTRAY_AABB2
			
 
				+
			
 
				+				if(result1)
			
 
				+				{
			
 
				+					if(node->isinternal())
			
 
				+					{
			
 
				+						if(depth>treshold)
			
 
				+						{
			
 
				+							stack.resize(stack.size()*2);
			
 
				+							treshold=stack.size()-2;
			
 
				+						}
			
 
				+						stack[depth++]=node->childs[0];
			
 
				+						stack[depth++]=node->childs[1];
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						policy.Process(node);
			
 
				+					}
			
 
				+				}
			
 
				+			} while(depth);
			
 
				+
			
 
				+		}
			
 
				+}
			
 
				+
			
 
				+// collideKDOP: classifies nodes against `count` planes (normals[i], offsets[i]). A node classified -1 by any plane is dropped; +1 sets that plane's bit in the node mask. When the mask equals `inside` or the node is a leaf, policy.AllLeaves(node) decides whether enumLeaves is run on it; otherwise both children are pushed.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::collideKDOP(const b3DbvtNode* root,
			
 
				+									const b3Vector3* normals,
			
 
				+									const b3Scalar* offsets,
			
 
				+									int count,
			
 
				+									B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+		if(root)
			
 
				+		{
			
 
				+			const int						inside=(1<<count)-1; // mask value with one bit set per plane
			
 
				+			b3AlignedObjectArray<sStkNP>	stack;
			
 
				+			int								signs[sizeof(unsigned)*8];
			
 
				+			b3Assert(count<int (sizeof(signs)/sizeof(signs[0])));
			
 
				+			for(int i=0;i<count;++i)
			
 
				+			{
			
 
				+				signs[i]=	((normals[i].x>=0)?1:0)+
			
 
				+					((normals[i].y>=0)?2:0)+
			
 
				+					((normals[i].z>=0)?4:0);
			
 
				+			}
			
 
				+			stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			stack.push_back(sStkNP(root,0));
			
 
				+			do	{
			
 
				+				sStkNP	se=stack[stack.size()-1];
			
 
				+				bool	out=false;
			
 
				+				stack.pop_back();
			
 
				+				for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1)
			
 
				+				{
			
 
				+					if(0==(se.mask&j))
			
 
				+					{
			
 
				+						const int	side=se.node->volume.Classify(normals[i],offsets[i],signs[i]);
			
 
				+						switch(side)
			
 
				+						{
			
 
				+						case	-1:	out=true;break;
			
 
				+						case	+1:	se.mask|=j;break;
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+				if(!out)
			
 
				+				{
			
 
				+					if((se.mask!=inside)&&(se.node->isinternal()))
			
 
				+					{
			
 
				+						stack.push_back(sStkNP(se.node->childs[0],se.mask));
			
 
				+						stack.push_back(sStkNP(se.node->childs[1],se.mask));
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						if(policy.AllLeaves(se.node)) enumLeaves(se.node,policy);
			
 
				+					}
			
 
				+				}
			
 
				+			} while(stack.size());
			
 
				+		}
			
 
				+}
			
 
				+
			
 
				+// collideOCL: plane culling as in collideKDOP, with descent gated by policy.Descent. Stack entries are indices into `stock` (slots recycled through `ifree`). With fsort set and a non-empty stack, each child is inserted at the position returned by nearest() for its ProjectMinimum(sortaxis) value; otherwise children are pushed. Leaves go to policy.Process(node,value).
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::collideOCL(	const b3DbvtNode* root,
			
 
				+								   const b3Vector3* normals,
			
 
				+								   const b3Scalar* offsets,
			
 
				+								   const b3Vector3& sortaxis,
			
 
				+								   int count,
			
 
				+								   B3_DBVT_IPOLICY,
			
 
				+								   bool fsort)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+		if(root)
			
 
				+		{
			
 
				+			const unsigned					srtsgns=(sortaxis[0]>=0?1:0)+
			
 
				+				(sortaxis[1]>=0?2:0)+
			
 
				+				(sortaxis[2]>=0?4:0);
			
 
				+			const int						inside=(1<<count)-1; // mask value with one bit set per plane
			
 
				+			b3AlignedObjectArray<sStkNPS>	stock;
			
 
				+			b3AlignedObjectArray<int>		ifree;
			
 
				+			b3AlignedObjectArray<int>		stack;
			
 
				+			int								signs[sizeof(unsigned)*8];
			
 
				+			b3Assert(count<int (sizeof(signs)/sizeof(signs[0])));
			
 
				+			for(int i=0;i<count;++i)
			
 
				+			{
			
 
				+				signs[i]=	((normals[i].x>=0)?1:0)+
			
 
				+					((normals[i].y>=0)?2:0)+
			
 
				+					((normals[i].z>=0)?4:0);
			
 
				+			}
			
 
				+			stock.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			ifree.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			stack.push_back(allocate(ifree,stock,sStkNPS(root,0,root->volume.ProjectMinimum(sortaxis,srtsgns))));
			
 
				+			do	{
			
 
				+				const int	id=stack[stack.size()-1];
			
 
				+				sStkNPS		se=stock[id];
			
 
				+				stack.pop_back();ifree.push_back(id);
			
 
				+				if(se.mask!=inside)
			
 
				+				{
			
 
				+					bool	out=false;
			
 
				+					for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1)
			
 
				+					{
			
 
				+						if(0==(se.mask&j))
			
 
				+						{
			
 
				+							const int	side=se.node->volume.Classify(normals[i],offsets[i],signs[i]);
			
 
				+							switch(side)
			
 
				+							{
			
 
				+							case	-1:	out=true;break;
			
 
				+							case	+1:	se.mask|=j;break;
			
 
				+							}
			
 
				+						}
			
 
				+					}
			
 
				+					if(out) continue;
			
 
				+				}
			
 
				+				if(policy.Descent(se.node))
			
 
				+				{
			
 
				+					if(se.node->isinternal())
			
 
				+					{
			
 
				+						const b3DbvtNode* pns[]={	se.node->childs[0],se.node->childs[1]};
			
 
				+						sStkNPS		nes[]={	sStkNPS(pns[0],se.mask,pns[0]->volume.ProjectMinimum(sortaxis,srtsgns)),
			
 
				+							sStkNPS(pns[1],se.mask,pns[1]->volume.ProjectMinimum(sortaxis,srtsgns))};
			
 
				+						const int	q=nes[0].value<nes[1].value?1:0;				
			
 
				+						int			j=stack.size();
			
 
				+						if(fsort&&(j>0))
			
 
				+						{
			
 
				+							/* Insert 0	*/ 
			
 
				+							j=nearest(&stack[0],&stock[0],nes[q].value,0,stack.size());
			
 
				+							stack.push_back(0);
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+							memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1));
			
 
				+#else
			
 
				+							for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1];
			
 
				+#endif
			
 
				+							stack[j]=allocate(ifree,stock,nes[q]);
			
 
				+							/* Insert 1	*/ 
			
 
				+							j=nearest(&stack[0],&stock[0],nes[1-q].value,j,stack.size());
			
 
				+							stack.push_back(0);
			
 
				+#if B3_DBVT_USE_MEMMOVE
			
 
				+							memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1));
			
 
				+#else
			
 
				+							for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1];
			
 
				+#endif
			
 
				+							stack[j]=allocate(ifree,stock,nes[1-q]);
			
 
				+						}
			
 
				+						else
			
 
				+						{
			
 
				+							stack.push_back(allocate(ifree,stock,nes[q]));
			
 
				+							stack.push_back(allocate(ifree,stock,nes[1-q]));
			
 
				+						}
			
 
				+					}
			
 
				+					else
			
 
				+					{
			
 
				+						policy.Process(se.node,se.value);
			
 
				+					}
			
 
				+				}
			
 
				+			} while(stack.size());
			
 
				+		}
			
 
				+}
			
 
				+
			
 
				+// collideTU: depth-first walk from root in which policy.Descent(n) decides whether a node is visited; visited internal nodes push both children, visited leaves go to policy.Process. No-op when root is null.
			
 
				+B3_DBVT_PREFIX
			
 
				+inline void		b3DynamicBvh::collideTU(	const b3DbvtNode* root,
			
 
				+								  B3_DBVT_IPOLICY)
			
 
				+{
			
 
				+	B3_DBVT_CHECKTYPE
			
 
				+		if(root)
			
 
				+		{
			
 
				+			b3AlignedObjectArray<const b3DbvtNode*>	stack;
			
 
				+			stack.reserve(B3_SIMPLE_STACKSIZE);
			
 
				+			stack.push_back(root);
			
 
				+			do	{
			
 
				+				const b3DbvtNode*	n=stack[stack.size()-1];
			
 
				+				stack.pop_back();
			
 
				+				if(policy.Descent(n))
			
 
				+				{
			
 
				+					if(n->isinternal())
			
 
				+					{ stack.push_back(n->childs[0]);stack.push_back(n->childs[1]); }
			
 
				+					else
			
 
				+					{ policy.Process(n); }
			
 
				+				}
			
 
				+			} while(stack.size()>0);
			
 
				+		}
			
 
				+}
			
 
				+
			
 
				+//
			
 
				+// PP Cleanup
			
 
				+//
			
 
				+
			
 
				+#undef B3_DBVT_USE_MEMMOVE
			
 
				+#undef B3_DBVT_USE_TEMPLATE
			
 
				+#undef B3_DBVT_VIRTUAL_DTOR
			
 
				+#undef B3_DBVT_VIRTUAL
			
 
				+#undef B3_DBVT_PREFIX
			
 
				+#undef B3_DBVT_IPOLICY
			
 
				+#undef B3_DBVT_CHECKTYPE
			
 
				+#undef B3_DBVT_IMPL_GENERIC
			
 
				+#undef B3_DBVT_IMPL_SSE
			
 
				+#undef B3_DBVT_USE_INTRINSIC_SSE
			
 
				+#undef B3_DBVT_SELECT_IMPL
			
 
				+#undef B3_DBVT_MERGE_IMPL
			
 
				+#undef B3_DBVT_INT0_IMPL
			
 
				+
			
 
				+#endif
			
--- a/include/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h
@@ -0,0 +1,208 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+///b3DynamicBvhBroadphase implementation by Nathanael Presson
			
 
				+#ifndef B3_DBVT_BROADPHASE_H
			
 
				+#define B3_DBVT_BROADPHASE_H
			
 
				+
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+#include "b3BroadphaseCallback.h"
			
 
				+
			
 
				+//
			
 
				+// Compile time config
			
 
				+//
			
 
				+
			
 
				+#define	B3_DBVT_BP_PROFILE					0
			
 
				+//#define B3_DBVT_BP_SORTPAIRS				1
			
 
				+#define B3_DBVT_BP_PREVENTFALSEUPDATE		0
			
 
				+#define B3_DBVT_BP_ACCURATESLEEPING		0
			
 
				+#define B3_DBVT_BP_ENABLE_BENCHMARK		0
			
 
				+#define B3_DBVT_BP_MARGIN					(b3Scalar)0.05
			
 
				+
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+#define	B3_DBVT_BP_PROFILING_RATE	256
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+// b3BroadphaseProxy: record declared through B3_ATTRIBUTE_ALIGNED16 that holds a client pointer, collision filter group/mask, a multi-SAP parent pointer, a unique id and an AABB (m_aabbMin/m_aabbMax).
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3BroadphaseProxy
			
 
				+{
			
 
				+
			
 
				+B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+	
			
 
				+	///optional filtering to cull potential collisions
			
 
				+	enum CollisionFilterGroups
			
 
				+	{
			
 
				+	        DefaultFilter = 1,
			
 
				+	        StaticFilter = 2,
			
 
				+	        KinematicFilter = 4,
			
 
				+	        DebrisFilter = 8,
			
 
				+			SensorTrigger = 16,
			
 
				+			CharacterFilter = 32,
			
 
				+	        AllFilter = -1 //all bits set: DefaultFilter | StaticFilter | KinematicFilter | DebrisFilter | SensorTrigger | CharacterFilter
			
 
				+	};
			
 
				+
			
 
				+	//Usually the client b3CollisionObject or Rigidbody class
			
 
				+	void*	m_clientObject;
			
 
				+	short int m_collisionFilterGroup;
			
 
				+	short int m_collisionFilterMask;
			
 
				+	void*	m_multiSapParentProxy;		
			
 
				+	int			m_uniqueId;//m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.
			
 
				+
			
 
				+	b3Vector3	m_aabbMin;
			
 
				+	b3Vector3	m_aabbMax;
			
 
				+
			
 
				+	B3_FORCE_INLINE int getUid() const
			
 
				+	{
			
 
				+		return m_uniqueId;
			
 
				+	}
			
 
				+
			
 
				+	//used for memory pools; only the two pointers are zeroed, filter fields, m_uniqueId and the AABB get no initializer here
			
 
				+	b3BroadphaseProxy() :m_clientObject(0),m_multiSapParentProxy(0)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	b3BroadphaseProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask,void* multiSapParentProxy=0)
			
 
				+		:m_clientObject(userPtr),
			
 
				+		m_collisionFilterGroup(collisionFilterGroup),
			
 
				+		m_collisionFilterMask(collisionFilterMask),
			
 
				+		m_aabbMin(aabbMin),
			
 
				+		m_aabbMax(aabbMax)
			
 
				+	{
			
 
				+		m_multiSapParentProxy = multiSapParentProxy; // m_uniqueId is not assigned by this constructor
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+//
			
 
				+// b3DbvtProxy
			
 
				+// Extends b3BroadphaseProxy with a tree leaf pointer, two link pointers and a stage index.
			
 
				+struct b3DbvtProxy : b3BroadphaseProxy
			
 
				+{
			
 
				+	/* Fields		*/ 
			
 
				+	//b3DbvtAabbMm	aabb;
			
 
				+	b3DbvtNode*		leaf;
			
 
				+	b3DbvtProxy*	links[2];
			
 
				+	int				stage;
			
 
				+	/* ctor			*/ 
			
 
				+
			
 
				+	explicit b3DbvtProxy() {} // leaf, links and stage get no initializer here
			
 
				+	b3DbvtProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask) :
			
 
				+	b3BroadphaseProxy(aabbMin,aabbMax,userPtr,collisionFilterGroup,collisionFilterMask)
			
 
				+	{
			
 
				+		links[0]=links[1]=0; // only the links are cleared; leaf and stage are left for the caller to set
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+typedef b3AlignedObjectArray<b3DbvtProxy*>	b3DbvtProxyArray;
			
 
				+
			
 
				+///The b3DynamicBvhBroadphase implements a broadphase using two dynamic AABB bounding volume hierarchies/trees (see b3DynamicBvh).
			
 
				+///One tree is used for static/non-moving objects, and another tree is used for dynamic objects. Objects can move from one tree to the other.
			
 
				+///This is a very fast broadphase, especially for very dynamic worlds where many objects are moving. Its insert/add and remove of objects is generally faster than the sweep and prune broadphases b3AxisSweep3 and b332BitAxisSweep3.
			
 
				+struct	b3DynamicBvhBroadphase 
			
 
				+{
			
 
				+	/* Config		*/ 
			
 
				+	enum	{
			
 
				+		DYNAMIC_SET			=	0,	/* Dynamic set index	*/ 
			
 
				+		FIXED_SET			=	1,	/* Fixed set index		*/ 
			
 
				+		STAGECOUNT			=	2	/* Number of stages		*/ 
			
 
				+	};
			
 
				+	/* Fields		*/ 
			
 
				+	b3DynamicBvh					m_sets[2];					// Dbvt sets
			
 
				+	b3DbvtProxy*			m_stageRoots[STAGECOUNT+1];	// Stages list
			
 
				+
			
 
				+	b3AlignedObjectArray<b3DbvtProxy>	m_proxies;
			
 
				+	b3OverlappingPairCache*	m_paircache;				// Pair cache
			
 
				+	b3Scalar				m_prediction;				// Velocity prediction
			
 
				+	int						m_stageCurrent;				// Current stage
			
 
				+	int						m_fupdates;					// % of fixed updates per frame
			
 
				+	int						m_dupdates;					// % of dynamic updates per frame
			
 
				+	int						m_cupdates;					// % of cleanup updates per frame
			
 
				+	int						m_newpairs;					// Number of pairs created
			
 
				+	int						m_fixedleft;				// Fixed optimization left
			
 
				+	unsigned				m_updates_call;				// Number of updates call
			
 
				+	unsigned				m_updates_done;				// Number of updates done
			
 
				+	b3Scalar				m_updates_ratio;			// m_updates_done/m_updates_call
			
 
				+	int						m_pid;						// Parse id
			
 
				+	int						m_cid;						// Cleanup index
			
 
				+	bool					m_releasepaircache;			// Release pair cache on delete
			
 
				+	bool					m_deferedcollide;			// Defer dynamic/static collision to collide call
			
 
				+	bool					m_needcleanup;				// Need to run cleanup?
			
 
				+#if B3_DBVT_BP_PROFILE
			
 
				+	b3Clock					m_clock;
			
 
				+	struct	{
			
 
				+		unsigned long		m_total;
			
 
				+		unsigned long		m_ddcollide;
			
 
				+		unsigned long		m_fdcollide;
			
 
				+		unsigned long		m_cleanup;
			
 
				+		unsigned long		m_jobcount;
			
 
				+	}				m_profiling;
			
 
				+#endif
			
 
				+	/* Methods		*/ 
			
 
				+	b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache=0);
			
 
				+	~b3DynamicBvhBroadphase(); // NOTE(review): non-virtual although this struct declares virtual methods
			
 
				+	void							collide(b3Dispatcher* dispatcher);
			
 
				+	void							optimize();
			
 
				+	
			
 
				+	/* b3BroadphaseInterface Implementation	*/
			
 
				+	b3BroadphaseProxy*				createProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,int objectIndex,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask);
			
 
				+	virtual void					destroyProxy(b3BroadphaseProxy* proxy,b3Dispatcher* dispatcher);
			
 
				+	virtual void					setAabb(int objectId,const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3Dispatcher* dispatcher);
			
 
				+	virtual void					rayTest(const b3Vector3& rayFrom,const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin=b3MakeVector3(0,0,0), const b3Vector3& aabbMax = b3MakeVector3(0,0,0));
			
 
				+	virtual void					aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& callback);
			
 
				+
			
 
				+	//virtual void					getAabb(b3BroadphaseProxy* proxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const;
			
 
				+	virtual void					getAabb(int objectId,b3Vector3& aabbMin, b3Vector3& aabbMax ) const;
			
 
				+	virtual	void					calculateOverlappingPairs(b3Dispatcher* dispatcher=0);
			
 
				+	virtual	b3OverlappingPairCache*	getOverlappingPairCache();
			
 
				+	virtual	const b3OverlappingPairCache*	getOverlappingPairCache() const;
			
 
				+	virtual	void					getBroadphaseAabb(b3Vector3& aabbMin,b3Vector3& aabbMax) const;
			
 
				+	virtual	void					printStats();
			
 
				+
			
 
				+
			
 
				+	///reset broadphase internal structures, to ensure determinism/reproducibility
			
 
				+	virtual void resetPool(b3Dispatcher* dispatcher);
			
 
				+
			
 
				+	void	performDeferredRemoval(b3Dispatcher* dispatcher);
			
 
				+	
			
 
				+	void	setVelocityPrediction(b3Scalar prediction)
			
 
				+	{
			
 
				+		m_prediction = prediction;
			
 
				+	}
			
 
				+	b3Scalar getVelocityPrediction() const
			
 
				+	{
			
 
				+		return m_prediction;
			
 
				+	}
			
 
				+
			
 
				+	///this setAabbForceUpdate is similar to setAabb but always forces the aabb update. 
			
 
				+	///it is not part of the b3BroadphaseInterface but specific to b3DynamicBvhBroadphase.
			
 
				+	///it bypasses certain optimizations that prevent aabb updates (when the aabb shrinks), see
			
 
				+	///http://code.google.com/p/bullet/issues/detail?id=223
			
 
				+	void							setAabbForceUpdate(		b3BroadphaseProxy* absproxy,const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3Dispatcher* /*dispatcher*/);
			
 
				+
			
 
				+	//static void						benchmark(b3BroadphaseInterface*);
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif
			
--- a/include/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h
@@ -0,0 +1,72 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OVERLAPPING_PAIR_H
			
 
				+#define B3_OVERLAPPING_PAIR_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+
			
 
				+#define B3_NEW_PAIR_MARKER -1
			
 
				+#define B3_REMOVED_PAIR_MARKER -2
			
 
				+
			
 
				+typedef b3Int4 b3BroadphasePair;
			
 
				+// Builds a pair whose x is the smaller and y the larger of (xx,yy); z and w are both set to B3_NEW_PAIR_MARKER.
			
 
				+inline b3Int4 b3MakeBroadphasePair(int xx,int yy)
			
 
				+{
			
 
				+	b3Int4 pair;
			
 
				+
			
 
				+	if (xx < yy)
			
 
				+    { 
			
 
				+        pair.x = xx; 
			
 
				+        pair.y = yy;
			
 
				+    }
			
 
				+    else 
			
 
				+    { 
			
 
				+		pair.x = yy;
			
 
				+        pair.y = xx;
			
 
				+    }
			
 
				+	pair.z = B3_NEW_PAIR_MARKER;
			
 
				+	pair.w = B3_NEW_PAIR_MARKER;
			
 
				+	return pair;
			
 
				+}
			
 
				+
			
 
				+/*struct b3BroadphasePair : public b3Int4
			
 
				+{
			
 
				+	explicit b3BroadphasePair(){}
			
 
				+	
			
 
				+};
			
 
				+*/
			
 
				+// Comparison functor for pairs: a precedes b when a.x > b.x, or when the x values are equal and a.y > b.y (descending order on x, then y).
			
 
				+class b3BroadphasePairSortPredicate
			
 
				+{
			
 
				+	public:
			
 
				+
			
 
				+		bool operator() ( const b3BroadphasePair& a, const b3BroadphasePair& b ) const
			
 
				+		{
			
 
				+			const int uidA0 = a.x;
			
 
				+			const int uidB0 = b.x;
			
 
				+			const int uidA1 = a.y;
			
 
				+			const int uidB1 = b.y;
			
 
				+			return uidA0 > uidB0 || (uidA0 == uidB0 && uidA1 > uidB1); 
			
 
				+		}
			
 
				+};
			
 
				+// Pairs compare equal when both x and y match; z and w are not compared.
			
 
				+B3_FORCE_INLINE bool operator==(const b3BroadphasePair& a, const b3BroadphasePair& b) 
			
 
				+{
			
 
				+	 return (a.x == b.x ) && (a.y == b.y );
			
 
				+}
			
 
				+
			
 
				+#endif //B3_OVERLAPPING_PAIR_H
			
 
				+
			
--- a/include/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h
@@ -0,0 +1,474 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OVERLAPPING_PAIR_CACHE_H
			
 
				+#define B3_OVERLAPPING_PAIR_CACHE_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+class b3Dispatcher;
			
 
				+#include "b3OverlappingPair.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+typedef b3AlignedObjectArray<b3BroadphasePair>	b3BroadphasePairArray;
			
 
				+
			
 
				+struct	b3OverlapCallback // Abstract callback type; passed by pointer to processAllOverlappingPairs() of the pair caches.
			
 
				+{
			
 
				+	virtual ~b3OverlapCallback() // virtual destructor with an empty body
			
 
				+	{}
			
 
				+	//return true for deletion of the pair
			
 
				+	virtual bool	processOverlap(b3BroadphasePair& pair) = 0; // pure virtual; receives the pair by non-const reference
			
 
				+
			
 
				+};
			
 
				+
			
 
				+struct b3OverlapFilterCallback // Abstract filter; the caches' needsBroadphaseCollision() forwards to it when one is installed.
			
 
				+{
			
 
				+	virtual ~b3OverlapFilterCallback() // virtual destructor with an empty body
			
 
				+	{}
			
 
				+	// return true when pairs need collision
			
 
				+	virtual bool	needBroadphaseCollision(int proxy0,int proxy1) const = 0; // pure virtual and const: must not modify the filter
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+extern int b3g_removePairs; // declared here, defined elsewhere; not touched by the inline code in this header
			
 
				+extern int b3g_addedPairs; // incremented by b3HashedOverlappingPairCache::addOverlappingPair on every call
			
 
				+extern int b3g_findPairs; // declared here, defined elsewhere; not touched by the inline code in this header
			
 
				+
			
 
				+const int B3_NULL_PAIR=0xffffffff; // end-of-chain sentinel tested in internalFindPair; NOTE(review): the literal does not fit in a 32-bit int, so the stored value relies on conversion (typically -1) - confirm
			
 
				+
			
 
				+///The b3OverlappingPairCache provides an interface for overlapping pair management (add, remove, storage), used by the b3BroadphaseInterface broadphases.
			
 
				+///The b3HashedOverlappingPairCache and b3SortedOverlappingPairCache classes are two implementations.
			
 
				+class b3OverlappingPairCache // Pure interface: apart from the destructor, every member below is '= 0'.
			
 
				+{
			
 
				+public:
			
 
				+	virtual ~b3OverlappingPairCache() {} // this is needed so we can get to the derived class destructor
			
 
				+
			
 
				+	virtual b3BroadphasePair*	getOverlappingPairArrayPtr() = 0; // mutable raw-pointer view of the stored pairs
			
 
				+	
			
 
				+	virtual const b3BroadphasePair*	getOverlappingPairArrayPtr() const = 0; // read-only overload of the accessor above
			
 
				+
			
 
				+	virtual b3BroadphasePairArray&	getOverlappingPairArray() = 0; // only the non-const array accessor is part of the interface
			
 
				+
			
 
				+	virtual	void	cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher) = 0;
			
 
				+
			
 
				+	virtual int getNumOverlappingPairs() const = 0;
			
 
				+
			
 
				+	virtual void	cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher) = 0;
			
 
				+
			
 
				+	virtual	void setOverlapFilterCallback(b3OverlapFilterCallback* callback) = 0;
			
 
				+
			
 
				+	virtual void	processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher) = 0;
			
 
				+
			
 
				+	virtual b3BroadphasePair* findPair(int proxy0, int proxy1) = 0; // returns a pointer, so implementations can report "not found" with null
			
 
				+
			
 
				+	virtual bool	hasDeferredRemoval() = 0; // non-const in the interface
			
 
				+
			
 
				+	//virtual	void	setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)=0;
			
 
				+
			
 
				+	virtual b3BroadphasePair* 	addOverlappingPair(int proxy0,int proxy1)=0;
			
 
				+	virtual void*	removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher)=0; // untyped return; meaning of the pointer is not visible here
			
 
				+	virtual void	removeOverlappingPairsContainingProxy(int /*proxy0*/,b3Dispatcher* /*dispatcher*/)=0;
			
 
				+
			
 
				+	virtual void	sortOverlappingPairs(b3Dispatcher* dispatcher) = 0;
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+/// Hash-space based Pair Cache, thanks to Erin Catto, Box2D, http://www.box2d.org, and Pierre Terdiman, Codercorner, http://codercorner.com
			
 
				+class b3HashedOverlappingPairCache : public b3OverlappingPairCache // Pair cache that finds pairs through m_hashTable/m_next bucket chains.
			
 
				+{
			
 
				+	b3BroadphasePairArray	m_overlappingPairArray; // pair storage; the indices held in m_hashTable and m_next refer into this array
			
 
				+	b3OverlapFilterCallback* m_overlapFilterCallback; // optional user filter; a null pointer means "accept every pair"
			
 
				+	bool		m_blockedForChanges; // not read or written by the inline code in this header
			
 
				+
			
 
				+
			
 
				+public:
			
 
				+	b3HashedOverlappingPairCache();
			
 
				+	virtual ~b3HashedOverlappingPairCache();
			
 
				+
			
 
				+	
			
 
				+	virtual void	removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+	virtual void*	removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher);
			
 
				+	
			
 
				+	B3_FORCE_INLINE bool needsBroadphaseCollision(int proxy0,int proxy1) const // filter decision for a candidate pair
			
 
				+	{
			
 
				+		if (m_overlapFilterCallback) // a user-installed filter takes precedence
			
 
				+			return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);
			
 
				+
			
 
				+		bool collides = true;//(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
			
 
				+		//collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
			
 
				+		
			
 
				+		return collides; // always true here: the built-in group/mask test is commented out
			
 
				+	}
			
 
				+
			
 
				+	// Add a pair and return the new pair. If the pair already exists,
			
 
				+	// no new pair is created and the old one is returned.
			
 
				+	virtual b3BroadphasePair* 	addOverlappingPair(int proxy0,int proxy1)
			
 
				+	{
			
 
				+		b3g_addedPairs++; // counted on every call, including calls the filter rejects below
			
 
				+
			
 
				+		if (!needsBroadphaseCollision(proxy0,proxy1))
			
 
				+			return 0; // filtered out: null is returned and nothing is stored
			
 
				+
			
 
				+		return internalAddPair(proxy0,proxy1);
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+	void	cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+	
			
 
				+	virtual void	processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+	virtual b3BroadphasePair*	getOverlappingPairArrayPtr()
			
 
				+	{
			
 
				+		return &m_overlappingPairArray[0]; // NOTE(review): indexes element 0 without a size check; behaviour on an empty array depends on b3AlignedObjectArray - confirm
			
 
				+	}
			
 
				+
			
 
				+	const b3BroadphasePair*	getOverlappingPairArrayPtr() const
			
 
				+	{
			
 
				+		return &m_overlappingPairArray[0]; // same expression as the non-const overload
			
 
				+	}
			
 
				+
			
 
				+	b3BroadphasePairArray&	getOverlappingPairArray()
			
 
				+	{
			
 
				+		return m_overlappingPairArray;
			
 
				+	}
			
 
				+
			
 
				+	const b3BroadphasePairArray&	getOverlappingPairArray() const // const overload; not declared in the base interface
			
 
				+	{
			
 
				+		return m_overlappingPairArray;
			
 
				+	}
			
 
				+
			
 
				+	void	cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+
			
 
				+
			
 
				+	b3BroadphasePair* findPair(int proxy0, int proxy1);
			
 
				+
			
 
				+	int GetCount() const { return m_overlappingPairArray.size(); } // returns the same value as getNumOverlappingPairs()
			
 
				+//	b3BroadphasePair* GetPairs() { return m_pairs; }
			
 
				+
			
 
				+	b3OverlapFilterCallback* getOverlapFilterCallback()
			
 
				+	{
			
 
				+		return m_overlapFilterCallback;
			
 
				+	}
			
 
				+
			
 
				+	void setOverlapFilterCallback(b3OverlapFilterCallback* callback)
			
 
				+	{
			
 
				+		m_overlapFilterCallback = callback; // pointer is stored as-is; nothing here deletes it
			
 
				+	}
			
 
				+
			
 
				+	int	getNumOverlappingPairs() const
			
 
				+	{
			
 
				+		return m_overlappingPairArray.size();
			
 
				+	}
			
 
				+private:
			
 
				+	
			
 
				+	b3BroadphasePair* 	internalAddPair(int proxy0,int proxy1);
			
 
				+
			
 
				+	void	growTables();
			
 
				+
			
 
				+	B3_FORCE_INLINE bool equalsPair(const b3BroadphasePair& pair, int proxyId1, int proxyId2) // order-sensitive: x is matched against proxyId1, y against proxyId2
			
 
				+	{	
			
 
				+		return pair.x == proxyId1 && pair.y  == proxyId2;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	// Thomas Wang's hash, see: http://www.concentric.net/~Ttwang/tech/inthash.htm
			
 
				+	// This assumes proxyId1 and proxyId2 are 16-bit.
			
 
				+	B3_FORCE_INLINE int getHash(int proxyId1, int proxyId2)
			
 
				+	{
			
 
				+		int key = (proxyId2 << 16) | proxyId1;
			
 
				+		key = ~key + (key << 15);
			
 
				+		key = key ^ (key >> 12);
			
 
				+		key = key + (key << 2);
			
 
				+		key = key ^ (key >> 4);
			
 
				+		key = key * 2057;
			
 
				+		key = key ^ (key >> 16);
			
 
				+		return key;
			
 
				+	}
			
 
				+	*/
			
 
				+
			
 
				+
			
 
				+	
			
 
				+	B3_FORCE_INLINE	unsigned int getHash(unsigned int proxyId1, unsigned int proxyId2) // combines both ids into one key and scrambles it
			
 
				+	{
			
 
				+		int key = static_cast<int>(((unsigned int)proxyId1) | (((unsigned int)proxyId2) <<16)); // proxyId2 is shifted left by 16 and OR-ed with proxyId1
			
 
				+		// Thomas Wang's hash
			
 
				+
			
 
				+		key += ~(key << 15); // NOTE(review): the mixing runs on a signed int, so the '>>' steps below act on a signed value - confirm intended
			
 
				+		key ^=  (key >> 10);
			
 
				+		key +=  (key << 3);
			
 
				+		key ^=  (key >> 6);
			
 
				+		key += ~(key << 11);
			
 
				+		key ^=  (key >> 16);
			
 
				+		return static_cast<unsigned int>(key); // not reduced to a table size here; the caller has to do that
			
 
				+	}
			
 
				+	
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+	B3_FORCE_INLINE b3BroadphasePair* internalFindPair(int proxy0, int proxy1, int hash) // walks one bucket chain; returns NULL when no entry matches
			
 
				+	{
			
 
				+		int proxyId1 = proxy0;
			
 
				+		int proxyId2 = proxy1;
			
 
				+		#if 0 // wrong, 'equalsPair' use unsorted uids, copy-past devil striked again. Nat.
			
 
				+		if (proxyId1 > proxyId2) 
			
 
				+			b3Swap(proxyId1, proxyId2);
			
 
				+		#endif
			
 
				+
			
 
				+		int index = m_hashTable[hash]; // 'hash' is used as-is as the index into m_hashTable
			
 
				+		
			
 
				+		while( index != B3_NULL_PAIR && equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2) == false) // follow m_next until a match or the sentinel
			
 
				+		{
			
 
				+			index = m_next[index];
			
 
				+		}
			
 
				+
			
 
				+		if ( index == B3_NULL_PAIR ) // chain exhausted without a match
			
 
				+		{
			
 
				+			return NULL;
			
 
				+		}
			
 
				+
			
 
				+		b3Assert(index < m_overlappingPairArray.size());
			
 
				+
			
 
				+		return &m_overlappingPairArray[index];
			
 
				+	}
			
 
				+
			
 
				+	virtual bool	hasDeferredRemoval() // NOTE(review): sits under 'private:' here although the base class declares it public
			
 
				+	{
			
 
				+		return false; // this cache always reports no deferred removal
			
 
				+	}
			
 
				+
			
 
				+/*	virtual	void	setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)
			
 
				+	{
			
 
				+		m_ghostPairCallback = ghostPairCallback;
			
 
				+	}
			
 
				+	*/
			
 
				+
			
 
				+	virtual void	sortOverlappingPairs(b3Dispatcher* dispatcher);
			
 
				+	
			
 
				+
			
 
				+protected:
			
 
				+	
			
 
				+	b3AlignedObjectArray<int>	m_hashTable; // per-bucket index of the first pair in the chain (read in internalFindPair)
			
 
				+	b3AlignedObjectArray<int>	m_next; // per-pair index of the next pair in the same chain, or B3_NULL_PAIR
			
 
				+//	b3OverlappingPairCallback*	m_ghostPairCallback;
			
 
				+	
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+///b3SortedOverlappingPairCache maintains the objects with overlapping AABB
			
 
				+///Typically managed by the Broadphase, Axis3Sweep or b3SimpleBroadphase
			
 
				+class	b3SortedOverlappingPairCache : public b3OverlappingPairCache // Array-backed pair cache; only trivial accessors are defined inline here.
			
 
				+{
			
 
				+	protected:
			
 
				+		//avoid brute-force finding all the time
			
 
				+		b3BroadphasePairArray	m_overlappingPairArray;
			
 
				+
			
 
				+		//during the dispatch, check that user doesn't destroy/create proxy
			
 
				+		bool		m_blockedForChanges;
			
 
				+
			
 
				+		///by default, do the removal during the pair traversal
			
 
				+		bool		m_hasDeferredRemoval; // returned unchanged by hasDeferredRemoval()
			
 
				+		
			
 
				+		//if set, use the callback instead of the built in filter in needBroadphaseCollision
			
 
				+		b3OverlapFilterCallback* m_overlapFilterCallback;
			
 
				+
			
 
				+//		b3OverlappingPairCallback*	m_ghostPairCallback;
			
 
				+
			
 
				+	public:
			
 
				+			
			
 
				+		b3SortedOverlappingPairCache();	
			
 
				+		virtual ~b3SortedOverlappingPairCache();
			
 
				+
			
 
				+		virtual void	processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+		void*	removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+		void	cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher);
			
 
				+		
			
 
				+		b3BroadphasePair*	addOverlappingPair(int proxy0,int proxy1); // defined out of line, unlike the hashed cache's inline version
			
 
				+
			
 
				+		b3BroadphasePair*	findPair(int proxy0,int proxy1);
			
 
				+			
			
 
				+		
			
 
				+		void	cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+		virtual void	removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher);
			
 
				+
			
 
				+
			
 
				+		inline bool needsBroadphaseCollision(int proxy0,int proxy1) const // filter decision for a candidate pair
			
 
				+		{
			
 
				+			if (m_overlapFilterCallback) // a user-installed filter takes precedence
			
 
				+				return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);
			
 
				+
			
 
				+			bool collides = true;//(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
			
 
				+			//collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
			
 
				+			
			
 
				+			return collides; // always true here: the built-in group/mask test is commented out
			
 
				+		}
			
 
				+		
			
 
				+		b3BroadphasePairArray&	getOverlappingPairArray()
			
 
				+		{
			
 
				+			return m_overlappingPairArray;
			
 
				+		}
			
 
				+
			
 
				+		const b3BroadphasePairArray&	getOverlappingPairArray() const // const overload; not declared in the base interface
			
 
				+		{
			
 
				+			return m_overlappingPairArray;
			
 
				+		}
			
 
				+
			
 
				+		
			
 
				+
			
 
				+
			
 
				+		b3BroadphasePair*	getOverlappingPairArrayPtr()
			
 
				+		{
			
 
				+			return &m_overlappingPairArray[0]; // NOTE(review): indexes element 0 without a size check; behaviour on an empty array depends on b3AlignedObjectArray - confirm
			
 
				+		}
			
 
				+
			
 
				+		const b3BroadphasePair*	getOverlappingPairArrayPtr() const
			
 
				+		{
			
 
				+			return &m_overlappingPairArray[0]; // same expression as the non-const overload
			
 
				+		}
			
 
				+
			
 
				+		int	getNumOverlappingPairs() const
			
 
				+		{
			
 
				+			return m_overlappingPairArray.size();
			
 
				+		}
			
 
				+		
			
 
				+		b3OverlapFilterCallback* getOverlapFilterCallback()
			
 
				+		{
			
 
				+			return m_overlapFilterCallback;
			
 
				+		}
			
 
				+
			
 
				+		void setOverlapFilterCallback(b3OverlapFilterCallback* callback)
			
 
				+		{
			
 
				+			m_overlapFilterCallback = callback; // pointer is stored as-is; nothing here deletes it
			
 
				+		}
			
 
				+
			
 
				+		virtual bool	hasDeferredRemoval()
			
 
				+		{
			
 
				+			return m_hasDeferredRemoval;
			
 
				+		}
			
 
				+
			
 
				+/*		virtual	void	setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)
			
 
				+		{
			
 
				+			m_ghostPairCallback = ghostPairCallback;
			
 
				+		}
			
 
				+		*/
			
 
				+		virtual void	sortOverlappingPairs(b3Dispatcher* dispatcher);
			
 
				+		
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+///b3NullPairCache skips add/removal of overlapping pairs. Useful for benchmarking and unit testing.
			
 
				+class b3NullPairCache : public b3OverlappingPairCache // Every operation below is a no-op or returns a fixed value.
			
 
				+{
			
 
				+
			
 
				+	b3BroadphasePairArray	m_overlappingPairArray; // nothing in this class ever adds to this array
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	virtual b3BroadphasePair*	getOverlappingPairArrayPtr()
			
 
				+	{
			
 
				+		return &m_overlappingPairArray[0]; // NOTE(review): element 0 of an array this class never fills; validity depends on b3AlignedObjectArray - confirm
			
 
				+	}
			
 
				+	const b3BroadphasePair*	getOverlappingPairArrayPtr() const
			
 
				+	{
			
 
				+		return &m_overlappingPairArray[0]; // same expression as the non-const overload
			
 
				+	}
			
 
				+	b3BroadphasePairArray&	getOverlappingPairArray()
			
 
				+	{
			
 
				+		return m_overlappingPairArray;
			
 
				+	}
			
 
				+	
			
 
				+	virtual	void	cleanOverlappingPair(b3BroadphasePair& /*pair*/,b3Dispatcher* /*dispatcher*/) // no-op
			
 
				+	{
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+	virtual int getNumOverlappingPairs() const
			
 
				+	{
			
 
				+		return 0; // constant; does not consult m_overlappingPairArray
			
 
				+	}
			
 
				+
			
 
				+	virtual void	cleanProxyFromPairs(int /*proxy*/,b3Dispatcher* /*dispatcher*/) // no-op
			
 
				+	{
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+	virtual	void setOverlapFilterCallback(b3OverlapFilterCallback* /*callback*/) // no-op: the callback is not stored
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	virtual void	processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* /*dispatcher*/) // no-op: the callback is never invoked
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	virtual b3BroadphasePair* findPair(int /*proxy0*/, int /*proxy1*/)
			
 
				+	{
			
 
				+		return 0; // never finds a pair
			
 
				+	}
			
 
				+
			
 
				+	virtual bool	hasDeferredRemoval()
			
 
				+	{
			
 
				+		return true; // fixed answer
			
 
				+	}
			
 
				+
			
 
				+//	virtual	void	setInternalGhostPairCallback(b3OverlappingPairCallback* /* ghostPairCallback */)
			
 
				+//	{
			
 
				+//
			
 
				+//	}
			
 
				+
			
 
				+	virtual b3BroadphasePair*	addOverlappingPair(int /*proxy0*/,int /*proxy1*/)
			
 
				+	{
			
 
				+		return 0; // nothing is added; null is always returned
			
 
				+	}
			
 
				+
			
 
				+	virtual void*	removeOverlappingPair(int /*proxy0*/,int /*proxy1*/,b3Dispatcher* /*dispatcher*/)
			
 
				+	{
			
 
				+		return 0; // nothing to remove; null is always returned
			
 
				+	}
			
 
				+
			
 
				+	virtual void	removeOverlappingPairsContainingProxy(int /*proxy0*/,b3Dispatcher* /*dispatcher*/) // no-op
			
 
				+	{
			
 
				+	}
			
 
				+	
			
 
				+	virtual void	sortOverlappingPairs(b3Dispatcher* dispatcher)
			
 
				+	{
			
 
				+        (void) dispatcher; // marks the parameter as intentionally unused
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_OVERLAPPING_PAIR_CACHE_H
			
 
				+
			
 
				+
			
--- a/include/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h
+++ b/include/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h
@@ -0,0 +1,59 @@
 
				+
			
 
				+#ifndef B3_AABB_H
			
 
				+#define B3_AABB_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+typedef struct b3Aabb b3Aabb_t;
			
 
				+
			
 
				+struct b3Aabb // Axis-aligned box stored as a min and a max corner, each a 4-component union.
			
 
				+{
			
 
				+	union // three overlapping views of the min corner
			
 
				+	{
			
 
				+		float m_min[4];
			
 
				+		b3Float4 m_minVec;
			
 
				+		int m_minIndices[4]; // integer view of the same storage; how it is used is not visible in this header
			
 
				+	};
			
 
				+	union // three overlapping views of the max corner
			
 
				+	{
			
 
				+		float	m_max[4];
			
 
				+		b3Float4 m_maxVec;
			
 
				+		int m_signedMaxIndices[4]; // integer view of the same storage; how it is used is not visible in this header
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin, // Computes the box enclosing a local box after moving it by pos/orn.
			
 
				+						b3Float4ConstArg pos,
			
 
				+						b3QuatConstArg orn,
			
 
				+						b3Float4* aabbMinOut,b3Float4* aabbMaxOut) // outputs are written through these pointers without a null check
			
 
				+{
			
 
				+		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);
			
 
				+		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f); // grow the half extents by margin on x, y and z (w gets 0)
			
 
				+		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);
			
 
				+		b3Mat3x3 m;
			
 
				+		m = b3QuatGetRotationMatrix(orn);
			
 
				+		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m); // presumably the element-wise absolute value of the rotation matrix - helper is defined elsewhere
			
 
				+		b3Float4 center = b3TransformPoint(localCenter,pos,orn);
			
 
				+		
			
 
				+		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)), // per axis: dot of the half extents with the matching row of abs_b
			
 
				+										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),
			
 
				+										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),
			
 
				+										 0.f);
			
 
				+		*aabbMinOut = center-extent;
			
 
				+		*aabbMaxOut = center+extent;
			
 
				+}
			
 
				+
			
 
				+/// conservative test for overlap between two aabbs
			
 
				+inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1, // Returns true unless the two boxes are separated on x, y or z.
			
 
				+								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)
			
 
				+{
			
 
				+	bool overlap = true;
			
 
				+	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap; // strict comparisons: boxes that only touch still count as overlapping
			
 
				+	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap; // axes are tested in x, z, y order; w is never read
			
 
				+	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap; // all three axes are always evaluated (no early exit)
			
 
				+	return overlap;
			
 
				+}
			
 
				+
			
 
				+#endif //B3_AABB_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3Config.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3Config.h
@@ -0,0 +1,41 @@
 
				+#ifndef B3_CONFIG_H
			
 
				+#define B3_CONFIG_H
			
 
				+
			
 
				+struct	b3Config // Bundle of integer capacity limits; the constructor assigns a default to every field.
			
 
				+{
			
 
				+	int	m_maxConvexBodies;
			
 
				+	int	m_maxConvexShapes;
			
 
				+	int	m_maxBroadphasePairs;
			
 
				+	int m_maxContactCapacity;
			
 
				+	int m_compoundPairCapacity;
			
 
				+
			
 
				+	int m_maxVerticesPerFace;
			
 
				+	int m_maxFacesPerShape;
			
 
				+	int	m_maxConvexVertices;
			
 
				+	int m_maxConvexIndices;
			
 
				+	int m_maxConvexUniqueEdges;
			
 
				+	
			
 
				+	int	m_maxCompoundChildShapes;
			
 
				+	
			
 
				+	int m_maxTriConvexPairCapacity;
			
 
				+
			
 
				+	b3Config() // eight fields are set in the initializer list, the remaining four in the body
			
 
				+		:m_maxConvexBodies(32*1024),
			
 
				+		m_maxVerticesPerFace(64),
			
 
				+		m_maxFacesPerShape(12),
			
 
				+		m_maxConvexVertices(8192),
			
 
				+		m_maxConvexIndices(81920),
			
 
				+		m_maxConvexUniqueEdges(8192),
			
 
				+		m_maxCompoundChildShapes(8192),
			
 
				+		m_maxTriConvexPairCapacity(256*1024)
			
 
				+	{
			
 
				+		m_maxConvexShapes = m_maxConvexBodies; // derived defaults: computed from m_maxConvexBodies as initialized above
			
 
				+		m_maxBroadphasePairs = 16*m_maxConvexBodies; // 16 * 32*1024 = 524288 with the default body count
			
 
				+		m_maxContactCapacity = m_maxBroadphasePairs; // same value as the broadphase pair limit
			
 
				+		m_compoundPairCapacity = 1024*1024;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif//B3_CONFIG_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h
@@ -0,0 +1,46 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONTACT4_H
			
 
				+#define B3_CONTACT4_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3Contact4 : public b3Contact4Data // Accessor layer over b3Contact4Data; declares no data members of its own.
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	int getBodyA()const {return abs(m_bodyAPtrAndSignBit);} // body index is the magnitude of the stored value
			
 
				+	int getBodyB()const {return abs(m_bodyBPtrAndSignBit);} // NOTE(review): abs is used but this header includes no <cstdlib>/<stdlib.h> itself - presumably supplied by b3Vector3.h, confirm
			
 
				+	bool isBodyAFixed()const { return m_bodyAPtrAndSignBit<0;} // a negative stored value means "fixed"
			
 
				+	bool isBodyBFixed()const { return m_bodyBPtrAndSignBit<0;}
			
 
				+	//	todo. make it safer
			
 
				+	int& getBatchIdx() { return m_batchIdx; } // hands out a mutable reference to the member
			
 
				+	const int& getBatchIdx() const { return m_batchIdx; }
			
 
				+	float getRestituitionCoeff() const { return ((float)m_restituitionCoeffCmp/(float)0xffff); } // stored value divided by 0xffff
			
 
				+	void setRestituitionCoeff( float c ) { b3Assert( c >= 0.f && c <= 1.f ); m_restituitionCoeffCmp = (unsigned short)(c*0xffff); } // asserts c is in [0,1], then stores c*0xffff truncated to unsigned short
			
 
				+	float getFrictionCoeff() const { return ((float)m_frictionCoeffCmp/(float)0xffff); } // same encoding as the restitution coefficient
			
 
				+	void setFrictionCoeff( float c ) { b3Assert( c >= 0.f && c <= 1.f ); m_frictionCoeffCmp = (unsigned short)(c*0xffff); }
			
 
				+
			
 
				+	//float& getNPoints() { return m_worldNormal[3]; }
			
 
				+	int getNPoints() const { return (int) m_worldNormalOnB.w; } // the point count is read from the w component of the normal
			
 
				+
			
 
				+	float getPenetration(int idx) const { return m_worldPosB[idx].w; } // idx is not range-checked; reads w of that contact position
			
 
				+
			
 
				+	bool isInvalid() const { return (getBodyA()==0 || getBodyB()==0); } // a body index of 0 on either side marks the contact invalid
			
 
				+};
			
 
				+
			
 
				+#endif //B3_CONTACT4_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h
@@ -0,0 +1,62 @@
 
				+
			
 
				+/*
			
 
				+Copyright (c) 2012 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#ifndef _BT_CONVEX_UTILITY_H
			
 
				+#define _BT_CONVEX_UTILITY_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+struct b3MyFace // Element type of b3ConvexUtility::m_faces.
			
 
				+{
			
 
				+	b3AlignedObjectArray<int>	m_indices; // presumably vertex indices of the face - TODO confirm against the .cpp
			
 
				+	b3Scalar	m_plane[4]; // four scalars; presumably plane coefficients - TODO confirm layout
			
 
				+};
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3ConvexUtility // All data members below are public; method bodies live outside this header.
			
 
				+{
			
 
				+	public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	b3Vector3		m_localCenter;
			
 
				+	b3Vector3		m_extents;
			
 
				+	b3Vector3		mC; // meaning not visible in this header
			
 
				+	b3Vector3		mE; // meaning not visible in this header
			
 
				+	b3Scalar		m_radius;
			
 
				+	
			
 
				+	b3AlignedObjectArray<b3Vector3>	m_vertices;
			
 
				+	b3AlignedObjectArray<b3MyFace>	m_faces;
			
 
				+	b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
			
 
				+
			
 
				+		
			
 
				+	b3ConvexUtility() // empty body: no member is assigned here
			
 
				+	{
			
 
				+	}
			
 
				+	virtual ~b3ConvexUtility();
			
 
				+
			
 
				+	bool	initializePolyhedralFeatures(const b3Vector3* orgVertices, int numVertices, bool mergeCoplanarTriangles=true); // mergeCoplanarTriangles defaults to true
			
 
				+		
			
 
				+	void	initialize();
			
 
				+	bool testContainment() const;
			
 
				+
			
 
				+
			
 
				+
			
 
				+};
			
 
				+#endif
			
 
				+	
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h
@@ -0,0 +1,105 @@
 
				+#ifndef B3_CPU_NARROWPHASE_H
			
 
				+#define B3_CPU_NARROWPHASE_H
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+class b3CpuNarrowPhase // Declarations only, apart from two trivial inline getters; state is held behind m_data.
			
 
				+{
			
 
				+protected:
			
 
				+
			
 
				+	struct b3CpuNarrowPhaseInternalData*	m_data; // pointer to a struct that is only forward-declared here
			
 
				+	int m_acceleratedCompanionShapeIndex;
			
 
				+	int m_planeBodyIndex;
			
 
				+	int	m_static0Index; // exposed read-only through getStatic0Index()
			
 
				+
			
 
				+	int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr,b3Collidable& col);
			
 
				+	int registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling);
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	
			
 
				+
			
 
				+
			
 
				+	b3CpuNarrowPhase(const struct b3Config& config); // NOTE(review): single-argument constructor is not marked explicit
			
 
				+
			
 
				+	virtual ~b3CpuNarrowPhase(void);
			
 
				+
			
 
				+	int		registerSphereShape(float radius); // the register* functions return int; presumably an index/handle - TODO confirm
			
 
				+	int		registerPlaneShape(const b3Vector3& planeNormal, float planeConstant);
			
 
				+
			
 
				+	int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes);
			
 
				+	int registerFace(const b3Vector3& faceNormal, float faceConstant);
			
 
				+	
			
 
				+	int	registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,const float* scaling);
			
 
				+	
			
 
				+	//do they need to be merged?
			
 
				+	
			
 
				+	int	registerConvexHullShape(b3ConvexUtility* utilPtr); // two overloads: from a prepared b3ConvexUtility, or from a raw strided float buffer
			
 
				+	int	registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
			
 
				+
			
 
				+	//int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu);
			
 
				+	void setObjectTransform(const float* position, const float* orientation , int bodyIndex);
			
 
				+
			
 
				+	void	writeAllBodiesToGpu(); // NOTE(review): "Gpu"-named members on a CPU class; presumably kept to mirror a GPU counterpart - confirm
			
 
				+	void  reset();
			
 
				+	void	readbackAllBodiesToCpu();
			
 
				+	bool	getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const; // results are written through the position/orientation pointers
			
 
				+
			
 
				+	void setObjectTransformCpu(float* position, float* orientation , int bodyIndex); // takes non-const pointers, unlike setObjectTransform above
			
 
				+	void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex);
			
 
				+
			
 
				+	
			
 
				+	//virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects);
			
 
				+	virtual void computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies); // all three arrays are passed by non-const reference
			
 
				+
			
 
				+
			
 
				+	
			
 
				+	const struct b3RigidBodyData* getBodiesCpu() const;
			
 
				+	//struct b3RigidBodyData* getBodiesCpu();
			
 
				+
			
 
				+	int	getNumBodiesGpu() const;
			
 
				+
			
 
				+	
			
 
				+	int	getNumBodyInertiasGpu() const;
			
 
				+
			
 
				+	
			
 
				+	const struct b3Collidable* getCollidablesCpu() const;
			
 
				+	int		getNumCollidablesGpu() const;
			
 
				+
			
 
				+
			
 
				+	/*const struct b3Contact4* getContactsCPU() const;
			
 
				+
			
 
				+	
			
 
				+	int	getNumContactsGpu() const;
			
 
				+	*/
			
 
				+
			
 
				+	const b3AlignedObjectArray<b3Contact4Data>& getContacts() const; // read-only access to the contact array
			
 
				+	
			
 
				+	
			
 
				+	int getNumRigidBodies() const;
			
 
				+
			
 
				+	int allocateCollidable();
			
 
				+
			
 
				+	int getStatic0Index() const
			
 
				+	{
			
 
				+		return m_static0Index;
			
 
				+	}
			
 
				+	b3Collidable& getCollidableCpu(int collidableIndex);
			
 
				+	const b3Collidable& getCollidableCpu(int collidableIndex) const;
			
 
				+
			
 
				+	const b3CpuNarrowPhaseInternalData*	getInternalData() const // exposes the internal data pointer as pointer-to-const
			
 
				+	{
			
 
				+			return m_data;
			
 
				+	}
			
 
				+
			
 
				+	const struct b3Aabb& getLocalSpaceAabb(int collidableIndex) const;
			
 
				+};
			
 
				+
			
 
				+#endif //B3_CPU_NARROWPHASE_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h
@@ -0,0 +1,24 @@
 
				+
			
 
				+#ifndef B3_RAYCAST_INFO_H
			
 
				+#define B3_RAYCAST_INFO_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3RayInfo // A ray described by two b3Vector3 end points.
			
 
				+{
			
 
				+	b3Vector3 m_from; // start point of the ray
			
 
				+	b3Vector3 m_to; // end point of the ray
			
 
				+};
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3RayHit // Result record for one ray; no constructor, so fields are not initialized by this struct.
			
 
				+{
			
 
				+		b3Scalar	m_hitFraction; // presumably the position of the hit along the ray from m_from to m_to - TODO confirm
			
 
				+		int	m_hitBody;
			
 
				+		int	m_hitResult1; // meaning not visible in this header
			
 
				+		int	m_hitResult2; // meaning not visible in this header
			
 
				+		b3Vector3 m_hitPoint;
			
 
				+		b3Vector3 m_hitNormal;
			
 
				+};
			
 
				+
			
 
				+#endif //B3_RAYCAST_INFO_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h
@@ -0,0 +1,30 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_RIGID_BODY_CL
			
 
				+#define B3_RIGID_BODY_CL
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+
			
 
				+inline float	b3GetInvMass(const b3RigidBodyData& body) // Free-function accessor: returns body.m_invMass unchanged.
			
 
				+{
			
 
				+		return body.m_invMass;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif//B3_RIGID_BODY_CL
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
@@ -0,0 +1,20 @@
 
				+
			
 
				+#ifndef B3_BVH_SUBTREE_INFO_DATA_H
			
 
				+#define B3_BVH_SUBTREE_INFO_DATA_H
			
 
				+
			
 
				+typedef struct b3BvhSubtreeInfoData b3BvhSubtreeInfoData_t;
			
 
				+
			
 
				+struct b3BvhSubtreeInfoData // Header for one BVH subtree: a quantized AABB plus the node range it covers.
			
 
				+{
			
 
				+	//12 bytes
			
 
				+	unsigned short int	m_quantizedAabbMin[3]; // quantized lower corner of the subtree bounds
			
 
				+	unsigned short int	m_quantizedAabbMax[3]; // quantized upper corner of the subtree bounds
			
 
				+	//4 bytes, points to the root of the subtree
			
 
				+	int			m_rootNodeIndex;
			
 
				+	//4 bytes
			
 
				+	int			m_subtreeSize; // b3BvhTraversal walks nodes [m_rootNodeIndex, m_rootNodeIndex+m_subtreeSize)
			
 
				+	int			m_padding[3]; // explicit padding; assuming 2-byte short and 4-byte int the struct totals 32 bytes -- confirm on target
			
 
				+};
			
 
				+
			
 
				+#endif //B3_BVH_SUBTREE_INFO_DATA_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h
@@ -0,0 +1,126 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+// work-in-progress
			
 
				+void   b3BvhTraversal( __global const b3Int4* pairs, // For pair `id`: query body B's AABB against body A's quantized BVH and append one output pair per overlapping leaf.
			
 
				+									__global const b3RigidBodyData* rigidBodies, 
			
 
				+									__global const b3Collidable* collidables,
			
 
				+									__global b3Aabb* aabbs,
			
 
				+									__global b3Int4* concavePairsOut,
			
 
				+									__global volatile int* numConcavePairsOut,
			
 
				+									__global const b3BvhSubtreeInfo* subtreeHeadersRoot,
			
 
				+									__global const b3QuantizedBvhNode* quantizedNodesRoot,
			
 
				+									__global const b3BvhInfo* bvhInfos,
			
 
				+									int numPairs,
			
 
				+									int maxNumConcavePairsCapacity,
			
 
				+									int id)
			
 
				+{
			
 
				+	
			
 
				+	int bodyIndexA = pairs[id].x; // note: `id` is not range-checked against numPairs in this function
			
 
				+	int bodyIndexB = pairs[id].y;
			
 
				+	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
			
 
				+	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
			
 
				+	
			
 
				+	//once the broadphase avoids static-static pairs, we can remove this test
			
 
				+	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
			
 
				+	{
			
 
				+		return;
			
 
				+	}
			
 
				+		
			
 
				+	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) // only handles A == concave trimesh; the swapped order is not processed here
			
 
				+		return;
			
 
				+
			
 
				+	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
			
 
				+		
			
 
				+	if (shapeTypeB!=SHAPE_CONVEX_HULL &&
			
 
				+		shapeTypeB!=SHAPE_SPHERE	&&
			
 
				+		shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS
			
 
				+		)
			
 
				+		return;
			
 
				+
			
 
				+	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes]; // m_numChildShapes shares a union with m_bvhIndex in b3Collidable, so this reads the same int as m_bvhIndex
			
 
				+
			
 
				+	b3Float4	bvhAabbMin = bvhInfo.m_aabbMin;
			
 
				+	b3Float4	bvhAabbMax = bvhInfo.m_aabbMax;
			
 
				+	b3Float4	bvhQuantization = bvhInfo.m_quantization;
			
 
				+	int numSubtreeHeaders = bvhInfo.m_numSubTrees;
			
 
				+	__global const b3BvhSubtreeInfoData* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; // this mesh's slice of the shared subtree-header array
			
 
				+	__global const b3QuantizedBvhNodeData* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; // this mesh's slice of the shared node array
			
 
				+	
			
 
				+
			
 
				+	unsigned short int quantizedQueryAabbMin[3];
			
 
				+	unsigned short int quantizedQueryAabbMax[3];
			
 
				+	b3QuantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_minVec,false,bvhAabbMin, bvhAabbMax,bvhQuantization); // quantize B's AABB min corner against the BVH bounds (flag false for min, true for max)
			
 
				+	b3QuantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_maxVec,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);
			
 
				+	
			
 
				+	for (int i=0;i<numSubtreeHeaders;i++)
			
 
				+	{
			
 
				+		b3BvhSubtreeInfoData subtree = subtreeHeaders[i];
			
 
				+				
			
 
				+		int overlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
			
 
				+		if (overlap != 0)
			
 
				+		{
			
 
				+			int startNodeIndex = subtree.m_rootNodeIndex;
			
 
				+			int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;
			
 
				+			int curIndex = startNodeIndex;
			
 
				+			int escapeIndex;
			
 
				+			int isLeafNode;
			
 
				+			int aabbOverlap;
			
 
				+			while (curIndex < endNodeIndex) // linear walk over the subtree's node range; no explicit stack is used
			
 
				+			{
			
 
				+				b3QuantizedBvhNodeData rootNode = quantizedNodes[curIndex]; // despite the name, this is the current node of the walk
			
 
				+				aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);
			
 
				+				isLeafNode = b3IsLeaf(&rootNode);
			
 
				+				if (aabbOverlap)
			
 
				+				{
			
 
				+					if (isLeafNode)
			
 
				+					{
			
 
				+						int triangleIndex = b3GetTriangleIndex(&rootNode);
			
 
				+						if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)
			
 
				+						{
			
 
				+								int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
			
 
				+								int pairIdx = b3AtomicAdd (numConcavePairsOut,numChildrenB); // reserve numChildrenB consecutive output slots; counter advances even when capacity is exceeded
			
 
				+								for (int b=0;b<numChildrenB;b++)
			
 
				+								{
			
 
				+									if ((pairIdx+b)<maxNumConcavePairsCapacity) // slots past capacity are silently dropped
			
 
				+									{
			
 
				+										int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
			
 
				+										b3Int4 newPair = b3MakeInt4(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB); // (bodyA, bodyB, triangle, child shape of B)
			
 
				+										concavePairsOut[pairIdx+b] = newPair;
			
 
				+									}
			
 
				+								}
			
 
				+						} else
			
 
				+						{
			
 
				+							int pairIdx = b3AtomicInc(numConcavePairsOut); // reserve a single output slot
			
 
				+							if (pairIdx<maxNumConcavePairsCapacity)
			
 
				+							{
			
 
				+								b3Int4 newPair = b3MakeInt4(bodyIndexA,bodyIndexB,triangleIndex,0); // w is 0 for non-compound B
			
 
				+								concavePairsOut[pairIdx] = newPair;
			
 
				+							}
			
 
				+						}
			
 
				+					} 
			
 
				+					curIndex++; // overlapping node (leaf or internal): step to the next node in the array
			
 
				+				} else
			
 
				+				{
			
 
				+					if (isLeafNode)
			
 
				+					{
			
 
				+						curIndex++;
			
 
				+					} else
			
 
				+					{
			
 
				+						escapeIndex = b3GetEscapeIndex(&rootNode); // non-overlapping internal node: jump forward by its escape index
			
 
				+						curIndex += escapeIndex;
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+}
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h
@@ -0,0 +1,188 @@
 
				+#ifndef B3_CLIP_FACES_H
			
 
				+#define B3_CLIP_FACES_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+
			
 
				+
			
 
				+inline b3Float4 b3Lerp3(b3Float4ConstArg a,b3Float4ConstArg b, float  t) // Linear interpolation a + (b - a) * t on x, y, z; returns a when t == 0 and b when t == 1.
			
 
				+{
			
 
				+	return b3MakeFloat4(	a.x + (b.x - a.x) * t,
			
 
				+						a.y + (b.y - a.y) * t,
			
 
				+						a.z + (b.z - a.z) * t,
			
 
				+						0.f); // w of the result is always 0; the inputs' w is ignored
			
 
				+}
			
 
				+
			
 
				+// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
			
 
				+int clipFaceGlobal(__global const b3Float4* pVtxIn, int numVertsIn, b3Float4ConstArg planeNormalWS,float planeEqWS, __global b3Float4* ppVtxOut) // Keeps the part of the polygon where dot(n,v)+planeEqWS < 0; writes it to ppVtxOut and returns the vertex count.
			
 
				+{
			
 
				+	
			
 
				+	int ve;
			
 
				+	float ds, de; // signed plane distances of the current edge's start and end vertices
			
 
				+	int numVertsOut = 0;
			
 
				+    //double-check next test
			
 
				+    //	if (numVertsIn < 2)
			
 
				+    //		return 0;
			
 
				+    
			
 
				+	b3Float4 firstVertex=pVtxIn[numVertsIn-1]; // start from the closing edge (last -> first); with the guard above disabled, numVertsIn == 0 reads index -1
			
 
				+	b3Float4 endVertex = pVtxIn[0];
			
 
				+	
			
 
				+	ds = b3Dot(planeNormalWS,firstVertex)+planeEqWS;
			
 
				+    
			
 
				+	for (ve = 0; ve < numVertsIn; ve++)
			
 
				+	{
			
 
				+		endVertex=pVtxIn[ve];
			
 
				+		de = b3Dot(planeNormalWS,endVertex)+planeEqWS;
			
 
				+		if (ds<0)
			
 
				+		{
			
 
				+			if (de<0)
			
 
				+			{
			
 
				+				// Start < 0, end < 0, so output endVertex
			
 
				+				ppVtxOut[numVertsOut++] = endVertex;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// Start < 0, end >= 0, so output intersection
			
 
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); // t = ds/(ds-de) is where the edge crosses the plane
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (de<0)
			
 
				+			{
			
 
				+				// Start >= 0, end < 0 so output intersection and end
			
 
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
			
 
				+				ppVtxOut[numVertsOut++] = endVertex;
			
 
				+			}
			
 
				+		} // both endpoints >= 0: the edge contributes nothing
			
 
				+		firstVertex = endVertex; // slide the edge window forward
			
 
				+		ds = de;
			
 
				+	}
			
 
				+	return numVertsOut; // no capacity check is made on ppVtxOut in this function
			
 
				+}
			
 
				+
			
 
				+
			
 
				+__kernel void   clipFacesAndFindContactsKernel(    __global const b3Float4* separatingNormals, // For one pair: clip B's face polygon against the side planes of A's face, then keep points within maxDist of A's face plane. separatingNormals is not read in this body.
			
 
				+                                                   __global const int* hasSeparatingAxis,
			
 
				+                                                   __global b3Int4* clippingFacesOut,
			
 
				+                                                   __global b3Float4* worldVertsA1,
			
 
				+                                                   __global b3Float4* worldNormalsA1,
			
 
				+                                                   __global b3Float4* worldVertsB1,
			
 
				+                                                   __global b3Float4* worldVertsB2,
			
 
				+                                                    int vertexFaceCapacity,
			
 
				+															int pairIndex
			
 
				+                                                   )
			
 
				+{
			
 
				+//    int i = get_global_id(0);
			
 
				+	//int pairIndex = i;
			
 
				+	int i = pairIndex; // the pair index is passed in explicitly instead of coming from get_global_id
			
 
				+    
			
 
				+	float minDist = -1e30f; // lower clamp applied to the computed depth
			
 
				+	float maxDist = 0.02f; // points with depth above this are discarded
			
 
				+    
			
 
				+//	if (i<numPairs)
			
 
				+	{
			
 
				+        
			
 
				+		if (hasSeparatingAxis[i]) // pairs whose flag is zero are skipped entirely
			
 
				+		{
			
 
				+            
			
 
				+//			int bodyIndexA = pairs[i].x;
			
 
				+	//		int bodyIndexB = pairs[i].y;
			
 
				+		    
			
 
				+            int numLocalContactsOut = 0;
			
 
				+
			
 
				+            int capacityWorldVertsB2 = vertexFaceCapacity; // per-pair stride used for worldVertsA1, worldVertsB1 and worldVertsB2 alike
			
 
				+            
			
 
				+            __global b3Float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; // this pair's input polygon slice
			
 
				+            __global b3Float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; // this pair's scratch/output slice
			
 
				+            
			
 
				+
			
 
				+            {
			
 
				+                __global b3Int4* clippingFaces = clippingFacesOut;
			
 
				+            
			
 
				+                
			
 
				+                int closestFaceA = clippingFaces[pairIndex].x;
			
 
				+                int closestFaceB = clippingFaces[pairIndex].y; // read but not used below
			
 
				+                int numVertsInA = clippingFaces[pairIndex].z;
			
 
				+                int numVertsInB = clippingFaces[pairIndex].w;
			
 
				+                
			
 
				+                int numVertsOut = 0;
			
 
				+                
			
 
				+                if (closestFaceA>=0)
			
 
				+                {
			
 
				+                    
			
 
				+                    
			
 
				+                    
			
 
				+                    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
			
 
				+                    
			
 
				+                    for(int e0=0;e0<numVertsInA;e0++)
			
 
				+                    {
			
 
				+                        const b3Float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; // edge start vertex of face A
			
 
				+                        const b3Float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; // edge end vertex, wrapping to vertex 0
			
 
				+                        const b3Float4 WorldEdge0 = aw - bw;
			
 
				+                        b3Float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
			
 
				+                        b3Float4 planeNormalWS1 = -b3Cross(WorldEdge0,worldPlaneAnormal1); // side-plane normal built from the edge and the face normal
			
 
				+                        b3Float4 worldA1 = aw;
			
 
				+                        float planeEqWS1 = -b3Dot(worldA1,planeNormalWS1); // plane offset chosen so the plane passes through aw
			
 
				+                        b3Float4 planeNormalWS = planeNormalWS1;
			
 
				+                        float planeEqWS=planeEqWS1;
			
 
				+                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);
			
 
				+                        __global b3Float4* tmp = pVtxOut; // swap buffers so the clipped polygon becomes the next input
			
 
				+                        pVtxOut = pVtxIn;
			
 
				+                        pVtxIn = tmp;
			
 
				+                        numVertsInB = numVertsOut;
			
 
				+                        numVertsOut = 0;
			
 
				+                    }
			
 
				+                    
			
 
				+                    b3Float4 planeNormalWS = worldNormalsA1[pairIndex];
			
 
				+                    float planeEqWS=-b3Dot(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); // face-A plane through its first vertex
			
 
				+                    
			
 
				+                    for (int i=0;i<numVertsInB;i++) // this `i` shadows the outer pair index
			
 
				+                    {
			
 
				+                        float depth = b3Dot(planeNormalWS,pVtxIn[i])+planeEqWS;
			
 
				+                        if (depth <=minDist)
			
 
				+                        {
			
 
				+                            depth = minDist;
			
 
				+                        }
			
 
				+/*
			
 
				+						static float maxDepth = 0.f;
			
 
				+						if (depth < maxDepth)
			
 
				+						{
			
 
				+							maxDepth = depth;
			
 
				+							if (maxDepth < -10)
			
 
				+							{
			
 
				+								printf("error at framecount %d?\n",myframecount);
			
 
				+							}
			
 
				+							printf("maxDepth = %f\n", maxDepth);
			
 
				+
			
 
				+						}
			
 
				+*/
			
 
				+                        if (depth <=maxDist)
			
 
				+                        {
			
 
				+                            b3Float4 pointInWorld = pVtxIn[i];
			
 
				+                            pVtxOut[numLocalContactsOut++] = b3MakeFloat4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); // xyz = point, w = depth
			
 
				+                        }
			
 
				+                    }
			
 
				+                    
			
 
				+                }
			
 
				+                clippingFaces[pairIndex].w =numLocalContactsOut; // .w is overwritten: vertex count of B on entry, contact count on exit
			
 
				+                
			
 
				+
			
 
				+            }
			
 
				+            
			
 
				+            for (int i=0;i<numLocalContactsOut;i++) // copy the kept points from pVtxOut into pVtxIn (which buffer each names depends on how many swaps ran)
			
 
				+                pVtxIn[i] = pVtxOut[i];
			
 
				+                
			
 
				+		}//		if (hasSeparatingAxis[i])
			
 
				+	}//	if (i<numPairs)
			
 
				+    
			
 
				+}
			
 
				+
			
 
				+#endif //B3_CLIP_FACES_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h
@@ -0,0 +1,60 @@
 
				+
			
 
				+#ifndef B3_COLLIDABLE_H
			
 
				+#define B3_COLLIDABLE_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+
			
 
				+enum b3ShapeTypes // Shape type tags; b3Collidable::m_shapeType is compared against these in b3BvhTraversal.
			
 
				+{
			
 
				+	SHAPE_HEIGHT_FIELD=1,
			
 
				+
			
 
				+	SHAPE_CONVEX_HULL=3, // values 0 and 2 are not assigned to any enumerator
			
 
				+	SHAPE_PLANE=4,
			
 
				+	SHAPE_CONCAVE_TRIMESH=5,
			
 
				+	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,
			
 
				+	SHAPE_SPHERE=7,
			
 
				+	MAX_NUM_SHAPE_TYPES, // implicit value: one past SHAPE_SPHERE, i.e. 8
			
 
				+};
			
 
				+
			
 
				+typedef struct b3Collidable b3Collidable_t;
			
 
				+
			
 
				+
			
 
				+struct b3Collidable // Shape descriptor: two overlaid int/float slots plus a type tag and a shape index (four 4-byte fields assuming 32-bit int/float).
			
 
				+{
			
 
				+	union { // both names alias the same int; which one applies presumably depends on m_shapeType -- confirm with callers
			
 
				+		int m_numChildShapes;
			
 
				+		int m_bvhIndex;
			
 
				+	};
			
 
				+	union // float and int views of the same storage
			
 
				+	{
			
 
				+		float m_radius;
			
 
				+		int	m_compoundBvhIndex;
			
 
				+	};
			
 
				+
			
 
				+	int m_shapeType; // compared against b3ShapeTypes values
			
 
				+	int m_shapeIndex; // index into a per-type shape array (e.g. convex data or first child shape), as used by the narrowphase code
			
 
				+};
			
 
				+
			
 
				+
			
 
				+typedef struct b3GpuChildShape b3GpuChildShape_t;
			
 
				+struct b3GpuChildShape // One child of a compound shape: a position, an orientation and a shape index.
			
 
				+{
			
 
				+	b3Float4	m_childPosition; // presumably relative to the parent compound -- confirm
			
 
				+	b3Quat		m_childOrientation; // presumably relative to the parent compound -- confirm
			
 
				+	int m_shapeIndex;
			
 
				+	int m_unused0; // three unused ints follow m_shapeIndex; presumably padding -- confirm
			
 
				+	int m_unused1;
			
 
				+	int m_unused2;
			
 
				+};
			
 
				+
			
 
				+struct b3CompoundOverlappingPair // Four ints identifying a body pair and one child shape on each side.
			
 
				+{
			
 
				+	int m_bodyIndexA;
			
 
				+	int m_bodyIndexB;
			
 
				+//	int	m_pairType;
			
 
				+	int m_childShapeIndexA; // NOTE(review): the value used for a non-compound body is not defined in this header
			
 
				+	int m_childShapeIndexB;
			
 
				+};
			
 
				+#endif //B3_COLLIDABLE_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h
@@ -0,0 +1,40 @@
 
				+#ifndef B3_CONTACT4DATA_H
			
 
				+#define B3_CONTACT4DATA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+typedef  struct b3Contact4Data b3Contact4Data_t;
			
 
				+
			
 
				+struct b3Contact4Data // Contact record holding up to four points for one body pair.
			
 
				+{
			
 
				+	b3Float4	m_worldPosB[4]; // up to four contact positions; the live count is stored in m_worldNormalOnB.w
			
 
				+//	b3Float4	m_localPosA[4];
			
 
				+//	b3Float4	m_localPosB[4];
			
 
				+	b3Float4	m_worldNormalOnB;	//	w: m_nPoints
			
 
				+	unsigned short  m_restituitionCoeffCmp; // 16-bit value; encoding not shown in this header
			
 
				+	unsigned short  m_frictionCoeffCmp; // 16-bit value; encoding not shown in this header
			
 
				+	int m_batchIdx;
			
 
				+	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr
			
 
				+	int m_bodyBPtrAndSignBit;
			
 
				+
			
 
				+	int	m_childIndexA;
			
 
				+	int	m_childIndexB;
			
 
				+	int m_unused1; // two trailing unused ints; presumably padding -- confirm
			
 
				+	int m_unused2;
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact) // Reads the point count stored as a float in m_worldNormalOnB.w.
			
 
				+{
			
 
				+	return (int)contact->m_worldNormalOnB.w; // float -> int conversion truncates toward zero
			
 
				+};
			
 
				+
			
 
				+inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints) // Stores the point count into m_worldNormalOnB.w as a float.
			
 
				+{
			
 
				+	contact->m_worldNormalOnB.w = (float)numPoints; // overwrites w only; x, y, z of the normal are untouched
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_CONTACT4DATA_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
@@ -0,0 +1,523 @@
 
				+
			
 
				+#ifndef B3_CONTACT_CONVEX_CONVEX_SAT_H
			
 
				+#define B3_CONTACT_CONVEX_CONVEX_SAT_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h"
			
 
				+
			
 
				+#define B3_MAX_VERTS 1024
			
 
				+
			
 
				+
			
 
				+
			
 
				+inline b3Float4 b3Lerp3(const b3Float4& a,const b3Float4& b, float  t) // Linear interpolation a + (b - a) * t on x, y, z; returns a when t == 0 and b when t == 1.
			
 
				+{
			
 
				+	return b3MakeVector3(	a.x + (b.x - a.x) * t,
			
 
				+						a.y + (b.y - a.y) * t,
			
 
				+						a.z + (b.z - a.z) * t,
			
 
				+						0.f); // fourth component is always 0; the inputs' w is ignored
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
			
 
				+inline int b3ClipFace(const b3Float4* pVtxIn, int numVertsIn, b3Float4& planeNormalWS,float planeEqWS, b3Float4* ppVtxOut) // Keeps the part of the polygon where dot3(n,v)+planeEqWS < 0; writes it to ppVtxOut and returns the vertex count.
			
 
				+{
			
 
				+	
			
 
				+	int ve;
			
 
				+	float ds, de; // signed plane distances of the current edge's start and end vertices
			
 
				+	int numVertsOut = 0;
			
 
				+	if (numVertsIn < 2) // fewer than two vertices: nothing to clip, and avoids reading pVtxIn[-1] when empty
			
 
				+		return 0;
			
 
				+
			
 
				+	b3Float4 firstVertex=pVtxIn[numVertsIn-1]; // start from the closing edge (last -> first)
			
 
				+	b3Float4 endVertex = pVtxIn[0];
			
 
				+	
			
 
				+	ds = b3Dot3F4(planeNormalWS,firstVertex)+planeEqWS;
			
 
				+
			
 
				+	for (ve = 0; ve < numVertsIn; ve++)
			
 
				+	{
			
 
				+		endVertex=pVtxIn[ve];
			
 
				+
			
 
				+		de = b3Dot3F4(planeNormalWS,endVertex)+planeEqWS;
			
 
				+
			
 
				+		if (ds<0)
			
 
				+		{
			
 
				+			if (de<0)
			
 
				+			{
			
 
				+				// Start < 0, end < 0, so output endVertex
			
 
				+				ppVtxOut[numVertsOut++] = endVertex;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				// Start < 0, end >= 0, so output intersection
			
 
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); // t = ds/(ds-de) is where the edge crosses the plane
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if (de<0)
			
 
				+			{
			
 
				+				// Start >= 0, end < 0 so output intersection and end
			
 
				+				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
			
 
				+				ppVtxOut[numVertsOut++] = endVertex;
			
 
				+			}
			
 
				+		} // both endpoints >= 0: the edge contributes nothing
			
 
				+		firstVertex = endVertex; // slide the edge window forward
			
 
				+		ds = de;
			
 
				+	}
			
 
				+	return numVertsOut; // no capacity check is made on ppVtxOut in this function
			
 
				+}
			
 
				+
			
 
				+
			
 
				+inline int b3ClipFaceAgainstHull(const b3Float4& separatingNormal, const b3ConvexPolyhedronData* hullA,  // Clips the world-space polygon in worldVertsB1 against hull A's best-facing face; writes (x,y,z,depth) contacts and returns their count.
			
 
				+	const b3Float4& posA, const b3Quaternion& ornA, b3Float4* worldVertsB1, int numWorldVertsB1,
			
 
				+	b3Float4* worldVertsB2, int capacityWorldVertsB2, // worldVertsB2 is scratch space; capacityWorldVertsB2 is not consulted in this body
			
 
				+	const float minDist, float maxDist,
			
 
				+	const b3AlignedObjectArray<b3Float4>& verticesA,	const b3AlignedObjectArray<b3GpuFace>& facesA,	const b3AlignedObjectArray<int>& indicesA,
			
 
				+	//const b3Float4* verticesB,	const b3GpuFace* facesB,	const int* indicesB,
			
 
				+	b3Float4* contactsOut,
			
 
				+	int contactCapacity)
			
 
				+{
			
 
				+	int numContactsOut = 0;
			
 
				+
			
 
				+	b3Float4* pVtxIn = worldVertsB1;
			
 
				+	b3Float4* pVtxOut = worldVertsB2;
			
 
				+	
			
 
				+	int numVertsIn = numWorldVertsB1;
			
 
				+	int numVertsOut = 0;
			
 
				+
			
 
				+	int closestFaceA=-1;
			
 
				+	{
			
 
				+		float dmin = FLT_MAX;
			
 
				+		for(int face=0;face<hullA->m_numFaces;face++) // pick the face of A whose world normal has the smallest dot product with separatingNormal
			
 
				+		{
			
 
				+			const b3Float4 Normal = b3MakeVector3(
			
 
				+				facesA[hullA->m_faceOffset+face].m_plane.x, 
			
 
				+				facesA[hullA->m_faceOffset+face].m_plane.y, 
			
 
				+				facesA[hullA->m_faceOffset+face].m_plane.z,0.f);
			
 
				+			const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal);
			
 
				+		
			
 
				+			float d = b3Dot3F4(faceANormalWS,separatingNormal);
			
 
				+			if (d < dmin)
			
 
				+			{
			
 
				+				dmin = d;
			
 
				+				closestFaceA = face;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	if (closestFaceA<0) // no face selected (e.g. hull has no faces): report zero contacts
			
 
				+		return numContactsOut;
			
 
				+
			
 
				+	b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];
			
 
				+
			
 
				+	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
			
 
				+	int numContacts = numWorldVertsB1;
			
 
				+	int numVerticesA = polyA.m_numIndices;
			
 
				+	for(int e0=0;e0<numVerticesA;e0++)
			
 
				+	{
			
 
				+		const b3Float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; // edge start vertex (local space)
			
 
				+		const b3Float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; // edge end vertex, wrapping to vertex 0
			
 
				+		const b3Float4 edge0 = a - b;
			
 
				+		const b3Float4 WorldEdge0 = b3QuatRotate(ornA,edge0);
			
 
				+		b3Float4 planeNormalA = b3MakeFloat4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
			
 
				+		b3Float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA);
			
 
				+
			
 
				+		b3Float4 planeNormalWS1 = -b3Cross3(WorldEdge0,worldPlaneAnormal1); // side-plane normal built from the edge and the face normal
			
 
				+		b3Float4 worldA1 = b3TransformPoint(a,posA,ornA);
			
 
				+		float planeEqWS1 = -b3Dot3F4(worldA1,planeNormalWS1); // plane offset chosen so the plane passes through the world-space vertex a
			
 
				+		
			
 
				+		b3Float4 planeNormalWS = planeNormalWS1;
			
 
				+		float planeEqWS=planeEqWS1;
			
 
				+		
			
 
				+		//clip face
			
 
				+		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
			
 
				+		numVertsOut = b3ClipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);
			
 
				+
			
 
				+		//btSwap(pVtxIn,pVtxOut);
			
 
				+		b3Float4* tmp = pVtxOut; // swap buffers so the clipped polygon becomes the next input
			
 
				+		pVtxOut = pVtxIn;
			
 
				+		pVtxIn = tmp;
			
 
				+		numVertsIn = numVertsOut;
			
 
				+		numVertsOut = 0;
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+	// only keep points that are behind the witness face
			
 
				+	{
			
 
				+		b3Float4 localPlaneNormal  = b3MakeFloat4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
			
 
				+		float localPlaneEq = polyA.m_plane.w;
			
 
				+		b3Float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal);
			
 
				+		float planeEqWS=localPlaneEq-b3Dot3F4(planeNormalWS,posA); // face plane of A moved into world space
			
 
				+		for (int i=0;i<numVertsIn;i++)
			
 
				+		{
			
 
				+			float depth = b3Dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
			
 
				+			if (depth <=minDist) // clamp the depth from below to minDist
			
 
				+			{
			
 
				+				depth = minDist;
			
 
				+			}
			
 
				+			if (numContactsOut<contactCapacity)
			
 
				+			{
			
 
				+				if (depth <=maxDist)
			
 
				+				{
			
 
				+					b3Float4 pointInWorld = pVtxIn[i];
			
 
				+					//resultOut.addContactPoint(separatingNormal,point,depth);
			
 
				+					contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); // xyz = point, w = depth
			
 
				+					//printf("depth=%f\n",depth);
			
 
				+				}
			
 
				+			} else
			
 
				+			{
			
 
				+				b3Error("exceeding contact capacity (%d,%d)\n", numContactsOut,contactCapacity); // fixed: the format previously had a stray 'f' after the second %d
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return numContactsOut;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+inline int	b3ClipHullAgainstHull(const b3Float4& separatingNormal, // Picks hull B's face most aligned with separatingNormal, transforms it to world space and clips it against hull A; returns the contact count.
			
 
				+	const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, 
			
 
				+	const b3Float4& posA, const b3Quaternion& ornA,const b3Float4& posB, const b3Quaternion& ornB, 
			
 
				+	b3Float4* worldVertsB1, b3Float4* worldVertsB2, int capacityWorldVerts, // caller-provided scratch buffers; capacityWorldVerts is only forwarded, not checked here
			
 
				+	const float minDist, float maxDist,
			
 
				+	const b3AlignedObjectArray<b3Float4>& verticesA,	const b3AlignedObjectArray<b3GpuFace>& facesA,	const b3AlignedObjectArray<int>& indicesA,
			
 
				+	const b3AlignedObjectArray<b3Float4>& verticesB,	const b3AlignedObjectArray<b3GpuFace>& facesB,	const b3AlignedObjectArray<int>& indicesB,
			
 
				+
			
 
				+	b3Float4*	contactsOut,
			
 
				+	int contactCapacity)
			
 
				+{
			
 
				+	int numContactsOut = 0;
			
 
				+	int numWorldVertsB1= 0;
			
 
				+	
			
 
				+	B3_PROFILE("clipHullAgainstHull");
			
 
				+
			
 
				+	float curMaxDist=maxDist;
			
 
				+	int closestFaceB=-1;
			
 
				+	float dmax = -FLT_MAX;
			
 
				+
			
 
				+	{
			
 
				+		//B3_PROFILE("closestFaceB");
			
 
				+		if (hullB.m_numFaces!=1)
			
 
				+		{
			
 
				+			//printf("wtf\n");
			
 
				+		}
			
 
				+		static bool once = true; // only read inside the BT_DEBUG_SAT_FACE debug blocks below
			
 
				+		//printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
			
 
				+		
			
 
				+		for(int face=0;face<hullB.m_numFaces;face++) // pick the face of B whose world normal has the largest dot product with separatingNormal
			
 
				+		{
			
 
				+#ifdef BT_DEBUG_SAT_FACE
			
 
				+			if (once)
			
 
				+				printf("face %d\n",face);
			
 
				+			const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face];
			
 
				+			if (once)
			
 
				+			{
			
 
				+				for (int i=0;i<faceB->m_numIndices;i++)
			
 
				+				{
			
 
				+					b3Float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]];
			
 
				+					printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z);
			
 
				+				}
			
 
				+			}
			
 
				+#endif //BT_DEBUG_SAT_FACE
			
 
				+			//if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
			
 
				+			{
			
 
				+				const b3Float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, 
			
 
				+					facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f);
			
 
				+				const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
			
 
				+#ifdef BT_DEBUG_SAT_FACE
			
 
				+				if (once)
			
 
				+					printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z);
			
 
				+#endif
			
 
				+				float d = b3Dot3F4(WorldNormal,separatingNormal);
			
 
				+				if (d > dmax)
			
 
				+				{
			
 
				+					dmax = d;
			
 
				+					closestFaceB = face;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		once = false;
			
 
				+	}
			
 
				+
			
 
				+	b3Assert(closestFaceB>=0);
			
 
				+	if (closestFaceB>=0) // runtime guard added: never index facesB with -1 if the assert above is compiled out
			
 
				+	{
			
 
				+		//B3_PROFILE("worldVertsB1");
			
 
				+		const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB];
			
 
				+		const int numVertices = polyB.m_numIndices;
			
 
				+		for(int e0=0;e0<numVertices;e0++)
			
 
				+		{
			
 
				+			const b3Float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
			
 
				+			worldVertsB1[numWorldVertsB1++] = b3TransformPoint(b,posB,ornB); // face vertices of B in world space
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (closestFaceB>=0)
			
 
				+	{
			
 
				+		//B3_PROFILE("clipFaceAgainstHull");
			
 
				+		numContactsOut = b3ClipFaceAgainstHull((b3Float4&)separatingNormal, &hullA, 
			
 
				+				posA,ornA,
			
 
				+				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,
			
 
				+				verticesA,				facesA,				indicesA,
			
 
				+				contactsOut,contactCapacity);
			
 
				+	}
			
 
				+
			
 
				+	return numContactsOut; // 0 when no face of B was selected
			
 
				+}
			
 
				+
			
 
				+// Clips hull B against hull A along sepNormalWorldSpace (through b3ClipHullAgainstHull), reduces the points with b3ReduceContacts and appends one b3Contact4Data to globalContactOut.
			
 
				+// nContacts is used as the write position and is incremented only when a contact is stored; once nContacts >= maxContactCapacity nothing is stored and b3Error is called instead.
			
 
				+// Returns the index of the stored contact, or -1 when no contact point was produced or the capacity was reached.
			
 
				+inline int b3ClipHullHullSingle(
			
 
				+			int bodyIndexA, int bodyIndexB,
			
 
				+										 const b3Float4& posA,
			
 
				+										 const b3Quaternion& ornA,
			
 
				+										 const b3Float4& posB,
			
 
				+										 const b3Quaternion& ornB,
			
 
				+
			
 
				+			int collidableIndexA, int collidableIndexB,
			
 
				+
			
 
				+			const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, 
			
 
				+			b3AlignedObjectArray<b3Contact4Data>* globalContactOut, 
			
 
				+			int& nContacts,
			
 
				+			
			
 
				+			const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
			
 
				+			const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
			
 
				+	
			
 
				+			const b3AlignedObjectArray<b3Vector3>& verticesA, 
			
 
				+			const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, // not referenced in the body
			
 
				+			const b3AlignedObjectArray<b3GpuFace>& facesA,
			
 
				+			const b3AlignedObjectArray<int>& indicesA,
			
 
				+	
			
 
				+			const b3AlignedObjectArray<b3Vector3>& verticesB,
			
 
				+			const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, // not referenced in the body
			
 
				+			const b3AlignedObjectArray<b3GpuFace>& facesB,
			
 
				+			const b3AlignedObjectArray<int>& indicesB,
			
 
				+
			
 
				+			const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
			
 
				+			const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
			
 
				+			const b3Vector3& sepNormalWorldSpace,
			
 
				+			int maxContactCapacity			)
			
 
				+{
			
 
				+	int contactIndex = -1; // stays -1 unless a contact is written below
			
 
				+	b3ConvexPolyhedronData hullA, hullB; // local copies filled below; not read again in this function
			
 
				+    
			
 
				+    b3Collidable colA = hostCollidablesA[collidableIndexA];
			
 
				+    hullA = hostConvexDataA[colA.m_shapeIndex];
			
 
				+    //printf("numvertsA = %d\n",hullA.m_numVertices);
			
 
				+    
			
 
				+    
			
 
				+    b3Collidable colB = hostCollidablesB[collidableIndexB];
			
 
				+    hullB = hostConvexDataB[colB.m_shapeIndex];
			
 
				+    //printf("numvertsB = %d\n",hullB.m_numVertices);
			
 
				+    
			
 
				+	
			
 
				+	b3Float4 contactsOut[B3_MAX_VERTS]; // receives the clipped points from b3ClipHullAgainstHull
			
 
				+	int localContactCapacity = B3_MAX_VERTS;
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+	b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
			
 
				+	b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
			
 
				+#endif
			
 
				+	
			
 
				+	
			
 
				+	{
			
 
				+		
			
 
				+		b3Float4 worldVertsB1[B3_MAX_VERTS]; // scratch vertex buffers handed to b3ClipHullAgainstHull
			
 
				+		b3Float4 worldVertsB2[B3_MAX_VERTS];
			
 
				+		int capacityWorldVerts = B3_MAX_VERTS;
			
 
				+
			
 
				+		b3Float4 hostNormal = b3MakeFloat4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); // separating normal with w forced to 0
			
 
				+		int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
			
 
				+		int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
			
 
				+
			
 
				+		b3Scalar minDist = -1; // minDist/maxDist are forwarded unchanged to b3ClipHullAgainstHull
			
 
				+		b3Scalar maxDist = 0.;
			
 
				+
			
 
				+		        
			
 
				+
			
 
				+		b3Transform trA,trB; // only origin and rotation are set (setIdentity calls are commented out)
			
 
				+		{
			
 
				+		//B3_PROFILE("b3TransformPoint computation");
			
 
				+		//trA.setIdentity();
			
 
				+		trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z));
			
 
				+		trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w));
			
 
				+				
			
 
				+		//trB.setIdentity();
			
 
				+		trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z));
			
 
				+		trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w));
			
 
				+		}
			
 
				+
			
 
				+		b3Quaternion trAorn = trA.getRotation();
			
 
				+        b3Quaternion trBorn = trB.getRotation();
			
 
				+        
			
 
				+		int numContactsOut = b3ClipHullAgainstHull(hostNormal, 
			
 
				+						hostConvexDataA.at(shapeA), 
			
 
				+						hostConvexDataB.at(shapeB),
			
 
				+								(b3Float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
			
 
				+								(b3Float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
			
 
				+								worldVertsB1,worldVertsB2,capacityWorldVerts,
			
 
				+								minDist, maxDist,
			
 
				+								verticesA,	facesA,indicesA,
			
 
				+								verticesB,	facesB,indicesB,
			
 
				+								
			
 
				+								contactsOut,localContactCapacity);
			
 
				+
			
 
				+		if (numContactsOut>0)
			
 
				+		{
			
 
				+			B3_PROFILE("overlap");
			
 
				+
			
 
				+			b3Float4 normalOnSurfaceB = (b3Float4&)hostNormal;
			
 
				+//			b3Float4 centerOut;
			
 
				+			
			
 
				+			b3Int4 contactIdx; // indices into contactsOut of the points kept by b3ReduceContacts; starts as 0,1,2,3
			
 
				+			contactIdx.x = 0;
			
 
				+			contactIdx.y = 1;
			
 
				+			contactIdx.z = 2;
			
 
				+			contactIdx.w = 3;
			
 
				+			
			
 
				+			int numPoints = 0;
			
 
				+					
			
 
				+			{
			
 
				+				B3_PROFILE("extractManifold");
			
 
				+				numPoints = b3ReduceContacts(contactsOut, numContactsOut, normalOnSurfaceB,  &contactIdx);
			
 
				+			}
			
 
				+					
			
 
				+			b3Assert(numPoints);
			
 
				+					
			
 
				+			if (nContacts<maxContactCapacity)
			
 
				+			{
			
 
				+				contactIndex = nContacts;
			
 
				+				globalContactOut->expand(); // NOTE(review): the at(nContacts) below assumes the array held exactly nContacts entries before this call - confirm with callers
			
 
				+				b3Contact4Data& contact = globalContactOut->at(nContacts);
			
 
				+				contact.m_batchIdx = 0;//i;
			
 
				+				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; // index is negated when m_invMass is zero; index 0 cannot carry that sign
			
 
				+				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB;
			
 
				+
			
 
				+				contact.m_frictionCoeffCmp = 45874; // hard-coded; presumably 0.7 scaled to a 16-bit range - TODO confirm
			
 
				+				contact.m_restituitionCoeffCmp = 0;
			
 
				+					
			
 
				+				float distance = 0.f; // not used below
			
 
				+				for (int p=0;p<numPoints;p++)
			
 
				+				{
			
 
				+					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B
			
 
				+					contact.m_worldNormalOnB = normalOnSurfaceB; // same value re-assigned on every iteration
			
 
				+				}
			
 
				+				//printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
			
 
				+				contact.m_worldNormalOnB.w = (b3Scalar)numPoints; // the point count is stored in the normal's w component
			
 
				+				nContacts++;
			
 
				+			} else
			
 
				+			{
			
 
				+				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return contactIndex;
			
 
				+}
			
 
				+
			
 
				+	
			
 
				+// Convex-vs-convex narrow phase for one body pair: calls b3FindSeparatingAxis and, when it returns true, b3ClipHullHullSingle to emit a contact.
			
 
				+// Both bodies are looked up in the same arrays (rigidBodies, collidables, convexShapes, convexVertices, uniqueEdges, convexIndices, faces), so each array is passed twice to the helpers.
			
 
				+// Returns the value of b3ClipHullHullSingle when it is called, otherwise -1.
			
 
				+inline int b3ContactConvexConvexSAT(
			
 
				+																int pairIndex, // not referenced in the body
			
 
				+																int bodyIndexA, int bodyIndexB, 
			
 
				+																int collidableIndexA, int collidableIndexB, 
			
 
				+																const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, 
			
 
				+																const b3AlignedObjectArray<b3Collidable>& collidables,
			
 
				+																const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
			
 
				+																const b3AlignedObjectArray<b3Float4>& convexVertices,
			
 
				+																const b3AlignedObjectArray<b3Float4>& uniqueEdges,
			
 
				+																const b3AlignedObjectArray<int>& convexIndices,
			
 
				+																const b3AlignedObjectArray<b3GpuFace>& faces,
			
 
				+																b3AlignedObjectArray<b3Contact4Data>& globalContactsOut,
			
 
				+																int& nGlobalContactsOut,
			
 
				+																int maxContactCapacity)
			
 
				+{
			
 
				+	int contactIndex = -1; // returned unchanged when b3FindSeparatingAxis returns false
			
 
				+
			
 
				+
			
 
				+	b3Float4 posA = rigidBodies[bodyIndexA].m_pos;
			
 
				+	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
			
 
				+	b3Float4 posB = rigidBodies[bodyIndexB].m_pos;
			
 
				+	b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
			
 
				+	
			
 
				+
			
 
				+	b3ConvexPolyhedronData hullA, hullB;
			
 
				+    
			
 
				+	b3Float4 sepNormalWorldSpace; // output of b3FindSeparatingAxis, input of b3ClipHullHullSingle
			
 
				+
			
 
				+	
			
 
				+
			
 
				+    b3Collidable colA = collidables[collidableIndexA];
			
 
				+    hullA = convexShapes[colA.m_shapeIndex];
			
 
				+    //printf("numvertsA = %d\n",hullA.m_numVertices);
			
 
				+    
			
 
				+    
			
 
				+    b3Collidable colB = collidables[collidableIndexB];
			
 
				+    hullB = convexShapes[colB.m_shapeIndex];
			
 
				+    //printf("numvertsB = %d\n",hullB.m_numVertices);
			
 
				+    
			
 
				+	
			
 
				+//	b3Float4 contactsOut[B3_MAX_VERTS];
			
 
				+	int contactCapacity = B3_MAX_VERTS; // not used below
			
 
				+	int numContactsOut=0; // not used below
			
 
				+
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+	b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
			
 
				+	b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
			
 
				+#endif
			
 
				+	
			
 
				+		bool foundSepAxis = b3FindSeparatingAxis(hullA,hullB, // NOTE(review): contacts are generated when this is true, so true presumably means "overlapping, axis of least penetration found" - confirm
			
 
				+							posA,
			
 
				+							ornA,
			
 
				+							posB,
			
 
				+							ornB,
			
 
				+
			
 
				+							convexVertices,uniqueEdges,faces,convexIndices,
			
 
				+							convexVertices,uniqueEdges,faces,convexIndices,
			
 
				+							
			
 
				+							sepNormalWorldSpace
			
 
				+							);
			
 
				+
			
 
				+	
			
 
				+	if (foundSepAxis)
			
 
				+	{
			
 
				+		
			
 
				+		
			
 
				+		contactIndex = b3ClipHullHullSingle(
			
 
				+			bodyIndexA, bodyIndexB,
			
 
				+						   posA,ornA,
			
 
				+						   posB,ornB,
			
 
				+			collidableIndexA, collidableIndexB,
			
 
				+			&rigidBodies, 
			
 
				+			&globalContactsOut,
			
 
				+			nGlobalContactsOut,
			
 
				+			
			
 
				+			convexShapes,
			
 
				+			convexShapes,
			
 
				+	
			
 
				+			convexVertices, 
			
 
				+			uniqueEdges, 
			
 
				+			faces,
			
 
				+			convexIndices,
			
 
				+	
			
 
				+			convexVertices,
			
 
				+			uniqueEdges,
			
 
				+			faces,
			
 
				+			convexIndices,
			
 
				+
			
 
				+			collidables,
			
 
				+			collidables,
			
 
				+			sepNormalWorldSpace,
			
 
				+			maxContactCapacity);
			
 
				+			
			
 
				+	}
			
 
				+
			
 
				+	return contactIndex;
			
 
				+}
			
 
				+
			
 
				+#endif //B3_CONTACT_CONVEX_CONVEX_SAT_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h
@@ -0,0 +1,162 @@
 
				+
			
 
				+#ifndef B3_CONTACT_SPHERE_SPHERE_H
			
 
				+#define B3_CONTACT_SPHERE_SPHERE_H
			
 
				+
			
 
				+
			
 
				+// Sphere (body A) against convex hull (body B): moves the sphere centre into B's local frame, measures it against every face plane of B's hull and writes at most one contact to globalContactsOut.
			
 
				+// nGlobalContactsOut is incremented only when a contact is written (sphere penetrating and nGlobalContactsOut < maxContactCapacity).
			
 
				+// NOTE(review): defined in a header without 'inline'; including it from more than one translation unit would give duplicate definitions - confirm how this header is built.
			
 
				+void	computeContactSphereConvex(int pairIndex,
			
 
				+																int bodyIndexA, int bodyIndexB, 
			
 
				+																int collidableIndexA, int collidableIndexB, // collidableIndexB is not referenced; B's collidable comes from rigidBodies[bodyIndexB].m_collidableIdx
			
 
				+																const b3RigidBodyData* rigidBodies, 
			
 
				+																const b3Collidable* collidables,
			
 
				+																const b3ConvexPolyhedronData* convexShapes,
			
 
				+																const b3Vector3* convexVertices,
			
 
				+																const int* convexIndices,
			
 
				+																const b3GpuFace* faces,
			
 
				+																b3Contact4* globalContactsOut,
			
 
				+																int& nGlobalContactsOut,
			
 
				+																int maxContactCapacity)
			
 
				+{
			
 
				+
			
 
				+	float radius = collidables[collidableIndexA].m_radius;
			
 
				+	float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
			
 
				+	b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; // not used below
			
 
				+
			
 
				+
			
 
				+
			
 
				+	float4 pos = rigidBodies[bodyIndexB].m_pos;
			
 
				+	
			
 
				+
			
 
				+	b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
			
 
				+
			
 
				+	b3Transform tr; // transform built from body B's position and orientation
			
 
				+	tr.setIdentity();
			
 
				+	tr.setOrigin(pos);
			
 
				+	tr.setRotation(quat);
			
 
				+	b3Transform trInv = tr.inverse();
			
 
				+
			
 
				+	float4 spherePos = trInv(spherePos1); // sphere centre expressed in B's local frame
			
 
				+
			
 
				+	int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
			
 
				+	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
			
 
				+	int numFaces = convexShapes[shapeIndex].m_numFaces;
			
 
				+	float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
			
 
				+	float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); // only mentioned in a comment below
			
 
				+	float minDist = -1000000.f; // TODO: What is the largest/smallest float?
			
 
				+	bool bCollide = true; // cleared as soon as one face shows the sphere is out of reach
			
 
				+	int region = -1; // 1/2/3 records which branch set minDist; only read by commented-out printf
			
 
				+	float4 localHitNormal;
			
 
				+	for ( int f = 0; f < numFaces; f++ )
			
 
				+	{
			
 
				+		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];
			
 
				+		float4 planeEqn;
			
 
				+		float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
			
 
				+		float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal);
			
 
				+		planeEqn = n1;
			
 
				+		planeEqn[3] = face.m_plane.w;
			
 
				+
			
 
				+		float4 pntReturn;
			
 
				+		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
			
 
				+
			
 
				+		if ( dist > radius) // centre lies more than one radius beyond this plane: stop, no contact
			
 
				+		{
			
 
				+			bCollide = false;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		if ( dist > 0 )
			
 
				+		{
			
 
				+			//might hit an edge or vertex
			
 
				+			b3Vector3 out;
			
 
				+
			
 
				+			bool isInPoly = IsPointInPolygon(spherePos,
			
 
				+					&face,
			
 
				+					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],
			
 
				+					convexIndices,
			
 
				+                    &out);
			
 
				+			if (isInPoly)
			
 
				+			{
			
 
				+				if (dist>minDist)
			
 
				+				{
			
 
				+					minDist = dist;
			
 
				+					closestPnt = pntReturn;
			
 
				+					localHitNormal = planeEqn;
			
 
				+					region=1;
			
 
				+				}
			
 
				+			} else
			
 
				+			{
			
 
				+				b3Vector3 tmp = spherePos-out; // 'out' is filled by IsPointInPolygon; presumably the closest point on the polygon - confirm
			
 
				+				b3Scalar l2 = tmp.length2();
			
 
				+				if (l2<radius*radius)
			
 
				+				{
			
 
				+					dist  = b3Sqrt(l2);
			
 
				+					if (dist>minDist)
			
 
				+					{
			
 
				+						minDist = dist;
			
 
				+						closestPnt = out;
			
 
				+						localHitNormal = tmp/dist;
			
 
				+						region=2;
			
 
				+					}
			
 
				+					
			
 
				+				} else
			
 
				+				{
			
 
				+					bCollide = false;
			
 
				+					break;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			if ( dist > minDist )
			
 
				+			{
			
 
				+				minDist = dist;
			
 
				+				closestPnt = pntReturn;
			
 
				+				localHitNormal = planeEqn;
			
 
				+				region=3;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	static int numChecks = 0; // call counter; incremented but never read in this function
			
 
				+	numChecks++;
			
 
				+
			
 
				+	if (bCollide && minDist > -10000) // minDist still at its -1000000 start value means no face was recorded
			
 
				+	{
			
 
				+		
			
 
				+		float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld;
			
 
				+		float4 pOnB1 = tr(closestPnt);
			
 
				+		//printf("dist ,%f,",minDist);
			
 
				+		float actualDepth = minDist-radius;
			
 
				+		if (actualDepth<0)
			
 
				+		{
			
 
				+		//printf("actualDepth = ,%f,", actualDepth);
			
 
				+		//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
			
 
				+		//printf("region=,%d,\n", region);
			
 
				+		pOnB1[3] = actualDepth; // depth is stored in the 4th component of the contact position
			
 
				+
			
 
				+		int dstIdx;
			
 
				+//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
			
 
				+		
			
 
				+		if (nGlobalContactsOut < maxContactCapacity)
			
 
				+		{
			
 
				+			dstIdx=nGlobalContactsOut;
			
 
				+			nGlobalContactsOut++;
			
 
				+
			
 
				+			b3Contact4* c = &globalContactsOut[dstIdx];
			
 
				+			c->m_worldNormalOnB = normalOnSurfaceB1;
			
 
				+			c->setFrictionCoeff(0.7);
			
 
				+			c->setRestituitionCoeff(0.f);
			
 
				+
			
 
				+			c->m_batchIdx = pairIndex;
			
 
				+			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; // index is negated when m_invMass is zero; index 0 cannot carry that sign
			
 
				+			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
			
 
				+			c->m_worldPosB[0] = pOnB1;
			
 
				+			int numPoints = 1;
			
 
				+			c->m_worldNormalOnB.w = (b3Scalar)numPoints; // the point count is stored in the normal's w component
			
 
				+		}//if (dstIdx < numPairs)
			
 
				+		}
			
 
				+	}//if (hasCollision)
			
 
				+	
			
 
				+}
			
 
				+#endif //B3_CONTACT_SPHERE_SPHERE_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
@@ -0,0 +1,40 @@
 
				+
			
 
				+#ifndef B3_CONVEX_POLYHEDRON_DATA_H
			
 
				+#define B3_CONVEX_POLYHEDRON_DATA_H
			
 
				+
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+
			
 
				+typedef struct b3GpuFace b3GpuFace_t; // alias for the struct defined just below
			
 
				+struct b3GpuFace // one polygonal face of a convex hull
			
 
				+{
			
 
				+	b3Float4 m_plane; // plane equation: the narrow-phase code reads xyz as the face normal and w as the fourth plane coefficient
			
 
				+	int m_indexOffset; // position of this face's first entry in the shared index array
			
 
				+	int m_numIndices; // number of index entries (vertices) of this face
			
 
				+	int m_unusedPadding1; // unused, per its name
			
 
				+	int m_unusedPadding2; // unused, per its name
			
 
				+};
			
 
				+
			
 
				+typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;
			
 
				+// Per-shape record telling where a convex hull's faces, vertices and unique edges start inside shared arrays, and how many of each it has.
			
 
				+struct b3ConvexPolyhedronData
			
 
				+{
			
 
				+	b3Float4		m_localCenter;
			
 
				+	b3Float4		m_extents;
			
 
				+	b3Float4		mC; // NOTE(review): meaning not evident from this header - confirm
			
 
				+	b3Float4		mE; // NOTE(review): meaning not evident from this header - confirm
			
 
				+
			
 
				+	float			m_radius;
			
 
				+	int	m_faceOffset; // index of the hull's first face in the shared b3GpuFace array
			
 
				+	int m_numFaces; // number of faces starting at m_faceOffset
			
 
				+	int	m_numVertices; // number of vertices starting at m_vertexOffset
			
 
				+
			
 
				+	int m_vertexOffset; // index of the hull's first vertex in the shared vertex array
			
 
				+	int	m_uniqueEdgesOffset; // index of the hull's first entry in the shared unique-edge array
			
 
				+	int	m_numUniqueEdges; // number of unique edges starting at m_uniqueEdgesOffset
			
 
				+	int m_unused; // unused, per its name
			
 
				+};
			
 
				+
			
 
				+#endif //B3_CONVEX_POLYHEDRON_DATA_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
@@ -0,0 +1,832 @@
 
				+#ifndef B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
 
				+#define B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
 
				+
			
 
				+#define B3_TRIANGLE_NUM_CONVEX_FACES 5
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+// Projects the vertices of 'hull' onto direction *dir and returns the covered interval in min[0] and max[0].
			
 
				+// The direction is rotated into the hull's local frame with the inverse of 'orn', the local vertices are dotted with it, and dot(pos,*dir) is added to both ends afterwards.
			
 
				+inline void b3Project(__global const b3ConvexPolyhedronData* hull,  b3Float4ConstArg pos, b3QuatConstArg orn, 
			
 
				+const b3Float4* dir, __global const b3Float4* vertices, float* min, float* max)
			
 
				+{
			
 
				+	min[0] = FLT_MAX; // sentinels: any vertex projection replaces them
			
 
				+	max[0] = -FLT_MAX;
			
 
				+	int numVerts = hull->m_numVertices;
			
 
				+
			
 
				+	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),*dir);
			
 
				+	float offset = b3Dot(pos,*dir); // translation of the hull along the axis
			
 
				+	for(int i=0;i<numVerts;i++)
			
 
				+	{
			
 
				+		float dp = b3Dot(vertices[hull->m_vertexOffset+i],localDir);
			
 
				+		if(dp < min[0])	
			
 
				+			min[0] = dp;
			
 
				+		if(dp > max[0])	
			
 
				+			max[0] = dp;
			
 
				+	}
			
 
				+	if(min[0]>max[0]) // holds only if no vertex updated the interval (e.g. numVerts == 0); the sentinels are then swapped
			
 
				+	{
			
 
				+		float tmp = min[0];
			
 
				+		min[0] = max[0];
			
 
				+		max[0] = tmp;
			
 
				+	}
			
 
				+	min[0] += offset;
			
 
				+	max[0] += offset;
			
 
				+}
			
 
				+// Projects both hulls onto *sep_axis with b3Project and compares the two intervals.
			
 
				+// Returns false (leaving *depth untouched) when the intervals are disjoint; otherwise writes the smaller of the two overlap amounts to *depth and returns true.
			
 
				+inline bool b3TestSepAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, 
			
 
				+	b3Float4ConstArg posA,b3QuatConstArg ornA,
			
 
				+	b3Float4ConstArg posB,b3QuatConstArg ornB,
			
 
				+	b3Float4* sep_axis, const b3Float4* verticesA, __global const b3Float4* verticesB,float* depth)
			
 
				+{
			
 
				+	float Min0,Max0; // interval of hull A on the axis
			
 
				+	float Min1,Max1; // interval of hull B on the axis
			
 
				+	b3Project(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);
			
 
				+	b3Project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);
			
 
				+
			
 
				+	if(Max0<Min1 || Max1<Min0) // intervals do not touch: this axis separates the hulls
			
 
				+		return false;
			
 
				+
			
 
				+	float d0 = Max0 - Min1;
			
 
				+	float d1 = Max1 - Min0;
			
 
				+	*depth = d0<d1 ? d0:d1;
			
 
				+	return true;
			
 
				+}
			
 
				+// Tests the face normals of hullA (rotated by ornA and flipped to point along DeltaC2) as candidate axes. Returns false at the first axis on which b3TestSepAxis reports disjoint projections, true otherwise.
			
 
				+// *dmin / *sep are overwritten only when an axis has a smaller overlap than the incoming *dmin, so the caller must initialise *dmin. uniqueEdgesA, indicesA, uniqueEdgesB, facesB and indicesB are not referenced in the body.
			
 
				+bool b3FindSeparatingAxis(	const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, 
			
 
				+	b3Float4ConstArg posA1,
			
 
				+	b3QuatConstArg ornA,
			
 
				+	b3Float4ConstArg posB1,
			
 
				+	b3QuatConstArg ornB,
			
 
				+	b3Float4ConstArg DeltaC2,
			
 
				+	
			
 
				+	const b3Float4* verticesA, 
			
 
				+	const b3Float4* uniqueEdgesA, 
			
 
				+	const b3GpuFace* facesA,
			
 
				+	const int*  indicesA,
			
 
				+
			
 
				+	__global const b3Float4* verticesB, 
			
 
				+	__global const b3Float4* uniqueEdgesB, 
			
 
				+	__global const b3GpuFace* facesB,
			
 
				+	__global const int*  indicesB,
			
 
				+	b3Float4* sep,
			
 
				+	float* dmin)
			
 
				+{
			
 
				+	
			
 
				+
			
 
				+	b3Float4 posA = posA1; // copies with w cleared; presumably so w cannot contribute to the dot products in b3Project - confirm
			
 
				+	posA.w = 0.f;
			
 
				+	b3Float4 posB = posB1;
			
 
				+	posB.w = 0.f;
			
 
				+/*
			
 
				+	static int maxFaceVertex = 0;
			
 
				+
			
 
				+	int curFaceVertexAB = hullA->m_numFaces*hullB->m_numVertices;
			
 
				+	curFaceVertexAB+= hullB->m_numFaces*hullA->m_numVertices;
			
 
				+
			
 
				+	if (curFaceVertexAB>maxFaceVertex)
			
 
				+	{
			
 
				+		maxFaceVertex = curFaceVertexAB;
			
 
				+		printf("curFaceVertexAB = %d\n",curFaceVertexAB);
			
 
				+		printf("hullA->m_numFaces = %d\n",hullA->m_numFaces);
			
 
				+		printf("hullA->m_numVertices = %d\n",hullA->m_numVertices);
			
 
				+		printf("hullB->m_numVertices = %d\n",hullB->m_numVertices);
			
 
				+	}
			
 
				+*/
			
 
				+
			
 
				+	int curPlaneTests=0; // incremented per tested face but never read
			
 
				+	{
			
 
				+		int numFacesA = hullA->m_numFaces;
			
 
				+		// Test normals from hullA
			
 
				+		for(int i=0;i<numFacesA;i++)
			
 
				+		{
			
 
				+			const b3Float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
			
 
				+			b3Float4 faceANormalWS = b3QuatRotate(ornA,normal);
			
 
				+			if (b3Dot(DeltaC2,faceANormalWS)<0)
			
 
				+				faceANormalWS*=-1.f;
			
 
				+			curPlaneTests++;
			
 
				+			float d;
			
 
				+			if(!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))
			
 
				+				return false;
			
 
				+			if(d<*dmin)
			
 
				+			{
			
 
				+				*dmin = d;
			
 
				+				*sep = faceANormalWS;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	if((b3Dot(-DeltaC2,*sep))>0.0f) // flip *sep when it points against DeltaC2; axes stored by the loop above were already flipped that way
			
 
				+	{
			
 
				+		*sep = -(*sep);
			
 
				+	}
			
 
				+	return true;
			
 
				+}
			
 
				+// Table of fixed direction vectors; the name suggests 162 unit-length samples on a sphere (count and normalisation not verified here - confirm before relying on them).
			
 
				+// NOTE(review): non-const array defined at namespace scope in a header, so every including translation unit defines it - confirm how this header is built.
			
 
				+b3Vector3 unitSphere162[]=
			
 
				+{
			
 
				+	b3MakeVector3(0.000000,-1.000000,0.000000),
			
 
				+b3MakeVector3(0.203181,-0.967950,0.147618),
			
 
				+b3MakeVector3(-0.077607,-0.967950,0.238853),
			
 
				+b3MakeVector3(0.723607,-0.447220,0.525725),
			
 
				+b3MakeVector3(0.609547,-0.657519,0.442856),
			
 
				+b3MakeVector3(0.812729,-0.502301,0.295238),
			
 
				+b3MakeVector3(-0.251147,-0.967949,0.000000),
			
 
				+b3MakeVector3(-0.077607,-0.967950,-0.238853),
			
 
				+b3MakeVector3(0.203181,-0.967950,-0.147618),
			
 
				+b3MakeVector3(0.860698,-0.251151,0.442858),
			
 
				+b3MakeVector3(-0.276388,-0.447220,0.850649),
			
 
				+b3MakeVector3(-0.029639,-0.502302,0.864184),
			
 
				+b3MakeVector3(-0.155215,-0.251152,0.955422),
			
 
				+b3MakeVector3(-0.894426,-0.447216,0.000000),
			
 
				+b3MakeVector3(-0.831051,-0.502299,0.238853),
			
 
				+b3MakeVector3(-0.956626,-0.251149,0.147618),
			
 
				+b3MakeVector3(-0.276388,-0.447220,-0.850649),
			
 
				+b3MakeVector3(-0.483971,-0.502302,-0.716565),
			
 
				+b3MakeVector3(-0.436007,-0.251152,-0.864188),
			
 
				+b3MakeVector3(0.723607,-0.447220,-0.525725),
			
 
				+b3MakeVector3(0.531941,-0.502302,-0.681712),
			
 
				+b3MakeVector3(0.687159,-0.251152,-0.681715),
			
 
				+b3MakeVector3(0.687159,-0.251152,0.681715),
			
 
				+b3MakeVector3(-0.436007,-0.251152,0.864188),
			
 
				+b3MakeVector3(-0.956626,-0.251149,-0.147618),
			
 
				+b3MakeVector3(-0.155215,-0.251152,-0.955422),
			
 
				+b3MakeVector3(0.860698,-0.251151,-0.442858),
			
 
				+b3MakeVector3(0.276388,0.447220,0.850649),
			
 
				+b3MakeVector3(0.483971,0.502302,0.716565),
			
 
				+b3MakeVector3(0.232822,0.657519,0.716563),
			
 
				+b3MakeVector3(-0.723607,0.447220,0.525725),
			
 
				+b3MakeVector3(-0.531941,0.502302,0.681712),
			
 
				+b3MakeVector3(-0.609547,0.657519,0.442856),
			
 
				+b3MakeVector3(-0.723607,0.447220,-0.525725),
			
 
				+b3MakeVector3(-0.812729,0.502301,-0.295238),
			
 
				+b3MakeVector3(-0.609547,0.657519,-0.442856),
			
 
				+b3MakeVector3(0.276388,0.447220,-0.850649),
			
 
				+b3MakeVector3(0.029639,0.502302,-0.864184),
			
 
				+b3MakeVector3(0.232822,0.657519,-0.716563),
			
 
				+b3MakeVector3(0.894426,0.447216,0.000000),
			
 
				+b3MakeVector3(0.831051,0.502299,-0.238853),
			
 
				+b3MakeVector3(0.753442,0.657515,0.000000),
			
 
				+b3MakeVector3(-0.232822,-0.657519,0.716563),
			
 
				+b3MakeVector3(-0.162456,-0.850654,0.499995),
			
 
				+b3MakeVector3(0.052790,-0.723612,0.688185),
			
 
				+b3MakeVector3(0.138199,-0.894429,0.425321),
			
 
				+b3MakeVector3(0.262869,-0.525738,0.809012),
			
 
				+b3MakeVector3(0.361805,-0.723611,0.587779),
			
 
				+b3MakeVector3(0.531941,-0.502302,0.681712),
			
 
				+b3MakeVector3(0.425323,-0.850654,0.309011),
			
 
				+b3MakeVector3(0.812729,-0.502301,-0.295238),
			
 
				+b3MakeVector3(0.609547,-0.657519,-0.442856),
			
 
				+b3MakeVector3(0.850648,-0.525736,0.000000),
			
 
				+b3MakeVector3(0.670817,-0.723611,-0.162457),
			
 
				+b3MakeVector3(0.670817,-0.723610,0.162458),
			
 
				+b3MakeVector3(0.425323,-0.850654,-0.309011),
			
 
				+b3MakeVector3(0.447211,-0.894428,0.000001),
			
 
				+b3MakeVector3(-0.753442,-0.657515,0.000000),
			
 
				+b3MakeVector3(-0.525730,-0.850652,0.000000),
			
 
				+b3MakeVector3(-0.638195,-0.723609,0.262864),
			
 
				+b3MakeVector3(-0.361801,-0.894428,0.262864),
			
 
				+b3MakeVector3(-0.688189,-0.525736,0.499997),
			
 
				+b3MakeVector3(-0.447211,-0.723610,0.525729),
			
 
				+b3MakeVector3(-0.483971,-0.502302,0.716565),
			
 
				+b3MakeVector3(-0.232822,-0.657519,-0.716563),
			
 
				+b3MakeVector3(-0.162456,-0.850654,-0.499995),
			
 
				+b3MakeVector3(-0.447211,-0.723611,-0.525727),
			
 
				+b3MakeVector3(-0.361801,-0.894429,-0.262863),
			
 
				+b3MakeVector3(-0.688189,-0.525736,-0.499997),
			
 
				+b3MakeVector3(-0.638195,-0.723609,-0.262863),
			
 
				+b3MakeVector3(-0.831051,-0.502299,-0.238853),
			
 
				+b3MakeVector3(0.361804,-0.723612,-0.587779),
			
 
				+b3MakeVector3(0.138197,-0.894429,-0.425321),
			
 
				+b3MakeVector3(0.262869,-0.525738,-0.809012),
			
 
				+b3MakeVector3(0.052789,-0.723611,-0.688186),
			
 
				+b3MakeVector3(-0.029639,-0.502302,-0.864184),
			
 
				+b3MakeVector3(0.956626,0.251149,0.147618),
			
 
				+b3MakeVector3(0.956626,0.251149,-0.147618),
			
 
				+b3MakeVector3(0.951058,-0.000000,0.309013),
			
 
				+b3MakeVector3(1.000000,0.000000,0.000000),
			
 
				+b3MakeVector3(0.947213,-0.276396,0.162458),
			
 
				+b3MakeVector3(0.951058,0.000000,-0.309013),
			
 
				+b3MakeVector3(0.947213,-0.276396,-0.162458),
			
 
				+b3MakeVector3(0.155215,0.251152,0.955422),
			
 
				+b3MakeVector3(0.436007,0.251152,0.864188),
			
 
				+b3MakeVector3(-0.000000,-0.000000,1.000000),
			
 
				+b3MakeVector3(0.309017,0.000000,0.951056),
			
 
				+b3MakeVector3(0.138199,-0.276398,0.951055),
			
 
				+b3MakeVector3(0.587786,0.000000,0.809017),
			
 
				+b3MakeVector3(0.447216,-0.276398,0.850648),
			
 
				+b3MakeVector3(-0.860698,0.251151,0.442858),
			
 
				+b3MakeVector3(-0.687159,0.251152,0.681715),
			
 
				+b3MakeVector3(-0.951058,-0.000000,0.309013),
			
 
				+b3MakeVector3(-0.809018,0.000000,0.587783),
			
 
				+b3MakeVector3(-0.861803,-0.276396,0.425324),
			
 
				+b3MakeVector3(-0.587786,0.000000,0.809017),
			
 
				+b3MakeVector3(-0.670819,-0.276397,0.688191),
			
 
				+b3MakeVector3(-0.687159,0.251152,-0.681715),
			
 
				+b3MakeVector3(-0.860698,0.251151,-0.442858),
			
 
				+b3MakeVector3(-0.587786,-0.000000,-0.809017),
			
 
				+b3MakeVector3(-0.809018,-0.000000,-0.587783),
			
 
				+b3MakeVector3(-0.670819,-0.276397,-0.688191),
			
 
				+b3MakeVector3(-0.951058,0.000000,-0.309013),
			
 
				+b3MakeVector3(-0.861803,-0.276396,-0.425324),
			
 
				+b3MakeVector3(0.436007,0.251152,-0.864188),
			
 
				+b3MakeVector3(0.155215,0.251152,-0.955422),
			
 
				+b3MakeVector3(0.587786,-0.000000,-0.809017),
			
 
				+b3MakeVector3(0.309017,-0.000000,-0.951056),
			
 
				+b3MakeVector3(0.447216,-0.276398,-0.850648),
			
 
				+b3MakeVector3(0.000000,0.000000,-1.000000),
			
 
				+b3MakeVector3(0.138199,-0.276398,-0.951055),
			
 
				+b3MakeVector3(0.670820,0.276396,0.688190),
			
 
				+b3MakeVector3(0.809019,-0.000002,0.587783),
			
 
				+b3MakeVector3(0.688189,0.525736,0.499997),
			
 
				+b3MakeVector3(0.861804,0.276394,0.425323),
			
 
				+b3MakeVector3(0.831051,0.502299,0.238853),
			
 
				+b3MakeVector3(-0.447216,0.276397,0.850649),
			
 
				+b3MakeVector3(-0.309017,-0.000001,0.951056),
			
 
				+b3MakeVector3(-0.262869,0.525738,0.809012),
			
 
				+b3MakeVector3(-0.138199,0.276397,0.951055),
			
 
				+b3MakeVector3(0.029639,0.502302,0.864184),
			
 
				+b3MakeVector3(-0.947213,0.276396,-0.162458),
			
 
				+b3MakeVector3(-1.000000,0.000001,0.000000),
			
 
				+b3MakeVector3(-0.850648,0.525736,-0.000000),
			
 
				+b3MakeVector3(-0.947213,0.276397,0.162458),
			
 
				+b3MakeVector3(-0.812729,0.502301,0.295238),
			
 
				+b3MakeVector3(-0.138199,0.276397,-0.951055),
			
 
				+b3MakeVector3(-0.309016,-0.000000,-0.951057),
			
 
				+b3MakeVector3(-0.262869,0.525738,-0.809012),
			
 
				+b3MakeVector3(-0.447215,0.276397,-0.850649),
			
 
				+b3MakeVector3(-0.531941,0.502302,-0.681712),
			
 
				+b3MakeVector3(0.861804,0.276396,-0.425322),
			
 
				+b3MakeVector3(0.809019,0.000000,-0.587782),
			
 
				+b3MakeVector3(0.688189,0.525736,-0.499997),
			
 
				+b3MakeVector3(0.670821,0.276397,-0.688189),
			
 
				+b3MakeVector3(0.483971,0.502302,-0.716565),
			
 
				+b3MakeVector3(0.077607,0.967950,0.238853),
			
 
				+b3MakeVector3(0.251147,0.967949,0.000000),
			
 
				+b3MakeVector3(0.000000,1.000000,0.000000),
			
 
				+b3MakeVector3(0.162456,0.850654,0.499995),
			
 
				+b3MakeVector3(0.361800,0.894429,0.262863),
			
 
				+b3MakeVector3(0.447209,0.723612,0.525728),
			
 
				+b3MakeVector3(0.525730,0.850652,0.000000),
			
 
				+b3MakeVector3(0.638194,0.723610,0.262864),
			
 
				+b3MakeVector3(-0.203181,0.967950,0.147618),
			
 
				+b3MakeVector3(-0.425323,0.850654,0.309011),
			
 
				+b3MakeVector3(-0.138197,0.894430,0.425320),
			
 
				+b3MakeVector3(-0.361804,0.723612,0.587778),
			
 
				+b3MakeVector3(-0.052790,0.723612,0.688185),
			
 
				+b3MakeVector3(-0.203181,0.967950,-0.147618),
			
 
				+b3MakeVector3(-0.425323,0.850654,-0.309011),
			
 
				+b3MakeVector3(-0.447210,0.894429,0.000000),
			
 
				+b3MakeVector3(-0.670817,0.723611,-0.162457),
			
 
				+b3MakeVector3(-0.670817,0.723611,0.162457),
			
 
				+b3MakeVector3(0.077607,0.967950,-0.238853),
			
 
				+b3MakeVector3(0.162456,0.850654,-0.499995),
			
 
				+b3MakeVector3(-0.138197,0.894430,-0.425320),
			
 
				+b3MakeVector3(-0.052790,0.723612,-0.688185),
			
 
				+b3MakeVector3(-0.361804,0.723612,-0.587778),
			
 
				+b3MakeVector3(0.361800,0.894429,-0.262863),
			
 
				+b3MakeVector3(0.638194,0.723610,-0.262864),
			
 
				+b3MakeVector3(0.447209,0.723612,-0.525728)
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/**
				+ * Edge-edge stage of the separating-axis test between hull A and hull B.
				+ *
				+ * Both hulls are projected (b3Project) onto a set of candidate axes. As soon as
				+ * the two projected intervals are disjoint on one axis the function returns
				+ * false. Otherwise, whenever an axis yields an overlap depth smaller than *dmin,
				+ * *dmin and *sep are updated to that depth and axis.
				+ *
				+ * Candidate axes:
				+ *  - searchAllEdgeEdge == true : the normalized cross product of every pair of
				+ *    unique edges (one from A rotated by ornA, one from B rotated by ornB),
				+ *    flipped so that it does not point against DeltaC2. Cross products that
				+ *    b3IsAlmostZero reports as degenerate are skipped.
				+ *  - searchAllEdgeEdge == false: every entry of the unitSphere162 table, used
				+ *    as-is.
				+ *
				+ * posA1 / posB1 are copied and their w component is cleared before use.
				+ * facesA, indicesA, facesB and indicesB are accepted for signature symmetry but
				+ * are not read by this function.
				+ *
				+ * sep and dmin are in/out: they keep the caller's values when no candidate
				+ * improves on *dmin. Before returning true, *sep is negated if
				+ * dot(-DeltaC2, *sep) > 0.
				+ *
				+ * Returns false when a separating axis was found, true otherwise.
				+ */
				+inline bool b3FindSeparatingAxisEdgeEdge(	const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, 
				+	b3Float4ConstArg posA1,
				+	b3QuatConstArg ornA,
				+	b3Float4ConstArg posB1,
				+	b3QuatConstArg ornB,
				+	b3Float4ConstArg DeltaC2,
				+	const b3Float4* verticesA, 
				+	const b3Float4* uniqueEdgesA, 
				+	const b3GpuFace* facesA,
				+	const int*  indicesA,
				+	__global const b3Float4* verticesB, 
				+	__global const b3Float4* uniqueEdgesB, 
				+	__global const b3GpuFace* facesB,
				+	__global const int*  indicesB,
				+		b3Float4* sep,
				+	float* dmin,
				+	bool searchAllEdgeEdge)
				+{
				+	// Work on copies so the w component can be cleared without touching the inputs.
				+	b3Float4 posA = posA1;
				+	posA.w = 0.f;
				+	b3Float4 posB = posB1;
				+	posB.w = 0.f;
				+
				+	if (searchAllEdgeEdge)
				+	{
				+		// Exhaustive search: one candidate axis per (edge of A, edge of B) pair.
				+		for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
				+		{
				+			const b3Float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
				+			b3Float4 edge0World = b3QuatRotate(ornA,edge0);
				+
				+			for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
				+			{
				+				const b3Float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
				+				b3Float4 edge1World = b3QuatRotate(ornB,edge1);
				+
				+				b3Float4 crossje = b3Cross(edge0World,edge1World);
				+
				+				// (Nearly) parallel edges give no usable axis.
				+				if (b3IsAlmostZero(crossje))
				+					continue;
				+
				+				crossje = b3Normalized(crossje);
				+				if (b3Dot(DeltaC2,crossje)<0)
				+					crossje *= -1.f;
				+
				+				float Min0,Max0;
				+				float Min1,Max1;
				+				b3Project(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
				+				b3Project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
				+
				+				// Disjoint intervals: this axis separates the hulls.
				+				if (Max0<Min1 || Max1<Min0)
				+					return false;
				+
				+				const float d0 = Max0 - Min1;
				+				const float d1 = Max1 - Min0;
				+				const float dist = d0<d1 ? d0:d1;
				+
				+				if (dist<*dmin)
				+				{
				+					*dmin = dist;
				+					*sep = crossje;
				+				}
				+			}
				+		}
				+	}
				+	else
				+	{
				+		// Sampled search: test every direction of the precomputed table.
				+		int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3);
				+
				+		for (int i = 0; i < numDirections; i++)
				+		{
				+			b3Float4 crossje = unitSphere162[i];
				+
				+			float Min0,Max0;
				+			float Min1,Max1;
				+			b3Project(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
				+			b3Project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
				+
				+			if (Max0<Min1 || Max1<Min0)
				+				return false;
				+
				+			const float d0 = Max0 - Min1;
				+			const float d1 = Max1 - Min0;
				+			const float dist = d0<d1 ? d0:d1;
				+
				+			if (dist<*dmin)
				+			{
				+				*dmin = dist;
				+				*sep = crossje;
				+			}
				+		}
				+	}
				+
				+	// Orient the reported axis consistently with DeltaC2.
				+	if ((b3Dot(-DeltaC2,*sep))>0.0f)
				+	{
				+		*sep = -(*sep);
				+	}
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/**
				+ * Picks the face of hull B and the face of hull A that will be clipped against
				+ * each other, and stores their vertices (transformed by posB/ornB and posA/ornA)
				+ * for pair `pairIndex`.
				+ *
				+ * Face selection:
				+ *  - B: the face whose rotated normal has the largest dot product with
				+ *    separatingNormal.
				+ *  - A: the face whose rotated normal has the smallest dot product with
				+ *    separatingNormal; that rotated normal is written to
				+ *    worldNormalsA1[pairIndex].
				+ *
				+ * Outputs:
				+ *  - worldVertsB1[pairIndex*capacityWorldVerts + k] : vertices of the chosen B face
				+ *  - worldVertsA1[pairIndex*capacityWorldVerts + k] : vertices of the chosen A face
				+ *  - clippingFaces[pairIndex] = (faceA, faceB, numVertsA, numVertsB)
				+ *
				+ * At most capacityWorldVerts vertices are stored per face so that a pair never
				+ * writes outside its own slot. If either hull yields no face (index stays -1),
				+ * no vertices are written and both counts are reported as 0.
				+ *
				+ * minDist and maxDist are not used by this function.
				+ * The return value is always 0 (no contacts are generated here).
				+ */
				+inline int	b3FindClippingFaces(b3Float4ConstArg separatingNormal,
				+                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
				+                      b3Float4ConstArg posA, b3QuatConstArg ornA,b3Float4ConstArg posB, b3QuatConstArg ornB,
				+                       __global b3Float4* worldVertsA1,
				+                      __global b3Float4* worldNormalsA1,
				+                      __global b3Float4* worldVertsB1,
				+                      int capacityWorldVerts,
				+                      const float minDist, float maxDist,
				+                      __global const b3Float4* verticesA,
				+                      __global const b3GpuFace_t* facesA,
				+                      __global const int* indicesA,
				+						__global const b3Float4* verticesB,
				+                      __global const b3GpuFace_t* facesB,
				+                      __global const int* indicesB,
				+
				+                      __global b3Int4* clippingFaces, int pairIndex)
				+{
				+	// Face of B most aligned with the separating normal.
				+	int closestFaceB = -1;
				+	float dmax = -FLT_MAX;
				+	for (int face = 0; face < hullB->m_numFaces; face++)
				+	{
				+		const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset+face].m_plane.x,
				+		                                     facesB[hullB->m_faceOffset+face].m_plane.y,
				+		                                     facesB[hullB->m_faceOffset+face].m_plane.z,
				+		                                     0.f);
				+		const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
				+		const float d = b3Dot(WorldNormal,separatingNormal);
				+		if (d > dmax)
				+		{
				+			dmax = d;
				+			closestFaceB = face;
				+		}
				+	}
				+
				+	// Face of A most opposed to the separating normal.
				+	int closestFaceA = -1;
				+	float dmin = FLT_MAX;
				+	for (int face = 0; face < hullA->m_numFaces; face++)
				+	{
				+		const b3Float4 Normal = b3MakeFloat4(facesA[hullA->m_faceOffset+face].m_plane.x,
				+		                                     facesA[hullA->m_faceOffset+face].m_plane.y,
				+		                                     facesA[hullA->m_faceOffset+face].m_plane.z,
				+		                                     0.f);
				+		const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal);
				+		const float d = b3Dot(faceANormalWS,separatingNormal);
				+		if (d < dmin)
				+		{
				+			dmin = d;
				+			closestFaceA = face;
				+			worldNormalsA1[pairIndex] = faceANormalWS;
				+		}
				+	}
				+
				+	int numVerticesA = 0;
				+	int numWorldVertsB1 = 0;
				+
				+	// Only touch the face tables when both searches produced a valid index;
				+	// indexing with -1 would read outside the face arrays.
				+	if (closestFaceA >= 0 && closestFaceB >= 0)
				+	{
				+		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];
				+		int numVerticesB = polyB.m_numIndices;
				+		if (numVerticesB > capacityWorldVerts)
				+			numVerticesB = capacityWorldVerts;	// stay inside this pair's slot
				+		for (int e0 = 0; e0 < numVerticesB; e0++)
				+		{
				+			const b3Float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
				+			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = b3TransformPoint(b,posB,ornB);
				+		}
				+
				+		const b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];
				+		numVerticesA = polyA.m_numIndices;
				+		if (numVerticesA > capacityWorldVerts)
				+			numVerticesA = capacityWorldVerts;	// stay inside this pair's slot
				+		for (int e0 = 0; e0 < numVerticesA; e0++)
				+		{
				+			const b3Float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];
				+			worldVertsA1[pairIndex*capacityWorldVerts+e0] = b3TransformPoint(a, posA,ornA);
				+		}
				+	}
				+
				+	clippingFaces[pairIndex].x = closestFaceA;
				+	clippingFaces[pairIndex].y = closestFaceB;
				+	clippingFaces[pairIndex].z = numVerticesA;
				+	clippingFaces[pairIndex].w = numWorldVertsB1;
				+
				+	// No contacts are produced by this step.
				+	return 0;
				+}
			
 
				+
			
 
				+
			
 
				+        
			
 
				+
			
 
				+/**
				+ * Separating-axis test for one concave pair: one triangle of shape A against
				+ * the convex shape (or compound child) B.
				+ *
				+ * concavePairs[pairIdx] is read as:
				+ *   .x = body index A, .y = body index B, .z = face (triangle) index inside
				+ *   shape A, .w = child-shape index when B is a compound of convex hulls.
				+ * .w is overwritten with -1 to mark the pair inactive when B has an unsupported
				+ * shape type, when the triangle's bounds do not overlap aabbs[bodyIndexB], or
				+ * when a separating axis exists.
				+ *
				+ * The triangle is expanded into a small temporary polyhedron
				+ * (3 vertices, 3 unique edges, B3_TRIANGLE_NUM_CONVEX_FACES faces: front, back
				+ * and one plane per edge) so that the generic hull routines can be used.
				+ *
				+ * When no separating axis is found:
				+ *   - hasSeparatingNormals[pairIdx] is set to 1,
				+ *   - concaveSeparatingNormalsOut[pairIdx] receives the axis, with the overlap
				+ *     depth stored in w,
				+ *   - b3FindClippingFaces fills clippingFacesOut / worldVertsA1Out /
				+ *     worldNormalsA1Out / worldVertsB1Out for this pair.
				+ *
				+ * The pair to process is supplied through pairIdx; numConcavePairs is not read
				+ * (the get_global_id(0)-based dispatch is disabled in this version).
				+ */
				+__kernel void   b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs,
				+	__global const b3RigidBodyData* rigidBodies,
				+	__global const b3Collidable* collidables,
				+	__global const b3ConvexPolyhedronData* convexShapes, 
				+	__global const b3Float4* vertices,
				+	__global const b3Float4* uniqueEdges,
				+	__global const b3GpuFace* faces,
				+	__global const int* indices,
				+	__global const b3GpuChildShape* gpuChildShapes,
				+	__global b3Aabb* aabbs,
				+	__global b3Float4* concaveSeparatingNormalsOut,
				+	__global b3Int4* clippingFacesOut,
				+	__global b3Vector3* worldVertsA1Out,
				+	__global b3Vector3* worldNormalsA1Out,
				+	__global b3Vector3* worldVertsB1Out,
				+	__global int* hasSeparatingNormals,
				+	int vertexFaceCapacity,
				+	int numConcavePairs,
				+	int pairIdx
				+	)
				+{
				+	int bodyIndexA = concavePairs[pairIdx].x;
				+	int bodyIndexB = concavePairs[pairIdx].y;
				+
				+	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
				+	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
				+
				+	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
				+	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
				+
				+	// Only convex hulls and compounds of convex hulls are handled on the B side.
				+	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
				+		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+	{
				+		concavePairs[pairIdx].w = -1;
				+		return;
				+	}
				+
				+	hasSeparatingNormals[pairIdx] = 0;
				+
				+	// Triangle (face) of shape A addressed by this pair.
				+	int f = concavePairs[pairIdx].z;
				+	b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
				+
				+	b3ConvexPolyhedronData convexPolyhedronA;
				+
				+	//add 3 vertices of the triangle
				+	convexPolyhedronA.m_numVertices = 3;
				+	convexPolyhedronA.m_vertexOffset = 0;
				+	b3Float4	localCenter = b3MakeFloat4(0.f,0.f,0.f,0.f);
				+
				+	b3Aabb triAabb;
				+	triAabb.m_minVec = b3MakeFloat4(1e30f,1e30f,1e30f,0.f);
				+	triAabb.m_maxVec = b3MakeFloat4(-1e30f,-1e30f,-1e30f,0.f);
				+
				+	// Gather the triangle's vertices, their sum and their bounds.
				+	b3Float4 verticesA[3];
				+	for (int v=0;v<3;v++)
				+	{
				+		int index = indices[face.m_indexOffset+v];
				+		b3Float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
				+		verticesA[v] = vert;
				+		localCenter += vert;
				+
				+		triAabb.m_minVec = b3MinFloat4(triAabb.m_minVec,vert);
				+		triAabb.m_maxVec = b3MaxFloat4(triAabb.m_maxVec,vert);
				+	}
				+
				+	// Cheap rejection: triangle bounds against the bounds stored for body B.
				+	bool overlap = true;
				+	overlap = (triAabb.m_minVec.x > aabbs[bodyIndexB].m_maxVec.x || triAabb.m_maxVec.x < aabbs[bodyIndexB].m_minVec.x) ? false : overlap;
				+	overlap = (triAabb.m_minVec.z > aabbs[bodyIndexB].m_maxVec.z || triAabb.m_maxVec.z < aabbs[bodyIndexB].m_minVec.z) ? false : overlap;
				+	overlap = (triAabb.m_minVec.y > aabbs[bodyIndexB].m_maxVec.y || triAabb.m_maxVec.y < aabbs[bodyIndexB].m_minVec.y) ? false : overlap;
				+
				+	if (!overlap)
				+	{
				+		//mark this pair as in-active
				+		concavePairs[pairIdx].w = -1;
				+		return;
				+	}
				+
				+	float dmin = FLT_MAX;
				+	// Placeholder value; overwritten by the separating-axis routines.
				+	b3Float4 sepAxis=b3MakeFloat4(1,2,3,4);
				+
				+	//a triangle has 3 unique edges
				+	convexPolyhedronA.m_numUniqueEdges = 3;
				+	convexPolyhedronA.m_uniqueEdgesOffset = 0;
				+	b3Float4 uniqueEdgesA[3];
				+
				+	uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
				+	uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
				+	uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
				+
				+	convexPolyhedronA.m_faceOffset = 0;
				+
				+	b3Float4 normal = b3MakeFloat4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
				+
				+	b3GpuFace facesA[B3_TRIANGLE_NUM_CONVEX_FACES];
				+	int indicesA[3+3+2+2+2];
				+	int curUsedIndices=0;
				+	int fidx=0;
				+
				+	//front side of triangle: same plane as the source face
				+	{
				+		facesA[fidx].m_indexOffset=curUsedIndices;
				+		indicesA[0] = 0;
				+		indicesA[1] = 1;
				+		indicesA[2] = 2;
				+		curUsedIndices+=3;
				+		facesA[fidx].m_plane.x = normal.x;
				+		facesA[fidx].m_plane.y = normal.y;
				+		facesA[fidx].m_plane.z = normal.z;
				+		facesA[fidx].m_plane.w = face.m_plane.w;
				+		facesA[fidx].m_numIndices=3;
				+	}
				+	fidx++;
				+
				+	//back side of triangle: reversed winding, negated normal
				+	{
				+		facesA[fidx].m_indexOffset=curUsedIndices;
				+		indicesA[3]=2;
				+		indicesA[4]=1;
				+		indicesA[5]=0;
				+		curUsedIndices+=3;
				+		facesA[fidx].m_plane.x = -normal.x;
				+		facesA[fidx].m_plane.y = -normal.y;
				+		facesA[fidx].m_plane.z = -normal.z;
				+		facesA[fidx].m_plane.w = b3Dot(normal,verticesA[0]);
				+		facesA[fidx].m_numIndices=3;
				+	}
				+	fidx++;
				+
				+	// One two-vertex "face" per triangle edge, with a plane through that edge
				+	// whose normal is cross(triangle normal, edge direction).
				+	{
				+		int numVertices=3;
				+		int prevVertex = numVertices-1;
				+		for (int v=0;v<numVertices;v++)
				+		{
				+			b3Float4 v0 = verticesA[v];
				+			b3Float4 v1 = verticesA[prevVertex];
				+
				+			b3Float4 edgeNormal = b3Normalized(b3Cross(normal,v1-v0));
				+			float c = -b3Dot(edgeNormal,v0);
				+
				+			facesA[fidx].m_numIndices = 2;
				+			facesA[fidx].m_indexOffset=curUsedIndices;
				+			indicesA[curUsedIndices++]=v;
				+			indicesA[curUsedIndices++]=prevVertex;
				+
				+			facesA[fidx].m_plane.x = edgeNormal.x;
				+			facesA[fidx].m_plane.y = edgeNormal.y;
				+			facesA[fidx].m_plane.z = edgeNormal.z;
				+			facesA[fidx].m_plane.w = c;
				+			fidx++;
				+			prevVertex = v;
				+		}
				+	}
				+	convexPolyhedronA.m_numFaces = B3_TRIANGLE_NUM_CONVEX_FACES;
				+	convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
				+
				+	b3Float4 posA = rigidBodies[bodyIndexA].m_pos;
				+	posA.w = 0.f;
				+	b3Float4 posB = rigidBodies[bodyIndexB].m_pos;
				+	posB.w = 0.f;
				+
				+	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
				+	b3Quaternion ornB =rigidBodies[bodyIndexB].m_quat;
				+
				+	///////////////////
				+	///compound shape support: replace B's pose and shape by those of the child
				+	if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
				+	{
				+		int childShapeIndexB = concavePairs[pairIdx].w;
				+		int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
				+		b3Float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
				+		b3Quaternion childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
				+		// Both results are computed from the parent pose before either is replaced.
				+		b3Float4 newPosB = b3TransformPoint(childPosB,posB,ornB);
				+		b3Quaternion newOrnB = b3QuatMul(ornB,childOrnB);
				+		posB = newPosB;
				+		ornB = newOrnB;
				+		shapeIndexB = collidables[childColIndexB].m_shapeIndex;
				+	}
				+	//////////////////
				+
				+	// Vector from B's center to the triangle's center, used to orient the axes.
				+	b3Float4 c0local = convexPolyhedronA.m_localCenter;
				+	b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
				+	b3Float4 c1local = convexShapes[shapeIndexB].m_localCenter;
				+	b3Float4 c1 = b3TransformPoint(c1local,posB,ornB);
				+	const b3Float4 DeltaC2 = c0 - c1;
				+
				+	// Three stages (triangle vs B, B vs triangle, edge-edge). Each returns false
				+	// as soon as it finds a separating axis; later stages are skipped in that case.
				+	bool noSeparation = b3FindSeparatingAxis(	&convexPolyhedronA, &convexShapes[shapeIndexB],
				+												posA,ornA,
				+												posB,ornB,
				+												DeltaC2,
				+												verticesA,uniqueEdgesA,facesA,indicesA,
				+												vertices,uniqueEdges,faces,indices,
				+												&sepAxis,&dmin);
				+	if (noSeparation)
				+	{
				+		noSeparation = b3FindSeparatingAxis(	&convexShapes[shapeIndexB],&convexPolyhedronA,
				+												posB,ornB,
				+												posA,ornA,
				+												DeltaC2,
				+												vertices,uniqueEdges,faces,indices,
				+												verticesA,uniqueEdgesA,facesA,indicesA,
				+												&sepAxis,&dmin);
				+	}
				+	if (noSeparation)
				+	{
				+		noSeparation = b3FindSeparatingAxisEdgeEdge(	&convexPolyhedronA, &convexShapes[shapeIndexB],
				+														posA,ornA,
				+														posB,ornB,
				+														DeltaC2,
				+														verticesA,uniqueEdgesA,facesA,indicesA,
				+														vertices,uniqueEdges,faces,indices,
				+														&sepAxis,&dmin,true);
				+	}
				+
				+	if (!noSeparation)
				+	{
				+		//mark this pair as in-active
				+		concavePairs[pairIdx].w = -1;
				+		return;
				+	}
				+
				+	hasSeparatingNormals[pairIdx]=1;
				+	sepAxis.w = dmin;
				+	concaveSeparatingNormalsOut[pairIdx]=sepAxis;
				+
				+	//now compute clipping faces A and B, and world-space clipping vertices A and B...
				+	float minDist = -1e30f;
				+	float maxDist = 0.02f;
				+
				+	b3FindClippingFaces(sepAxis,
				+						&convexPolyhedronA,
				+						&convexShapes[shapeIndexB],
				+						posA,ornA,
				+						posB,ornB,
				+						worldVertsA1Out,
				+						worldNormalsA1Out,
				+						worldVertsB1Out,
				+						vertexFaceCapacity,
				+						minDist, maxDist,
				+						verticesA,
				+						facesA,
				+						indicesA,
				+						vertices,
				+						faces,
				+						indices,
				+						clippingFacesOut, pairIdx);
				+}
			
 
				+
			
 
				+
			
 
				+#endif //B3_FIND_CONCAVE_SEPARATING_AXIS_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
@@ -0,0 +1,206 @@
 
				+#ifndef B3_FIND_SEPARATING_AXIS_H
			
 
				+#define B3_FIND_SEPARATING_AXIS_H
			
 
				+
			
 
				+
			
 
				+/**
				+ * Projects the vertices of `hull` onto `dir` and returns the covered interval.
				+ *
				+ * The direction is rotated into the hull's local frame (orn.inverse()) so the
				+ * stored vertices can be used directly; dot(pos, dir) is then added to both
				+ * bounds to account for the hull's position.
				+ *
				+ * @param hull      hull description; m_vertexOffset / m_numVertices select its
				+ *                  range inside `vertices`
				+ * @param pos       hull position
				+ * @param orn       hull orientation
				+ * @param dir       projection axis
				+ * @param vertices  shared vertex array
				+ * @param min       [out] lower bound of the projection
				+ * @param max       [out] upper bound of the projection
				+ *
				+ * For a hull with no vertices the bounds end up as
				+ * (-FLT_MAX + offset, FLT_MAX + offset) because of the final swap.
				+ */
				+inline void b3ProjectAxis(const b3ConvexPolyhedronData& hull,  const b3Float4& pos, const b3Quaternion& orn, const b3Float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
				+{
				+	min = FLT_MAX;
				+	max = -FLT_MAX;
				+	const int numVerts = hull.m_numVertices;
				+
				+	// Rotate the axis once instead of rotating every vertex.
				+	const b3Float4 localDir = b3QuatRotate(orn.inverse(),dir);
				+
				+	const b3Scalar offset = b3Dot3F4(pos,dir);
				+
				+	for (int i = 0; i < numVerts; i++)
				+	{
				+		const b3Scalar dp = b3Dot3F4((b3Float4&)vertices[hull.m_vertexOffset+i],localDir);
				+		if (dp < min)	min = dp;
				+		if (dp > max)	max = dp;
				+	}
				+
				+	// Only reachable when the loop never updated the bounds.
				+	if (min>max)
				+	{
				+		const b3Scalar tmp = min;
				+		min = max;
				+		max = tmp;
				+	}
				+	min += offset;
				+	max += offset;
				+}
			
 
				+
			
 
				+
			
 
				+/**
				+ * Tests a single candidate axis.
				+ *
				+ * Projects both hulls onto sep_axis with b3ProjectAxis. If the two intervals
				+ * are disjoint the function returns false and leaves `depth` untouched.
				+ * Otherwise it stores the smaller of the two overlap amounts in `depth` and
				+ * returns true.
				+ */
				+inline bool b3TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, 
				+	const b3Float4& posA,const b3Quaternion& ornA,
				+	const b3Float4& posB,const b3Quaternion& ornB,
				+	const b3Float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth)
				+{
				+	b3Scalar minA, maxA;
				+	b3ProjectAxis(hullA, posA, ornA, sep_axis, verticesA, minA, maxA);
				+
				+	b3Scalar minB, maxB;
				+	b3ProjectAxis(hullB, posB, ornB, sep_axis, verticesB, minB, maxB);
				+
				+	// Intervals overlap unless one ends before the other begins.
				+	const bool intervalsOverlap = !(maxA < minB) && !(maxB < minA);
				+	if (!intervalsOverlap)
				+	{
				+		return false;
				+	}
				+
				+	const b3Scalar overlapAB = maxA - minB;
				+	b3Assert(overlapAB>=0.0f);
				+	const b3Scalar overlapBA = maxB - minA;
				+	b3Assert(overlapBA>=0.0f);
				+
				+	if (overlapAB < overlapBA)
				+	{
				+		depth = overlapAB;
				+	}
				+	else
				+	{
				+		depth = overlapBA;
				+	}
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+/**
				+ * Separating-axis test between two convex hulls.
				+ *
				+ * Candidate axes are tested in this order, each through b3TestSepAxis:
				+ *   1. the face normals of hull A (rotated by ornA),
				+ *   2. the face normals of hull B (rotated by ornB),
				+ *   3. the normalized cross product of every unique-edge pair (A x B); cross
				+ *      products that b3IsAlmostZero reports as degenerate are skipped.
				+ * Every candidate is flipped so that it does not point against
				+ * deltaC2 = centerA - centerB (the transformed local centers of the hulls).
				+ *
				+ * The function returns false as soon as one axis separates the hulls.
				+ * Otherwise it returns true and `sep` holds the axis with the smallest overlap
				+ * depth, negated if dot(-deltaC2, sep) > 0. If no candidate was tested at all,
				+ * `sep` keeps the caller's value apart from that final sign adjustment.
				+ *
				+ * posA1 / posB1 are copied and their w component is cleared before use.
				+ * indicesA and indicesB are accepted but not read by this function.
				+ */
				+inline bool b3FindSeparatingAxis(	const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, 
				+	const b3Float4& posA1,
				+	const b3Quaternion& ornA,
				+	const b3Float4& posB1,
				+	const b3Quaternion& ornB,
				+	const b3AlignedObjectArray<b3Vector3>& verticesA,
				+	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, 
				+	const b3AlignedObjectArray<b3GpuFace>& facesA,
				+	const b3AlignedObjectArray<int>& indicesA,
				+	const b3AlignedObjectArray<b3Vector3>& verticesB, 
				+	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, 
				+	const b3AlignedObjectArray<b3GpuFace>& facesB,
				+	const b3AlignedObjectArray<int>& indicesB,
				+
				+	b3Vector3& sep)
				+{
				+	B3_PROFILE("findSeparatingAxis");
				+
				+	b3Float4 posA = posA1;
				+	posA.w = 0.f;
				+	b3Float4 posB = posB1;
				+	posB.w = 0.f;
				+
				+	// Vector between the hull centers; used to give every axis a consistent sign.
				+	b3Float4 c0local = (b3Float4&)hullA.m_localCenter;
				+	b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
				+	b3Float4 c1local = (b3Float4&)hullB.m_localCenter;
				+	b3Float4 c1 = b3TransformPoint(c1local,posB,ornB);
				+	const b3Float4 deltaC2 = c0 - c1;
				+
				+	b3Scalar dmin = FLT_MAX;
				+
				+	// Test normals from hullA
				+	const int numFacesA = hullA.m_numFaces;
				+	for (int i = 0; i < numFacesA; i++)
				+	{
				+		const b3Float4& normal = (b3Float4&)facesA[hullA.m_faceOffset+i].m_plane;
				+		b3Float4 faceANormalWS = b3QuatRotate(ornA,normal);
				+
				+		if (b3Dot3F4(deltaC2,faceANormalWS)<0)
				+			faceANormalWS*=-1.f;
				+
				+		b3Scalar d;
				+		if (!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d))
				+			return false;
				+
				+		if (d<dmin)
				+		{
				+			dmin = d;
				+			sep = (b3Vector3&)faceANormalWS;
				+		}
				+	}
				+
				+	// Test normals from hullB
				+	const int numFacesB = hullB.m_numFaces;
				+	for (int i = 0; i < numFacesB; i++)
				+	{
				+		b3Float4 normal = (b3Float4&)facesB[hullB.m_faceOffset+i].m_plane;
				+		b3Float4 WorldNormal = b3QuatRotate(ornB, normal);
				+
				+		if (b3Dot3F4(deltaC2,WorldNormal)<0)
				+		{
				+			WorldNormal*=-1.f;
				+		}
				+
				+		b3Scalar d;
				+		if (!b3TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d))
				+			return false;
				+
				+		if (d<dmin)
				+		{
				+			dmin = d;
				+			sep = (b3Vector3&)WorldNormal;
				+		}
				+	}
				+
				+	// Test edges
				+	for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
				+	{
				+		const b3Float4& edge0 = (b3Float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0];
				+		b3Float4 edge0World = b3QuatRotate(ornA,(b3Float4&)edge0);
				+
				+		for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
				+		{
				+			const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1];
				+			b3Float4 edge1World = b3QuatRotate(ornB,(b3Float4&)edge1);
				+
				+			b3Float4 crossje = b3Cross3(edge0World,edge1World);
				+
				+			// (Nearly) parallel edges give no usable axis.
				+			if (b3IsAlmostZero((b3Vector3&)crossje))
				+				continue;
				+
				+			crossje = b3FastNormalized3(crossje);
				+			if (b3Dot3F4(deltaC2,crossje)<0)
				+				crossje*=-1.f;
				+
				+			b3Scalar dist;
				+			if (!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist))
				+				return false;
				+
				+			if (dist<dmin)
				+			{
				+				dmin = dist;
				+				sep = (b3Vector3&)crossje;
				+			}
				+		}
				+	}
				+
				+	// Orient the reported axis consistently with deltaC2.
				+	if ((b3Dot3F4(-deltaC2,(b3Float4&)sep))>0.0f)
				+		sep = -sep;
				+
				+	return true;
				+}
			
 
				+
			
 
				+#endif //B3_FIND_SEPARATING_AXIS_H
			
 
				+
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h
@@ -0,0 +1,919 @@
 
				+
			
 
				+/***
			
 
				+ * ---------------------------------
			
 
				+ * Copyright (c)2012 Daniel Fiser <[email protected]>
			
 
				+ *
			
 
				+ *  This file was ported from mpr.c file, part of libccd.
			
 
				+ *  The Minkowski Portal Refinement implementation was ported 
			
 
				+ *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.
			
 
				+ *  at http://github.com/erwincoumans/bullet3
			
 
				+ *
			
 
				+ *  Distributed under the OSI-approved BSD License (the "License");
			
 
				+ *  see <http://www.opensource.org/licenses/bsd-license.php>.
			
 
				+ *  This software is distributed WITHOUT ANY WARRANTY; without even the
			
 
				+ *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *  See the License for more information.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_MPR_PENETRATION_H
			
 
				+#define B3_MPR_PENETRATION_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifdef __cplusplus // pick the square-root routine by compilation mode
			
 
				+#define B3_MPR_SQRT sqrtf // C++ build: single-precision sqrtf
			
 
				+#else // not compiled as C++ (presumably the OpenCL kernel build - TODO confirm)
			
 
				+#define B3_MPR_SQRT sqrt // non-C++ build: plain sqrt
			
 
				+#endif // __cplusplus
			
 
				+#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y)) // smaller of x and y; macro, so the chosen argument is evaluated twice
			
 
				+#define B3_MPR_FABS fabs // absolute value used by b3MprIsZero and b3MprEq
			
 
				+
			
 
				+#define B3_MPR_TOLERANCE 1E-6f // threshold compared against in portalReachTolerance
			
 
				+#define B3_MPR_MAX_ITERATIONS 1000 // upper bound on the loops in b3RefinePortal and b3FindPenetr
			
 
				+
			
 
				+struct _b3MprSupport_t 
			
 
				+{
			
 
				+    b3Float4 v;  //!< Support point in minkowski sum
			
 
				+    b3Float4 v1; //!< Support point in obj1
			
 
				+    b3Float4 v2; //!< Support point in obj2
			
 
				+};
			
 
				+typedef struct _b3MprSupport_t b3MprSupport_t;
			
 
				+
			
 
				+struct _b3MprSimplex_t 
			
 
				+{
			
 
				+    b3MprSupport_t ps[4];
			
 
				+    int last; //!< index of last added point
			
 
				+};
			
 
				+typedef struct _b3MprSimplex_t b3MprSimplex_t;
			
 
				+
			
 
				+// Writable access to simplex entry idx. The index is not range-checked.
				+inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)
				+{
				+    return s->ps + idx;
				+}
			
 
				+
			
 
				+// Declares the simplex to hold `size` points by storing the index of the
				+// last valid entry (size - 1).
				+inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)
				+{
				+    const int lastIndex = size - 1;
				+    s->last = lastIndex;
				+}
			
 
				+
			
 
				+
			
 
				+// Number of points currently held by the simplex (last index plus one).
				+inline int b3MprSimplexSize(const b3MprSimplex_t *s)
				+{
				+    const int count = 1 + s->last;
				+    return count;
				+}
			
 
				+
			
 
				+
			
 
				+// Read-only access to simplex entry idx.
				+// Note: idx is used as-is; the caller must keep it inside ps[].
				+inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)
				+{
				+    return s->ps + idx;
				+}
			
 
				+
			
 
				+// Copies the complete support sample *s into *d.
				+inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)
				+{
				+    d[0] = s[0];
				+}
			
 
				+
			
 
				+// Overwrites simplex slot `pos` with a copy of the support sample *a.
				+inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)
				+{
				+    b3MprSupport_t *slot = &s->ps[pos];
				+    b3MprSupportCopy(slot, a);
				+}
			
 
				+
			
 
				+
			
 
				+// Exchanges the support samples stored in slots pos1 and pos2.
				+inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)
				+{
				+    b3MprSupport_t *first = &s->ps[pos1];
				+    b3MprSupport_t *second = &s->ps[pos2];
				+    b3MprSupport_t saved;
				+
				+    // classic three-step exchange through a temporary
				+    b3MprSupportCopy(&saved, first);
				+    b3MprSupportCopy(first, second);
				+    b3MprSupportCopy(second, &saved);
				+}
			
 
				+
			
 
				+
			
 
				+// Returns 1 when |val| is strictly below FLT_EPSILON, otherwise 0.
				+inline int b3MprIsZero(float val)
				+{
				+    if (B3_MPR_FABS(val) < FLT_EPSILON)
				+        return 1;
				+    return 0;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Approximate float equality.
				+// Returns 1 when the absolute difference is below FLT_EPSILON, or when it
				+// is below FLT_EPSILON scaled by the larger of |_a| and |_b|; 0 otherwise.
				+inline int b3MprEq(float _a, float _b)
				+{
				+    const float diff = B3_MPR_FABS(_a - _b);
				+    float magA, magB, larger;
				+
				+    // absolute test first (diff is already non-negative)
				+    if (diff < FLT_EPSILON)
				+        return 1;
				+
				+    // relative test against the larger magnitude
				+    magA = B3_MPR_FABS(_a);
				+    magB = B3_MPR_FABS(_b);
				+    larger = (magB > magA) ? magB : magA;
				+    return diff < FLT_EPSILON * larger;
				+}
			
 
				+
			
 
				+
			
 
				+// Component-wise approximate equality of the x, y and z components of
				+// *a and *b (w is not compared). Returns 1 if all three match, else 0.
				+inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)
				+{
				+    if (!b3MprEq((*a).x, (*b).x))
				+        return 0;
				+    if (!b3MprEq((*a).y, (*b).y))
				+        return 0;
				+    if (!b3MprEq((*a).z, (*b).z))
				+        return 0;
				+    return 1;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Returns the hull vertex selected by b3MaxDot for direction supportVec.
				+// Vertices are read from verticesA starting at hull->m_vertexOffset.
				+// A hull without vertices yields the zero vector.
				+inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, 	b3ConstArray(b3Float4) verticesA)
				+{
				+    float bestDot = -B3_LARGE_FLOAT;
				+    int bestIndex;
				+
				+    // nothing to search: fall back to the zero vector
				+    if (hull->m_numVertices <= 0)
				+        return b3MakeFloat4(0,0,0,0);
				+
				+    bestIndex = b3MaxDot(supportVec, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &bestDot);
				+    return verticesA[hull->m_vertexOffset+bestIndex];
				+}
			
 
				+
			
 
				+
			
 
				+// Computes the support point of one body's convex hull for the world-space
				+// direction *_dir and writes it, in world space, to *outp.
				+//
				+//   bodyIndex      index into cpuBodyBuf; supplies position, orientation
				+//                  and the collidable index (m_collidableIdx)
				+//   cpuCollidables collidable table; its m_shapeIndex selects the hull
				+//   cpuConvexData  hull table
				+//   cpuVertices    shared vertex pool the hull indexes into
				+//   _dir           search direction (only x, y, z are used)
				+//   outp           receives the transformed support vertex
				+//
				+// pairIndex, sepAxis and logme are not read; they are kept so existing
				+// callers keep compiling.
				+//
				+// Fix: a non-zero logme used to select an inlined copy of the vertex lookup
				+// that left the local vertex uninitialized when the hull had no vertices,
				+// and then transformed that indeterminate value. Both settings now go
				+// through b3LocalGetSupportVertex, which returns the zero vector in that
				+// case and is otherwise identical to the old inlined code.
				+static void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, 
				+													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+													b3ConstArray(b3Collidable_t)				cpuCollidables,
				+													b3ConstArray(b3Float4)					cpuVertices,
				+													__global b3Float4* sepAxis,
				+														const b3Float4* _dir, b3Float4* outp, int logme)
				+{
				+	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;
				+	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;
				+
				+	// dir is in world space; rotate it into the body's local space
				+	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);
				+	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);
				+
				+	// locate the hull that belongs to this body
				+	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;
				+	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);
				+	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];
				+
				+	// find the local support vertex (zero vector for an empty hull)
				+	b3Float4 pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);
				+
				+	// move the vertex to world space
				+	*outp = b3TransformPoint(pInA,pos,orn);
				+}
			
 
				+
			
 
				+// Fills *supp with a support sample for direction *_dir:
				+//   v1 = support of body A along  *_dir
				+//   v2 = support of body B along -*_dir
				+//   v  = v1 - v2
				+// The remaining arguments are forwarded unchanged to b3MprConvexSupport.
				+inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, 
				+													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+													b3ConstArray(b3Collidable_t)				cpuCollidables,
				+													b3ConstArray(b3Float4)					cpuVertices,
				+													__global b3Float4* sepAxis,
				+													const b3Float4* _dir, b3MprSupport_t *supp)
				+{
				+    b3Float4 searchDir = *_dir;
				+
				+    // body A is sampled along the requested direction
				+    b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&searchDir, &supp->v1,0);
				+
				+    // body B is sampled along the opposite direction
				+    searchDir = *_dir*-1.f;
				+    b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&searchDir, &supp->v2,0);
				+
				+    supp->v = supp->v1 - supp->v2;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Builds the portal's centre sample from the two body positions:
				+// v1 = position of body A, v2 = position of body B, v = v1 - v2.
				+inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)
				+{
				+    const b3Float4 posA = cpuBodyBuf[bodyIndexA].m_pos;
				+    const b3Float4 posB = cpuBodyBuf[bodyIndexB].m_pos;
				+
				+    center->v1 = posA;
				+    center->v2 = posB;
				+    center->v = posA - posB;
				+}
			
 
				+
			
 
				+// Assigns (x, y, z) to *v and clears its w component to zero.
				+inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)
				+{
				+    *v = b3MakeFloat4(x, y, z, 0.f);
				+}
			
 
				+
			
 
				+// In-place addition: adds the x, y and z components of *w to *v.
				+// The w component of *v is left untouched.
				+inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)
				+{
				+    const float sumX = (*v).x + (*w).x;
				+    const float sumY = (*v).y + (*w).y;
				+    const float sumZ = (*v).z + (*w).z;
				+
				+    (*v).x = sumX;
				+    (*v).y = sumY;
				+    (*v).z = sumZ;
				+}
			
 
				+
			
 
				+// Copies the whole vector *w (all components) into *v.
				+inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)
				+{
				+    v[0] = w[0];
				+}
			
 
				+
			
 
				+// Scales *d in place by the factor k, using the vector type's own
				+// multiply-assign operator.
				+inline void b3MprVec3Scale(b3Float4 *d, float k)
				+{
				+    d[0] *= k;
				+}
			
 
				+
			
 
				+// Dot product of *a and *b as computed by b3Dot3F4.
				+inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)
				+{
				+    return b3Dot3F4(*a,*b);
				+}
			
 
				+
			
 
				+
			
 
				+// Squared length of *v, i.e. the dot product of *v with itself.
				+inline float b3MprVec3Len2(const b3Float4 *v)
				+{
				+    const float lengthSquared = b3MprVec3Dot(v, v);
				+    return lengthSquared;
				+}
			
 
				+
			
 
				+// Scales *d in place by 1 / sqrt(|d|^2).
				+// No guard is applied for a zero-length input.
				+inline void b3MprVec3Normalize(b3Float4 *d)
				+{
				+    const float lengthSquared = b3MprVec3Len2(d);
				+    const float inverseLength = 1.f / B3_MPR_SQRT(lengthSquared);
				+    b3MprVec3Scale(d, inverseLength);
				+}
			
 
				+
			
 
				+// Stores the cross product of *a and *b (via b3Cross3) in *d.
				+inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)
				+{
				+    d[0] = b3Cross3(a[0], b[0]);
				+}
			
 
				+
			
 
				+
			
 
				+// Stores the difference *v - *w in *d.
				+inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)
				+{
				+    d[0] = v[0] - w[0];
				+}
			
 
				+
			
 
				+// Writes to *dir the normalized cross product (p2 - p1) x (p3 - p1),
				+// where p1, p2, p3 are the v members of portal points 1, 2 and 3.
				+inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)
				+{
				+    const b3Float4 *p1 = &b3MprSimplexPoint(portal, 1)->v;
				+    const b3Float4 *p2 = &b3MprSimplexPoint(portal, 2)->v;
				+    const b3Float4 *p3 = &b3MprSimplexPoint(portal, 3)->v;
				+    b3Float4 edge12, edge13;
				+
				+    b3MprVec3Sub2(&edge12, p2, p1);
				+    b3MprVec3Sub2(&edge13, p3, p1);
				+    b3MprVec3Cross(dir, &edge12, &edge13);
				+    b3MprVec3Normalize(dir);
				+}
			
 
				+
			
 
				+
			
 
				+// Returns 1 when the dot product of *dir with portal point 1 is positive
				+// or passes b3MprIsZero; returns 0 otherwise.
				+inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,
				+                                       const b3Float4 *dir)
				+{
				+    const float projection = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);
				+
				+    if (b3MprIsZero(projection))
				+        return 1;
				+    return projection > 0.f;
				+}
			
 
				+
			
 
				+// Tests whether the candidate point v4 advances the portal by no more than
				+// B3_MPR_TOLERANCE along *dir.
				+// The smallest of dir.(v4 - v1), dir.(v4 - v2), dir.(v4 - v3) is compared
				+// against the tolerance; returns 1 if it is (approximately) equal to or
				+// below it, else 0.
				+inline int portalReachTolerance(const b3MprSimplex_t *portal,
				+                                     const b3MprSupport_t *v4,
				+                                     const b3Float4 *dir)
				+{
				+    // projections of the three portal points and of the candidate onto dir
				+    const float proj1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);
				+    const float proj2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);
				+    const float proj3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);
				+    const float proj4 = b3MprVec3Dot(&v4->v, dir);
				+
				+    const float gap2 = proj4 - proj2;
				+    const float gap3 = proj4 - proj3;
				+    float smallest = proj4 - proj1;
				+
				+    smallest = B3_MPR_FMIN(smallest, gap2);
				+    smallest = B3_MPR_FMIN(smallest, gap3);
				+
				+    if (b3MprEq(smallest, B3_MPR_TOLERANCE))
				+        return 1;
				+    return smallest < B3_MPR_TOLERANCE;
				+}
			
 
				+
			
 
				+// Returns 1 when the dot product of the candidate point v4 with *dir is
				+// positive or passes b3MprIsZero; returns 0 otherwise.
				+// The portal argument is not read.
				+inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   
				+                                         const b3MprSupport_t *v4,
				+                                         const b3Float4 *dir)
				+{
				+    const float projection = b3MprVec3Dot(&v4->v, dir);
				+
				+    if (b3MprIsZero(projection))
				+        return 1;
				+    return projection > 0.f;
				+}
			
 
				+
			
 
				+// Replaces one of portal points 1..3 with the new support sample v4.
				+// The slot is chosen from the signs of the dot products of the portal
				+// points with n = v4 x v0 (v0 being portal point 0):
				+//   p1.n > 0 :  p2.n > 0 -> slot 1, otherwise slot 3
				+//   else     :  p3.n > 0 -> slot 2, otherwise slot 1
				+inline void b3ExpandPortal(b3MprSimplex_t *portal,
				+                              const b3MprSupport_t *v4)
				+{
				+    b3Float4 normal;
				+    size_t slot;
				+
				+    b3MprVec3Cross(&normal, &v4->v, &b3MprSimplexPoint(portal, 0)->v);
				+
				+    if (b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &normal) > 0.f)
				+    {
				+        slot = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &normal) > 0.f) ? 1 : 3;
				+    }
				+    else
				+    {
				+        slot = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &normal) > 0.f) ? 2 : 1;
				+    }
				+
				+    b3MprSimplexSet(portal, slot, v4);
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+// Builds the initial four-point portal for the body pair.
				+//
				+// Return value:
				+//   -1  a support sample failed the "beyond the origin" test along the
				+//       current search direction
				+//    1  the origin coincides with portal point 1
				+//    2  the origin lies on the segment between portal points 0 and 1
				+//    0  a portal of four points has been stored in *portal
				+//
				+// hasSepAxis is not read. pairIndex and sepAxis are only forwarded to
				+// b3MprSupport.
				+static int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, 
				+													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+													b3ConstArray(b3Collidable_t)				cpuCollidables,
				+													b3ConstArray(b3Float4)					cpuVertices,
				+													__global b3Float4* sepAxis,
				+													__global int*	hasSepAxis,
				+													b3MprSimplex_t *portal)
				+{
				+    b3Float4 origin = b3MakeFloat4(0,0,0,0);
				+    b3Float4 searchDir, edgeA, edgeB;
				+    float proj;
				+
				+    // The slot addresses never change, only their contents do.
				+    const b3Float4 *p0 = &b3MprSimplexPoint(portal, 0)->v;
				+    const b3Float4 *p1 = &b3MprSimplexPoint(portal, 1)->v;
				+    const b3Float4 *p2 = &b3MprSimplexPoint(portal, 2)->v;
				+    const b3Float4 *p3 = &b3MprSimplexPoint(portal, 3)->v;
				+
				+    // point 0: centre of the portal, taken from the body positions
				+    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));
				+    b3MprSimplexSetSize(portal, 1);
				+
				+    if (b3MprVec3Eq(p0, &origin))
				+    {
				+        // The centre sits on the origin: the objects intersect, but no
				+        // penetration info could be derived, so nudge the centre a little.
				+        b3MprVec3Set(&edgeA, FLT_EPSILON * 10.f, 0.f, 0.f);
				+        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &edgeA);
				+    }
				+
				+    // point 1: support in the direction from the centre towards the origin
				+    b3MprVec3Copy(&searchDir, p0);
				+    b3MprVec3Scale(&searchDir, -1.f);
				+    b3MprVec3Normalize(&searchDir);
				+    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&searchDir, b3MprSimplexPointW(portal, 1));
				+    b3MprSimplexSetSize(portal, 2);
				+
				+    // the origin must not be outside of point 1
				+    proj = b3MprVec3Dot(p1, &searchDir);
				+    if (b3MprIsZero(proj) || proj < 0.f)
				+        return -1;
				+
				+    // point 2: support along p0 x p1
				+    b3MprVec3Cross(&searchDir, p0, p1);
				+    if (b3MprIsZero(b3MprVec3Len2(&searchDir)))
				+    {
				+        // degenerate cross product: origin is on point 1 (result 1) or on
				+        // the p0-p1 segment (result 2)
				+        return b3MprVec3Eq(p1, &origin) ? 1 : 2;
				+    }
				+
				+    b3MprVec3Normalize(&searchDir);
				+    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&searchDir, b3MprSimplexPointW(portal, 2));
				+
				+    proj = b3MprVec3Dot(p2, &searchDir);
				+    if (b3MprIsZero(proj) || proj < 0.f)
				+        return -1;
				+
				+    b3MprSimplexSetSize(portal, 3);
				+
				+    // direction for point 3: normal of the (p0, p1, p2) triangle
				+    b3MprVec3Sub2(&edgeA, p1, p0);
				+    b3MprVec3Sub2(&edgeB, p2, p0);
				+    b3MprVec3Cross(&searchDir, &edgeA, &edgeB);
				+    b3MprVec3Normalize(&searchDir);
				+
				+    // keep the portal faces oriented "outside" the origin
				+    proj = b3MprVec3Dot(&searchDir, p0);
				+    if (proj > 0.f)
				+    {
				+        b3MprSimplexSwap(portal, 1, 2);
				+        b3MprVec3Scale(&searchDir, -1.f);
				+    }
				+
				+    while (b3MprSimplexSize(portal) < 4)
				+    {
				+        int replaced = 0;
				+
				+        b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&searchDir, b3MprSimplexPointW(portal, 3));
				+
				+        proj = b3MprVec3Dot(p3, &searchDir);
				+        if (b3MprIsZero(proj) || proj < 0.f)
				+            return -1;
				+
				+        // origin outside (v1, v0, v3): point 3 becomes the new point 2
				+        b3MprVec3Cross(&edgeA, p1, p3);
				+        proj = b3MprVec3Dot(&edgeA, p0);
				+        if (proj < 0.f && !b3MprIsZero(proj))
				+        {
				+            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));
				+            replaced = 1;
				+        }
				+        else
				+        {
				+            // origin outside (v3, v0, v2): point 3 becomes the new point 1
				+            b3MprVec3Cross(&edgeA, p3, p2);
				+            proj = b3MprVec3Dot(&edgeA, p0);
				+            if (proj < 0.f && !b3MprIsZero(proj))
				+            {
				+                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));
				+                replaced = 1;
				+            }
				+        }
				+
				+        if (!replaced)
				+        {
				+            // point 3 is accepted; the portal is complete
				+            b3MprSimplexSetSize(portal, 4);
				+            continue;
				+        }
				+
				+        // a point was replaced: recompute the search direction and retry
				+        b3MprVec3Sub2(&edgeA, p1, p0);
				+        b3MprVec3Sub2(&edgeB, p2, p0);
				+        b3MprVec3Cross(&searchDir, &edgeA, &edgeB);
				+        b3MprVec3Normalize(&searchDir);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+
			
 
				+// Iteratively refines the portal, for at most B3_MPR_MAX_ITERATIONS rounds.
				+// Returns 0 as soon as portalEncapsulesOrigin reports success.
				+// Returns -1 when a new support point cannot enclose the origin, when the
				+// tolerance is reached first, or when the iteration budget runs out.
				+static int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, 
				+													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+													b3ConstArray(b3Collidable_t)				cpuCollidables,
				+													b3ConstArray(b3Float4)					cpuVertices,
				+													__global b3Float4* sepAxis,
				+													b3MprSimplex_t *portal)
				+{
				+    b3Float4 outward;
				+    b3MprSupport_t candidate;
				+    int round = 0;
				+
				+    while (round < B3_MPR_MAX_ITERATIONS)
				+    {
				+        // direction pointing out of the portal (from v0 through the
				+        // v1, v2, v3 face)
				+        b3PortalDir(portal, &outward);
				+
				+        // done once the origin is inside the portal
				+        if (portalEncapsulesOrigin(portal, &outward))
				+            return 0;
				+
				+        // next support point along that direction
				+        b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&outward, &candidate);
				+
				+        // give up if the candidate cannot bring the origin inside ...
				+        if (!portalCanEncapsuleOrigin(portal, &candidate, &outward))
				+            return -1;
				+        // ... or if expanding would no longer exceed the tolerance
				+        if (portalReachTolerance(portal, &candidate, &outward))
				+            return -1;
				+
				+        // the v1-v2-v3 triangle is rearranged to face outside the
				+        // Minkowski difference (direction from v0)
				+        b3ExpandPortal(portal, &candidate);
				+
				+        round++;
				+    }
				+
				+    return -1;
				+}
			
 
				+
			
 
				+// Estimates the contact position from a finished portal and writes it to
				+// *pos. The four portal points are weighted (barycentric-style weights
				+// from triple products); the weighted sums of the v1 points and of the v2
				+// points are each divided by the weight total, and *pos is their midpoint.
				+// When the first weight total is zero or negative, weight 0 is cleared and
				+// the other three are recomputed against the portal direction.
				+static void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)
				+{
				+    b3Float4 origin = b3MakeFloat4(0,0,0,0);
				+    const b3Float4 *p0 = &b3MprSimplexPoint(portal, 0)->v;
				+    const b3Float4 *p1 = &b3MprSimplexPoint(portal, 1)->v;
				+    const b3Float4 *p2 = &b3MprSimplexPoint(portal, 2)->v;
				+    const b3Float4 *p3 = &b3MprSimplexPoint(portal, 3)->v;
				+    b3Float4 normal, scratch, onA, onB;
				+    float weight[4], total, invTotal;
				+    int k;
				+
				+    b3PortalDir(portal, &normal);
				+
				+    // weights from the tetrahedron (p0, p1, p2, p3)
				+    b3MprVec3Cross(&scratch, p1, p2);
				+    weight[0] = b3MprVec3Dot(&scratch, p3);
				+
				+    b3MprVec3Cross(&scratch, p3, p2);
				+    weight[1] = b3MprVec3Dot(&scratch, p0);
				+
				+    b3MprVec3Cross(&scratch, p0, p1);
				+    weight[2] = b3MprVec3Dot(&scratch, p3);
				+
				+    b3MprVec3Cross(&scratch, p2, p1);
				+    weight[3] = b3MprVec3Dot(&scratch, p0);
				+
				+    total = weight[0] + weight[1] + weight[2] + weight[3];
				+
				+    if (b3MprIsZero(total) || total < 0.f)
				+    {
				+        // fallback: ignore point 0 and weight the face against the normal
				+        weight[0] = 0.f;
				+
				+        b3MprVec3Cross(&scratch, p2, p3);
				+        weight[1] = b3MprVec3Dot(&scratch, &normal);
				+        b3MprVec3Cross(&scratch, p3, p1);
				+        weight[2] = b3MprVec3Dot(&scratch, &normal);
				+        b3MprVec3Cross(&scratch, p1, p2);
				+        weight[3] = b3MprVec3Dot(&scratch, &normal);
				+
				+        total = weight[1] + weight[2] + weight[3];
				+    }
				+
				+    invTotal = 1.f / total;
				+
				+    // accumulate the weighted per-object points
				+    b3MprVec3Copy(&onA, &origin);
				+    b3MprVec3Copy(&onB, &origin);
				+    for (k = 0; k < 4; k++)
				+    {
				+        b3MprVec3Copy(&scratch, &b3MprSimplexPoint(portal, k)->v1);
				+        b3MprVec3Scale(&scratch, weight[k]);
				+        b3MprVec3Add(&onA, &scratch);
				+
				+        b3MprVec3Copy(&scratch, &b3MprSimplexPoint(portal, k)->v2);
				+        b3MprVec3Scale(&scratch, weight[k]);
				+        b3MprVec3Add(&onB, &scratch);
				+    }
				+    b3MprVec3Scale(&onA, invTotal);
				+    b3MprVec3Scale(&onB, invTotal);
				+
				+    // midpoint of the two estimates
				+    b3MprVec3Copy(pos, &onA);
				+    b3MprVec3Add(pos, &onB);
				+    b3MprVec3Scale(pos, 0.5f);
				+}
			
 
				+
			
 
				+// Squared distance between the points *a and *b.
				+inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)
				+{
				+    b3Float4 delta;
				+
				+    b3MprVec3Sub2(&delta, a, b);
				+    return b3MprVec3Len2(&delta);
				+}
			
 
				+
			
 
				+// Squared distance from point P to the segment x0-b.
				+// If witness is non-null it receives the closest point on the segment.
				+//
				+// The segment is S(t) = x0 + t.d with d = b - x0 and t in <0, 1>.
				+// Minimizing |S(t) - P|^2 gives t = -((x0 - P).d) / |d|^2; values at or
				+// below 0 clamp to x0 and values at or above 1 clamp to b.
				+inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,
				+                                                  const b3Float4 *x0,
				+                                                  const b3Float4 *b,
				+                                                  b3Float4 *witness)
				+{
				+    b3Float4 segDir, fromP;
				+    float param;
				+
				+    // direction of the segment and vector from P to x0
				+    b3MprVec3Sub2(&segDir, b, x0);
				+    b3MprVec3Sub2(&fromP, x0, P);
				+
				+    param  = -1.f * b3MprVec3Dot(&fromP, &segDir);
				+    param /= b3MprVec3Len2(&segDir);
				+
				+    // closest point is the start of the segment
				+    if (param < 0.f || b3MprIsZero(param))
				+    {
				+        const float startDist = b3MprVec3Dist2(x0, P);
				+        if (witness)
				+            b3MprVec3Copy(witness, x0);
				+        return startDist;
				+    }
				+
				+    // closest point is the end of the segment
				+    if (param > 1.f || b3MprEq(param, 1.f))
				+    {
				+        const float endDist = b3MprVec3Dist2(b, P);
				+        if (witness)
				+            b3MprVec3Copy(witness, b);
				+        return endDist;
				+    }
				+
				+    // closest point is strictly inside the segment
				+    if (witness)
				+    {
				+        b3MprVec3Copy(witness, &segDir);
				+        b3MprVec3Scale(witness, param);
				+        b3MprVec3Add(witness, x0);
				+        return b3MprVec3Dist2(witness, P);
				+    }
				+
				+    // no witness requested: reuse segDir to hold (t.d + (x0 - P))
				+    b3MprVec3Scale(&segDir, param);
				+    b3MprVec3Add(&segDir, &fromP);
				+    return b3MprVec3Len2(&segDir);
				+}
			
 
				+
			
 
				+
			
 
				+// Squared distance from point P to the triangle (x0, B, C).
				+// If witness is non-null it receives the closest point found.
				+//
				+// The triangle is T(s, t) = x0 + s.d1 + t.d2 with d1 = B - x0 and
				+// d2 = C - x0. The unconstrained minimum of |T(s, t) - P|^2 is used only
				+// when s and t lie in <0, 1> and s + t <= 1 (with tolerance); otherwise the
				+// result is the smallest of the distances to the three edges.
				+inline float b3MprVec3PointTriDist2(const b3Float4 *P,
				+                                const b3Float4 *x0, const b3Float4 *B,
				+                                const b3Float4 *C,
				+                                b3Float4 *witness)
				+{
				+    b3Float4 d1, d2, toX0;
				+    b3Float4 edgeWitness;
				+    float aa, d1d1, d2d2, ad1, ad2, d1d2;
				+    float s, t, best, other;
				+    int inside;
				+
				+    b3MprVec3Sub2(&d1, B, x0);
				+    b3MprVec3Sub2(&d2, C, x0);
				+    b3MprVec3Sub2(&toX0, x0, P);
				+
				+    aa   = b3MprVec3Dot(&toX0, &toX0);
				+    d1d1 = b3MprVec3Dot(&d1, &d1);
				+    d2d2 = b3MprVec3Dot(&d2, &d2);
				+    ad1  = b3MprVec3Dot(&toX0, &d1);
				+    ad2  = b3MprVec3Dot(&toX0, &d2);
				+    d1d2 = b3MprVec3Dot(&d1, &d2);
				+
				+    s = (ad2 * d1d2 - d2d2 * ad1) / (d2d2 * d1d1 - d1d2 * d1d2);
				+    t = (-s * d1d2 - ad2) / d2d2;
				+
				+    inside = (b3MprIsZero(s) || s > 0.f)
				+            && (b3MprEq(s, 1.f) || s < 1.f)
				+            && (b3MprIsZero(t) || t > 0.f)
				+            && (b3MprEq(t, 1.f) || t < 1.f)
				+            && (b3MprEq(t + s, 1.f) || t + s < 1.f);
				+
				+    if (!inside)
				+    {
				+        // closest feature is one of the edges: x0-B, x0-C or B-C
				+        best = _b3MprVec3PointSegmentDist2(P, x0, B, witness);
				+
				+        other = _b3MprVec3PointSegmentDist2(P, x0, C, &edgeWitness);
				+        if (other < best)
				+        {
				+            best = other;
				+            if (witness)
				+                b3MprVec3Copy(witness, &edgeWitness);
				+        }
				+
				+        other = _b3MprVec3PointSegmentDist2(P, B, C, &edgeWitness);
				+        if (other < best)
				+        {
				+            best = other;
				+            if (witness)
				+                b3MprVec3Copy(witness, &edgeWitness);
				+        }
				+
				+        return best;
				+    }
				+
				+    if (witness)
				+    {
				+        // build the closest point explicitly and measure from it
				+        b3MprVec3Scale(&d1, s);
				+        b3MprVec3Scale(&d2, t);
				+        b3MprVec3Copy(witness, x0);
				+        b3MprVec3Add(witness, &d1);
				+        b3MprVec3Add(witness, &d2);
				+
				+        return b3MprVec3Dist2(witness, P);
				+    }
				+
				+    // no witness requested: evaluate the quadratic form directly
				+    best  = s * s * d1d1;
				+    best += t * t * d2d2;
				+    best += 2.f * s * t * d1d2;
				+    best += 2.f * s * ad1;
				+    best += 2.f * t * ad2;
				+    best += aa;
				+    return best;
				+}
			
 
				+
			
 
				+
			
 
				+// Expands the portal until portalReachTolerance holds or the
				+// B3_MPR_MAX_ITERATIONS-th pass is reached, then reports:
				+//   *depth  distance from the origin to the portal triangle (v1, v2, v3)
				+//   *pdir   normalized witness point of that distance query; the portal
				+//           direction is used instead when the witness is (near) zero
				+//   *pos    contact position computed by b3FindPos
				+static void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, 
				+													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+													b3ConstArray(b3Collidable_t)				cpuCollidables,
				+													b3ConstArray(b3Float4)					cpuVertices,
				+													__global b3Float4* sepAxis,
				+                       b3MprSimplex_t *portal,
				+                       float *depth, b3Float4 *pdir, b3Float4 *pos)
				+{
				+    b3Float4 origin = b3MakeFloat4(0,0,0,0);
				+    b3Float4 outward;
				+    b3MprSupport_t candidate;
				+    int pass;
				+
				+    for (pass = 1; pass <= B3_MPR_MAX_ITERATIONS; pass++)
				+    {
				+        float distSquared;
				+
				+        // portal direction and the next support point along it
				+        b3PortalDir(portal, &outward);
				+        b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&outward, &candidate);
				+
				+        // not converged and passes left: grow the portal and try again
				+        if (!portalReachTolerance(portal, &candidate, &outward)
				+                && pass != B3_MPR_MAX_ITERATIONS)
				+        {
				+            b3ExpandPortal(portal, &candidate);
				+            continue;
				+        }
				+
				+        // tolerance reached (or final pass): extract the penetration info
				+        distSquared = b3MprVec3PointTriDist2(&origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);
				+        *depth = B3_MPR_SQRT(distSquared);
				+
				+        // a (near) zero witness cannot be normalized; use the portal direction
				+        if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))
				+        {
				+            *pdir = outward;
				+        }
				+        b3MprVec3Normalize(pdir);
				+
				+        // barycentric coordinates
				+        b3FindPos(portal, pos);
				+        return;
				+    }
				+}
			
 
				+
			
 
				+/// Touching contact on the portal's point 1.
				+///
				+/// @param portal  Simplex whose point 1 holds the touching support point.
				+/// @param depth   Out: always 0.
				+/// @param dir     Out: set to the zero vector (direction is unimportant here).
				+/// @param pos     Out: midpoint of point 1's v1 and v2 members.
				+static void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)
				+{
				+    // Touching contact on portal's v1 - so depth is zero and direction
				+    // is unimportant and pos can be guessed
				+    *depth = 0.f;
				+    b3Float4 zero = b3MakeFloat4(0,0,0,0);
				+	b3Float4* b3mpr_vec3_origin = &zero;
				+
				+	b3MprVec3Copy(dir, b3mpr_vec3_origin);
				+
				+    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);
				+    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);
				+    // Single-precision literal, matching b3FindPenetrSegment; avoids a
				+    // double constant in code that is also compiled as OpenCL.
				+    b3MprVec3Scale(pos, 0.5f);
				+}
			
 
				+
			
 
				+/// Origin lies on the v0-v1 segment of the portal.
				+///
				+/// @param portal  Simplex whose point 1 supplies all values read here.
				+/// @param depth   Out: length of point 1's v member.
				+/// @param dir     Out: point 1's v member, normalized.
				+/// @param pos     Out: midpoint of point 1's v1 and v2 members.
				+static void b3FindPenetrSegment(b3MprSimplex_t *portal,
				+                              float *depth, b3Float4 *dir, b3Float4 *pos)
				+{
				+	// Position: average of the two per-shape support points.
				+	b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);
				+	b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);
				+	b3MprVec3Scale(pos, 0.5f);
				+
				+	// Direction and depth both come from v; take the length before normalizing.
				+	b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);
				+	const float lengthSq = b3MprVec3Len2(dir);
				+	*depth = B3_MPR_SQRT(lengthSq);
				+	b3MprVec3Normalize(dir);
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Runs the three MPR phases for one body pair and reports penetration info.
				+///
				+/// hasSepAxis[pairIndex] is cleared on entry and set to 1 only on the full
				+/// refinement path, where sepAxis[pairIndex] is also set to the negated
				+/// output direction.
				+///
				+/// @param depthOut  Out: penetration depth.
				+/// @param dirOut    Out: penetration direction.
				+/// @param posOut    Out: contact position.
				+/// @return 0 when the discovery result was 0, 1 or 2 (and refinement, if run,
				+///         did not fail); -1 otherwise.
				+inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,
				+					b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
				+					b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, 
				+					b3ConstArray(b3Collidable_t)	cpuCollidables,
				+					b3ConstArray(b3Float4)	cpuVertices,
				+					__global b3Float4* sepAxis,
				+					__global int*	hasSepAxis,
				+					float *depthOut, b3Float4* dirOut, b3Float4* posOut)
				+{
				+	b3MprSimplex_t portal;
				+
				+	hasSepAxis[pairIndex] = 0;
				+
				+	// Phase 1: Portal discovery
				+	const int discovery = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);
				+
				+	if (discovery == 0)
				+	{
				+		// Phase 2: Portal refinement
				+		const int refinement = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);
				+		if (refinement < 0)
				+			return -1;
				+
				+		// Phase 3: Penetration info; cache the axis for this pair.
				+		b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);
				+		hasSepAxis[pairIndex] = 1;
				+		sepAxis[pairIndex] = -*dirOut;
				+		return 0;
				+	}
				+
				+	if (discovery == 1)
				+	{
				+		// Touching contact on portal's v1.
				+		b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);
				+		return 0;
				+	}
				+
				+	if (discovery == 2)
				+	{
				+		// Origin lies on the v0-v1 segment.
				+		b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);
				+		return 0;
				+	}
				+
				+	// Origin isn't inside portal - no collision.
				+	hasSepAxis[pairIndex]=0;
				+	return -1;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_MPR_PENETRATION_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h
@@ -0,0 +1,196 @@
 
				+
			
 
				+#ifndef B3_NEW_CONTACT_REDUCTION_H
			
 
				+#define B3_NEW_CONTACT_REDUCTION_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+
			
 
				+#define GET_NPOINTS(x) (x).m_worldNormalOnB.w
			
 
				+
			
 
				+
			
 
				+/// Picks up to four representative contact indices from a point list.
				+///
				+/// With 0..4 points the count is returned and contactIdx is left untouched.
				+/// Otherwise (at most the first 64 points are considered) the points are
				+/// projected onto two axes u and v built from nearNormal and the centroid,
				+/// and the index of the smallest projection along u, -u, v and -v is stored
				+/// in contactIdx[0].x/.y/.z/.w. If none of those is the point with the
				+/// smallest w, that point replaces the .x entry.
				+///
				+/// @param p           Contact points; .w is compared to find the deepest one.
				+/// @param nPoints     Number of entries in p.
				+/// @param nearNormal  Normal used to build the two sampling axes.
				+/// @param contactIdx  Out: selected indices (written only when nPoints > 4).
				+/// @return Number of contacts to keep: nPoints when <= 4, otherwise 4.
				+///
				+/// NOTE(review): if u or v is degenerate (e.g. p[0] equals the centroid) the
				+/// projections are presumably NaN and no index is written - confirm callers
				+/// pre-initialise contactIdx.
				+int b3ExtractManifoldSequentialGlobal(__global const b3Float4* p, int nPoints, b3Float4ConstArg nearNormal, b3Int4* contactIdx)
				+{
				+	if( nPoints == 0 )
				+        return 0;
				+    
				+    if (nPoints <=4)
				+        return nPoints;
				+    
				+    if (nPoints >64)
				+        nPoints = 64;
				+    
				+	b3Float4 center = b3MakeFloat4(0,0,0,0);
				+	{
				+		for (int i=0;i<nPoints;i++)
				+			center += p[i];
				+		center /= (float)nPoints;
				+	}
				+    
				+	//	sample 4 directions
				+    b3Float4 aVector = p[0] - center;
				+    b3Float4 u = b3Cross( nearNormal, aVector );
				+    b3Float4 v = b3Cross( nearNormal, u );
				+    u = b3Normalized( u );
				+    v = b3Normalized( v );
				+    
				+    //keep point with deepest penetration
				+    float minW= FLT_MAX;
				+    int minIndex=-1;
				+    
				+    // Running minima of the four projections. Seeded with FLT_MAX so the
				+    // first point always registers; FLT_MIN is the smallest *positive*
				+    // float and left an index unwritten when every projection was positive.
				+    b3Float4 minDots;
				+    minDots.x = FLT_MAX;
				+    minDots.y = FLT_MAX;
				+    minDots.z = FLT_MAX;
				+    minDots.w = FLT_MAX;
				+    
				+    //	idx, distance
				+    for(int ie = 0; ie<nPoints; ie++ )
				+    {
				+        if (p[ie].w<minW)
				+        {
				+            minW = p[ie].w;
				+            minIndex=ie;
				+        }
				+        float f;
				+        b3Float4 r = p[ie]-center;
				+        f = b3Dot( u, r );
				+        if (f<minDots.x)
				+        {
				+            minDots.x = f;
				+            contactIdx[0].x = ie;
				+        }
				+        
				+        f = b3Dot( -u, r );
				+        if (f<minDots.y)
				+        {
				+            minDots.y = f;
				+            contactIdx[0].y = ie;
				+        }
				+        
				+        f = b3Dot( v, r );
				+        if (f<minDots.z)
				+        {
				+            minDots.z = f;
				+            contactIdx[0].z = ie;
				+        }
				+        
				+        f = b3Dot( -v, r );
				+        if (f<minDots.w)
				+        {
				+            minDots.w = f;
				+            contactIdx[0].w = ie;
				+        }
				+    }
				+    
				+    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
				+    {
				+        //replace the first contact with minimum (todo: replace contact with least penetration)
				+        contactIdx[0].x = minIndex;
				+    }
				+    
				+    return 4;
				+}
			
 
				+
			
 
				+/// Reduces the clipped contact points of one pair to at most four and appends
				+/// the result as a b3Contact4Data record.
				+///
				+/// Nothing is written unless pairIndex < numPairs, hasSeparatingAxis is set
				+/// for the pair and clippingFaces[pairIndex].w (the point count) is positive.
				+/// The output slot is reserved with b3AtomicInc(nGlobalContactsOut); the
				+/// record is filled only when that slot is below contactCapacity.
				+__kernel void   b3NewContactReductionKernel( __global b3Int4* pairs,
				+                                                   __global const b3RigidBodyData_t* rigidBodies,
				+                                                   __global const b3Float4* separatingNormals,
				+                                                   __global const int* hasSeparatingAxis,
				+                                                   __global struct b3Contact4Data* globalContactsOut,
				+                                                   __global b3Int4* clippingFaces,
				+                                                   __global b3Float4* worldVertsB2,
				+                                                   volatile __global int* nGlobalContactsOut,
				+                                                   int vertexFaceCapacity,
				+												   int contactCapacity,
				+                                                   int numPairs,
				+												   int pairIndex
				+                                                   )
				+{
				+	// The pair index is passed in explicitly instead of get_global_id(0).
				+	int i = pairIndex;
				+
				+	b3Int4 contactIdx = b3MakeInt4(0,1,2,3);
				+
				+	if (i >= numPairs)
				+		return;
				+	if (!hasSeparatingAxis[i])
				+		return;
				+
				+	int nPoints = clippingFaces[pairIndex].w;
				+	if (nPoints <= 0)
				+		return;
				+
				+	__global b3Float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];
				+	b3Float4 normal = -separatingNormals[i];
				+
				+	int nReducedContacts = b3ExtractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
				+
				+	// Reserve the output slot; the counter advances even when over capacity.
				+	int dstIdx = b3AtomicInc( nGlobalContactsOut);
				+	b3Assert(dstIdx < contactCapacity);
				+	if (dstIdx >= contactCapacity)
				+		return;
				+
				+	__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
				+	c->m_worldNormalOnB = -normal;
				+	c->m_restituitionCoeffCmp = (0.f*0xffff);
				+	c->m_frictionCoeffCmp = (0.7f*0xffff);
				+	c->m_batchIdx = pairIndex;
				+
				+	int bodyA = pairs[pairIndex].x;
				+	int bodyB = pairs[pairIndex].y;
				+
				+	// Remember where this pair's contact record was stored.
				+	pairs[pairIndex].w = dstIdx;
				+
				+	// Bodies with zero inverse mass are stored with a negated index.
				+	c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
				+	c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
				+	c->m_childIndexA =-1;
				+	c->m_childIndexB =-1;
				+
				+	// Deliberate fall-through: N reduced contacts fill slots N-1 down to 0.
				+	switch (nReducedContacts)
				+	{
				+		case 4:
				+			c->m_worldPosB[3] = pointsIn[contactIdx.w];
				+		case 3:
				+			c->m_worldPosB[2] = pointsIn[contactIdx.z];
				+		case 2:
				+			c->m_worldPosB[1] = pointsIn[contactIdx.y];
				+		case 1:
				+			c->m_worldPosB[0] = pointsIn[contactIdx.x];
				+		default:
				+			break;
				+	}
				+
				+	// The point count lives in the w component of the normal (GET_NPOINTS).
				+	GET_NPOINTS(*c) = nReducedContacts;
				+}
			
 
				+#endif
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
@@ -0,0 +1,90 @@
 
				+
			
 
				+
			
 
				+#ifndef B3_QUANTIZED_BVH_NODE_H
			
 
				+#define B3_QUANTIZED_BVH_NODE_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+#define B3_MAX_NUM_PARTS_IN_BITS 10
			
 
				+
			
 
				+///b3QuantizedBvhNodeData is a compressed aabb node, 16 bytes.
			
 
				+///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
			
 
				+typedef struct b3QuantizedBvhNodeData b3QuantizedBvhNodeData_t;
			
 
				+// Quantized AABB plus one int that is either a triangle index or an escape index.
			
 
				+struct b3QuantizedBvhNodeData
			
 
				+{
			
 
				+	//12 bytes: quantized AABB corners, three unsigned 16-bit values each
			
 
				+	unsigned short int	m_quantizedAabbMin[3];
			
 
				+	unsigned short int	m_quantizedAabbMax[3];
			
 
				+	//4 bytes: >= 0 means leaf (b3IsLeaf) holding a triangle index; negative means internal node storing the negated escape index (b3GetEscapeIndex)
			
 
				+	int	m_escapeIndexOrTriangleIndex;
			
 
				+};
			
 
				+
			
 
				+/// Returns the triangle index stored in a node: the value of
				+/// m_escapeIndexOrTriangleIndex with every bit from position
				+/// (31 - B3_MAX_NUM_PARTS_IN_BITS) upwards cleared.
				+inline int	b3GetTriangleIndex(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	// All ones shifted left: set exactly on the high bits to discard.
				+	const unsigned int highBits = (~0u) << (31-B3_MAX_NUM_PARTS_IN_BITS);
				+	// Keep only the lower bits where the triangle index is stored.
				+	return (rootNode->m_escapeIndexOrTriangleIndex & ~highBits);
				+}
			
 
				+
			
 
				+/// Returns 1 for a leaf node (stored value >= 0, a triangle index) and
				+/// 0 for an internal node (stored value negative, a skip index).
				+inline int b3IsLeaf(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	if (rootNode->m_escapeIndexOrTriangleIndex < 0)
				+		return 0;
				+	return 1;
				+}
			
 
				+	
			
 
				+/// Returns the escape index of a node, i.e. the negation of the stored
				+/// m_escapeIndexOrTriangleIndex value.
				+inline int b3GetEscapeIndex(const b3QuantizedBvhNodeData* rootNode)
				+{
				+	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
				+	return -stored;
				+}
			
 
				+
			
 
				+/// Clamps a point to the BVH bounds and quantizes x, y and z to 16 bits.
				+///
				+/// @param out              Receives three quantized components.
				+/// @param point2           Point to quantize.
				+/// @param isMax            Non-zero: add 1 before truncating and force the
				+///                         low bit on (odd values). Zero: truncate and clear
				+///                         the low bit (even values).
				+/// @param bvhAabbMin       Lower clamp bound and quantization origin.
				+/// @param bvhAabbMax       Upper clamp bound.
				+/// @param bvhQuantization  Per-component scale applied after subtracting the origin.
				+inline void b3QuantizeWithClamp(unsigned short* out, b3Float4ConstArg point2,int isMax, b3Float4ConstArg bvhAabbMin, b3Float4ConstArg bvhAabbMax, b3Float4ConstArg bvhQuantization)
				+{
				+	b3Float4 clamped = b3MinFloat4(b3MaxFloat4(point2,bvhAabbMin), bvhAabbMax);
				+	b3Float4 scaled = (clamped - bvhAabbMin) * bvhQuantization;
				+
				+	if (!isMax)
				+	{
				+		// Minimum corner: truncate, then clear the low bit.
				+		out[0] = (unsigned short) (((unsigned short)(scaled.x) & 0xfffe));
				+		out[1] = (unsigned short) (((unsigned short)(scaled.y) & 0xfffe));
				+		out[2] = (unsigned short) (((unsigned short)(scaled.z) & 0xfffe));
				+		return;
				+	}
				+
				+	// Maximum corner: bump by one, truncate, then set the low bit.
				+	out[0] = (unsigned short) (((unsigned short)(scaled.x+1.f) | 1));
				+	out[1] = (unsigned short) (((unsigned short)(scaled.y+1.f) | 1));
				+	out[2] = (unsigned short) (((unsigned short)(scaled.z+1.f) | 1));
				+}
			
 
				+
			
 
				+
			
 
				+/// Overlap test for two quantized AABBs, each given as three-component
				+/// min/max arrays. Boxes that merely touch count as overlapping.
				+///
				+/// @return 1 when the boxes overlap on all three axes, 0 otherwise.
				+inline int b3TestQuantizedAabbAgainstQuantizedAabbSlow(
				+								const unsigned short int* aabbMin1,
				+								const unsigned short int* aabbMax1,
				+								const unsigned short int* aabbMin2,
				+								const unsigned short int* aabbMax2)
				+{
				+	// A gap on any single axis is enough to rule out an overlap.
				+	for (int axis = 0; axis < 3; ++axis)
				+	{
				+		if (aabbMin1[axis] > aabbMax2[axis] || aabbMax1[axis] < aabbMin2[axis])
				+			return 0;
				+	}
				+	return 1;
				+}
			
 
				+
			
 
				+
			
 
				+#endif //B3_QUANTIZED_BVH_NODE_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h
@@ -0,0 +1,97 @@
 
				+#ifndef B3_REDUCE_CONTACTS_H
			
 
				+#define B3_REDUCE_CONTACTS_H
			
 
				+
			
 
				+/// Picks up to four representative contact indices from a point list.
				+///
				+/// With 0..4 points the count is returned and contactIdx is left untouched.
				+/// Otherwise (at most the first 64 points are considered) the points are
				+/// projected onto two axes u and v built from nearNormal and the centroid,
				+/// and the index of the smallest projection along u, -u, v and -v is stored
				+/// in contactIdx[0].x/.y/.z/.w. If none of those is the point with the
				+/// smallest w, that point replaces the .x entry.
				+///
				+/// @param p           Contact points; .w is compared to find the deepest one.
				+/// @param nPoints     Number of entries in p.
				+/// @param nearNormal  Normal used to build the two sampling axes.
				+/// @param contactIdx  Out: selected indices (written only when nPoints > 4).
				+/// @return Number of contacts to keep: nPoints when <= 4, otherwise 4.
				+///
				+/// NOTE(review): if u or v is degenerate (e.g. p[0] equals the centroid) the
				+/// projections are presumably NaN and no index is written - confirm callers
				+/// pre-initialise contactIdx.
				+inline int b3ReduceContacts(const b3Float4* p, int nPoints, const b3Float4& nearNormal, b3Int4* contactIdx)
				+{
				+	if( nPoints == 0 )
				+        return 0;
				+    
				+    if (nPoints <=4)
				+        return nPoints;
				+    
				+    if (nPoints >64)
				+        nPoints = 64;
				+    
				+	b3Float4 center = b3MakeFloat4(0,0,0,0);
				+	{
				+		for (int i=0;i<nPoints;i++)
				+			center += p[i];
				+		center /= (float)nPoints;
				+	}
				+    
				+	//	sample 4 directions
				+    b3Float4 aVector = p[0] - center;
				+    b3Float4 u = b3Cross3( nearNormal, aVector );
				+    b3Float4 v = b3Cross3( nearNormal, u );
				+    u = b3FastNormalized3( u );
				+    v = b3FastNormalized3( v );
				+    
				+    //keep point with deepest penetration
				+    float minW= FLT_MAX;
				+    int minIndex=-1;
				+    
				+    // Running minima of the four projections. Seeded with FLT_MAX so the
				+    // first point always registers; FLT_MIN is the smallest *positive*
				+    // float and left an index unwritten when every projection was positive.
				+    b3Float4 minDots;
				+    minDots.x = FLT_MAX;
				+    minDots.y = FLT_MAX;
				+    minDots.z = FLT_MAX;
				+    minDots.w = FLT_MAX;
				+    
				+    //	idx, distance
				+    for(int ie = 0; ie<nPoints; ie++ )
				+    {
				+        if (p[ie].w<minW)
				+        {
				+            minW = p[ie].w;
				+            minIndex=ie;
				+        }
				+        float f;
				+        b3Float4 r = p[ie]-center;
				+        f = b3Dot3F4( u, r );
				+        if (f<minDots.x)
				+        {
				+            minDots.x = f;
				+            contactIdx[0].x = ie;
				+        }
				+        
				+        f = b3Dot3F4( -u, r );
				+        if (f<minDots.y)
				+        {
				+            minDots.y = f;
				+            contactIdx[0].y = ie;
				+        }
				+        
				+        f = b3Dot3F4( v, r );
				+        if (f<minDots.z)
				+        {
				+            minDots.z = f;
				+            contactIdx[0].z = ie;
				+        }
				+        
				+        f = b3Dot3F4( -v, r );
				+        if (f<minDots.w)
				+        {
				+            minDots.w = f;
				+            contactIdx[0].w = ie;
				+        }
				+    }
				+    
				+    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
				+    {
				+        //replace the first contact with minimum (todo: replace contact with least penetration)
				+        contactIdx[0].x = minIndex;
				+    }
				+    
				+    return 4;
				+}
			
 
				+
			
 
				+#endif //B3_REDUCE_CONTACTS_H
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h
@@ -0,0 +1,34 @@
 
				+#ifndef B3_RIGIDBODY_DATA_H
			
 
				+#define B3_RIGIDBODY_DATA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+typedef struct b3RigidBodyData b3RigidBodyData_t;
			
 
				+// Per-body state record shared by the host and kernel code paths.
			
 
				+// NOTE(review): field order and sizes are presumably layout-sensitive (shared with GPU buffers) - confirm before reordering.
			
 
				+struct b3RigidBodyData
			
 
				+{
			
 
				+	b3Float4				m_pos; // position; used by b3ComputeWorldAabb to place the local AABB
			
 
				+	b3Quat					m_quat; // orientation; used by b3ComputeWorldAabb to rotate the local AABB
			
 
				+	b3Float4				m_linVel; // presumably linear velocity - not read in this part of the file
			
 
				+	b3Float4				m_angVel; // presumably angular velocity - not read in this part of the file
			
 
				+
			
 
				+	int 					m_collidableIdx; // index into the collidables array (see b3ComputeWorldAabb)
			
 
				+	float 				m_invMass; // 0 is tested for explicitly by the contact and AABB code
			
 
				+	float 				m_restituitionCoeff; // presumably restitution coefficient - confirm
			
 
				+	float 				m_frictionCoeff; // presumably friction coefficient - confirm
			
 
				+};
			
 
				+
			
 
				+typedef struct b3InertiaData b3InertiaData_t;
			
 
				+// Pair of 3x3 matrices kept per body; neither is read in this part of the file.
			
 
				+struct b3InertiaData
			
 
				+{
			
 
				+	b3Mat3x3 m_invInertiaWorld; // presumably the inverse inertia tensor in world space - confirm
			
 
				+	b3Mat3x3 m_initInvInertia; // presumably the initial (body-space) inverse inertia tensor - confirm
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_RIGIDBODY_DATA_H
			
 
				+	
			
--- a/include/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h
+++ b/include/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h
@@ -0,0 +1,40 @@
 
				+#ifndef B3_UPDATE_AABBS_H
			
 
				+#define B3_UPDATE_AABBS_H
			
 
				+
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Computes the world-space AABB of one body and stores it in worldAabbs[bodyId].
				+///
				+/// The local AABB is looked up by the body's collidable index and transformed
				+/// by the body's position and orientation with zero margin. Nothing is
				+/// written when the collidable's shape index is negative.
				+///
				+/// @param bodyId          Index of the body in bodies and of the output slot.
				+/// @param bodies          Rigid body array.
				+/// @param collidables     Collidable array, indexed by m_collidableIdx.
				+/// @param localShapeAABB  Local AABBs, indexed by collidable index.
				+/// @param worldAabbs      Out: world AABBs, indexed by body id.
				+void b3ComputeWorldAabb(  int bodyId, __global const b3RigidBodyData_t* bodies, __global const  b3Collidable_t* collidables, __global const  b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)
				+{
				+	__global const b3RigidBodyData_t* body = &bodies[bodyId];
				+
				+	b3Float4 position = body->m_pos;
				+	b3Quat	orientation = body->m_quat;
				+	
				+	int collidableIndex = body->m_collidableIdx;
				+	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
				+		
				+	if (shapeIndex>=0)
				+	{
				+		b3Aabb_t localAabb = localShapeAABB[collidableIndex];
				+		b3Aabb_t worldAabb;
				+		
				+		b3Float4 aabbAMinOut,aabbAMaxOut;	
				+		float margin = 0.f;
				+		b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);
				+		
				+		// The fourth lane of each corner carries metadata: the body id and a
				+		// flag that is 0 for zero inverse mass and 1 otherwise.
				+		worldAabb.m_minVec =aabbAMinOut;
				+		worldAabb.m_minIndices[3] = bodyId;
				+		worldAabb.m_maxVec = aabbAMaxOut;
				+		// body already points at bodies[bodyId]; indexing it again with
				+		// bodyId would read bodies[2*bodyId].
				+		worldAabb.m_signedMaxIndices[3] = body->m_invMass==0.f? 0 : 1;
				+		worldAabbs[bodyId] = worldAabb;
				+	}
				+}
			
 
				+
			
 
				+#endif //B3_UPDATE_AABBS_H
			
--- a/include/Bullet3Common/b3AlignedAllocator.h
+++ b/include/Bullet3Common/b3AlignedAllocator.h
@@ -0,0 +1,107 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_ALIGNED_ALLOCATOR
			
 
				+#define B3_ALIGNED_ALLOCATOR
			
 
				+
			
 
				+///We will probably replace this with our own aligned memory allocator,
			
 
				+///i.e. replace _aligned_malloc and _aligned_free with our own implementation
			
 
				+///that is more portable and more predictable.
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+//#define B3_DEBUG_MEMORY_ALLOCATIONS 1
			
 
				+#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
			
 
				+// Debug variant: the macros pass the call site (__LINE__, __FILE__) to the internal functions.
			
 
				+#define b3AlignedAlloc(a,b) \
			
 
				+		b3AlignedAllocInternal(a,b,__LINE__,__FILE__)
			
 
				+
			
 
				+#define b3AlignedFree(ptr) \
			
 
				+		b3AlignedFreeInternal(ptr,__LINE__,__FILE__)
			
 
				+// NOTE(review): __FILE__ is a string literal, so "char* filename" probably should be "const char*" - confirm against the definitions before changing.
			
 
				+void*	b3AlignedAllocInternal	(size_t size, int alignment,int line,char* filename);
			
 
				+
			
 
				+void	b3AlignedFreeInternal	(void* ptr,int line,char* filename);
			
 
				+
			
 
				+#else
			
 
				+	void*	b3AlignedAllocInternal	(size_t size, int alignment);
			
 
				+	void	b3AlignedFreeInternal	(void* ptr);
			
 
				+	// Default variant: the macros forward straight to the functions declared above.
			
 
				+	#define b3AlignedAlloc(size,alignment) b3AlignedAllocInternal(size,alignment)
			
 
				+	#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr)
			
 
				+
			
 
				+#endif
			
 
				+typedef int	btSizeType; // element-count type taken by b3AlignedAllocator::allocate
			
 
				+// Function types of the replaceable allocation hooks: aligned pair first, plain pair second.
			
 
				+typedef void *(b3AlignedAllocFunc)(size_t size, int alignment);
			
 
				+typedef void (b3AlignedFreeFunc)(void *memblock);
			
 
				+typedef void *(b3AllocFunc)(size_t size);
			
 
				+typedef void (b3FreeFunc)(void *memblock);
			
 
				+
			
 
				+///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom
			
 
				+void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc);
			
 
				+///If the developer already has a custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it.
			
 
				+void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc);
			
 
				+
			
 
				+
			
 
				+///The b3AlignedAllocator is a portable class for aligned memory allocations.
			
 
				+///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned.
			
 
				+/// Stateless allocator that routes storage requests through the
				+/// b3AlignedAlloc / b3AlignedFree macros with a compile-time alignment.
				+///
				+/// @tparam T          Element type.
				+/// @tparam Alignment  Alignment value passed to b3AlignedAlloc.
				+template < typename T , unsigned Alignment >
				+class b3AlignedAllocator {
				+
				+	typedef b3AlignedAllocator< T , Alignment > self_type;
				+
				+public:
				+
				+	typedef T                value_type;
				+	typedef T*               pointer;
				+	typedef T&               reference;
				+	typedef const T*         const_pointer;
				+	typedef const T&         const_reference;
				+
				+	/// Re-targets the allocator at another element type, same alignment.
				+	template < typename O > struct rebind {
				+		typedef b3AlignedAllocator< O , Alignment > other;
				+	};
				+
				+	b3AlignedAllocator()
				+	{
				+	}
				+
				+	/// Converting constructor; there is no state to copy.
				+	template < typename Other >
				+	b3AlignedAllocator( const b3AlignedAllocator< Other , Alignment > & )
				+	{
				+	}
				+
				+	/// Assignment from any same-alignment allocator is a no-op.
				+	template < typename O >
				+	self_type & operator=( const b3AlignedAllocator< O , Alignment > & )
				+	{
				+		return *this;
				+	}
				+
				+	pointer address( reference ref ) const
				+	{
				+		return &ref;
				+	}
				+
				+	const_pointer address( const_reference ref ) const
				+	{
				+		return &ref;
				+	}
				+
				+	/// Requests raw storage for n elements; hint is ignored.
				+	pointer allocate( btSizeType n , const_pointer * hint = 0 )
				+	{
				+		(void)hint;
				+		void * storage = b3AlignedAlloc( sizeof(value_type) * n , Alignment );
				+		return reinterpret_cast< pointer >( storage );
				+	}
				+
				+	/// Hands storage obtained from allocate() back to b3AlignedFree.
				+	void deallocate( pointer ptr )
				+	{
				+		b3AlignedFree( reinterpret_cast< void * >( ptr ) );
				+	}
				+
				+	/// Copy-constructs value into the storage at ptr (placement new).
				+	void construct( pointer ptr , const value_type & value )
				+	{
				+		new (ptr) value_type( value );
				+	}
				+
				+	/// Runs the destructor of the object at ptr without releasing storage.
				+	void destroy( pointer ptr )
				+	{
				+		ptr->~value_type();
				+	}
				+
				+	/// Two allocators of the same type always compare equal.
				+	friend bool operator==( const self_type & , const self_type & )
				+	{
				+		return true;
				+	}
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_ALIGNED_ALLOCATOR
			
 
				+
			
--- a/include/Bullet3Common/b3AlignedObjectArray.h
+++ b/include/Bullet3Common/b3AlignedObjectArray.h
@@ -0,0 +1,517 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_OBJECT_ARRAY__
			
 
				+#define B3_OBJECT_ARRAY__
			
 
				+
			
 
				+#include "b3Scalar.h" // has definitions like B3_FORCE_INLINE
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW
			
 
				+///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors
			
 
				+///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator=
			
 
				+///see discussion here: http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and
			
 
				+///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240
			
 
				+
			
 
				+#define B3_USE_PLACEMENT_NEW 1
			
 
				+//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
			
 
				+#define B3_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidentally perform deep copies of data if you are not careful
			
 
				+
			
 
				+#ifdef B3_USE_MEMCPY
			
 
				+#include <memory.h>
			
 
				+#include <string.h>
			
 
				+#endif //B3_USE_MEMCPY
			
 
				+
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+#include <new> //for placement new
			
 
				+#endif //B3_USE_PLACEMENT_NEW
			
 
				+
			
 
				+
			
 
				+///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods
			
 
				+///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data
			
 
				+template <typename T> 
			
 
				+//template <class T> 
			
 
				+class b3AlignedObjectArray
			
 
				+{
			
 
				+	b3AlignedAllocator<T , 16>	m_allocator;
			
 
				+
			
 
				+	int					m_size;
			
 
				+	int					m_capacity;
			
 
				+	T*					m_data;
			
 
				+	//PCK: added this line
			
 
				+	bool				m_ownsMemory;
			
 
				+
			
 
				+#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+public:
			
 
				+	B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T> &other)
			
 
				+	{
			
 
				+		copyFromArray(other);
			
 
				+		return *this;
			
 
				+	}
			
 
				+#else//B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+private:
			
 
				+		B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T> &other);
			
 
				+#endif//B3_ALLOW_ARRAY_COPY_OPERATOR
			
 
				+
			
 
				+protected:
			
 
				+		B3_FORCE_INLINE	int	allocSize(int size)
			
 
				+		{
			
 
				+			return (size ? size*2 : 1);
			
 
				+		}
			
 
				+		B3_FORCE_INLINE	void	copy(int start,int end, T* dest) const
			
 
				+		{
			
 
				+			int i;
			
 
				+			for (i=start;i<end;++i)
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+				new (&dest[i]) T(m_data[i]);
			
 
				+#else
			
 
				+				dest[i] = m_data[i];
			
 
				+#endif //B3_USE_PLACEMENT_NEW
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE	void	init()
			
 
				+		{
			
 
				+			//PCK: added this line
			
 
				+			m_ownsMemory = true;
			
 
				+			m_data = 0;
			
 
				+			m_size = 0;
			
 
				+			m_capacity = 0;
			
 
				+		}
			
 
				+		B3_FORCE_INLINE	void	destroy(int first,int last)
			
 
				+		{
			
 
				+			int i;
			
 
				+			for (i=first; i<last;i++)
			
 
				+			{
			
 
				+				m_data[i].~T();
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE	void* allocate(int size)
			
 
				+		{
			
 
				+			if (size)
			
 
				+				return m_allocator.allocate(size);
			
 
				+			return 0;
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE	void	deallocate()
			
 
				+		{
			
 
				+			if(m_data)	{
			
 
				+				//PCK: enclosed the deallocation in this block
			
 
				+				if (m_ownsMemory)
			
 
				+				{
			
 
				+					m_allocator.deallocate(m_data);
			
 
				+				}
			
 
				+				m_data = 0;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+
			
 
				+	public:
			
 
				+		
			
 
				+		b3AlignedObjectArray()
			
 
				+		{
			
 
				+			init();
			
 
				+		}
			
 
				+
			
 
				+		~b3AlignedObjectArray()
			
 
				+		{
			
 
				+			clear();
			
 
				+		}
			
 
				+
			
 
				+		///Deep-copies otherArray. Generally it is best to avoid the copy constructor of a b3AlignedObjectArray and use a (const) reference to the array instead.
			
 
				+		///Storage is reserved first and each element is then copy-constructed exactly once into raw memory.
			
 
				+		///(Calling resize() here would default-construct every slot, and copy() would then construct over those live objects without destroying them.)
			
 
				+		b3AlignedObjectArray(const b3AlignedObjectArray& otherArray)
			
 
				+		{
			
 
				+			init();
			
 
				+
			
 
				+			int otherSize = otherArray.size();
			
 
				+			reserve (otherSize);
			
 
				+			otherArray.copy(0, otherSize, m_data);
			
 
				+			m_size = otherSize;
			
 
				+		}
			
 
				+
			
 
				+		
			
 
				+		
			
 
				+		/// return the number of elements in the array
			
 
				+		B3_FORCE_INLINE	int size() const
			
 
				+		{	
			
 
				+			return m_size;
			
 
				+		}
			
 
				+		
			
 
				+		B3_FORCE_INLINE const T& at(int n) const
			
 
				+		{
			
 
				+			b3Assert(n>=0);
			
 
				+			b3Assert(n<size());
			
 
				+			return m_data[n];
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE T& at(int n)
			
 
				+		{
			
 
				+			b3Assert(n>=0);
			
 
				+			b3Assert(n<size());
			
 
				+			return m_data[n];
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE const T& operator[](int n) const
			
 
				+		{
			
 
				+			b3Assert(n>=0);
			
 
				+			b3Assert(n<size());
			
 
				+			return m_data[n];
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE T& operator[](int n)
			
 
				+		{
			
 
				+			b3Assert(n>=0);
			
 
				+			b3Assert(n<size());
			
 
				+			return m_data[n];
			
 
				+		}
			
 
				+		
			
 
				+
			
 
				+		///clear the array, deallocated memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations.
			
 
				+		B3_FORCE_INLINE	void	clear()
			
 
				+		{
			
 
				+			destroy(0,size());
			
 
				+			
			
 
				+			deallocate();
			
 
				+			
			
 
				+			init();
			
 
				+		}
			
 
				+
			
 
				+		B3_FORCE_INLINE	void	pop_back()
			
 
				+		{
			
 
				+			b3Assert(m_size>0);
			
 
				+			m_size--;
			
 
				+			m_data[m_size].~T();
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		///resizeNoInitialize changes the number of elements without constructing or destroying any of them.
			
 
				+		///Growing reserves storage and leaves the new slots uninitialized; shrinking only lowers the size, so no destructors run and no memory is freed.
			
 
				+		///Use resize() instead when new elements must be constructed from a fill value and removed elements must be destroyed.
			
 
				+		B3_FORCE_INLINE	void	resizeNoInitialize(int newsize)
			
 
				+		{
			
 
				+			int curSize = size();
			
 
				+
			
 
				+			if (newsize < curSize)
			
 
				+			{
			
 
				+			} else
			
 
				+			{
			
 
				+				if (newsize > size())
			
 
				+				{
			
 
				+					reserve(newsize);
			
 
				+				}
			
 
				+				//leave this uninitialized
			
 
				+			}
			
 
				+			m_size = newsize;
			
 
				+		}
			
 
				+	
			
 
				+		B3_FORCE_INLINE	void	resize(int newsize, const T& fillData=T())
			
 
				+		{
			
 
				+			int curSize = size();
			
 
				+
			
 
				+			if (newsize < curSize)
			
 
				+			{
			
 
				+				for(int i = newsize; i < curSize; i++)
			
 
				+				{
			
 
				+					m_data[i].~T();
			
 
				+				}
			
 
				+			} else
			
 
				+			{
			
 
				+				if (newsize > size())
			
 
				+				{
			
 
				+					reserve(newsize);
			
 
				+				}
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+				for (int i=curSize;i<newsize;i++)
			
 
				+				{
			
 
				+					new ( &m_data[i]) T(fillData);
			
 
				+				}
			
 
				+#endif //B3_USE_PLACEMENT_NEW
			
 
				+
			
 
				+			}
			
 
				+
			
 
				+			m_size = newsize;
			
 
				+		}
			
 
				+		B3_FORCE_INLINE	T&  expandNonInitializing( )
			
 
				+		{	
			
 
				+			int sz = size();
			
 
				+			if( sz == capacity() )
			
 
				+			{
			
 
				+				reserve( allocSize(size()) );
			
 
				+			}
			
 
				+			m_size++;
			
 
				+
			
 
				+			return m_data[sz];		
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		B3_FORCE_INLINE	T&  expand( const T& fillValue=T())
			
 
				+		{	
			
 
				+			int sz = size();
			
 
				+			if( sz == capacity() )
			
 
				+			{
			
 
				+				reserve( allocSize(size()) );
			
 
				+			}
			
 
				+			m_size++;
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+			new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory)
			
 
				+#endif
			
 
				+
			
 
				+			return m_data[sz];		
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		///Append a copy of _Val to the end of the array, growing the storage when it is full.
			
 
				+		///_Val may refer to an element of this array: when a reallocation is needed the value is copied
			
 
				+		///before reserve() destroys and frees the old storage, so the reference is never read after it dangles.
			
 
				+		B3_FORCE_INLINE	void push_back(const T& _Val)
			
 
				+		{	
			
 
				+			int sz = size();
			
 
				+			if( sz == capacity() )
			
 
				+			{
			
 
				+				//growth path: take the copy first, _Val might live in the buffer that is about to be released
			
 
				+				const T valueCopy(_Val);
			
 
				+				reserve( allocSize(sz) );
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+				new ( &m_data[m_size] ) T(valueCopy);
			
 
				+#else
			
 
				+				m_data[size()] = valueCopy;
			
 
				+#endif //B3_USE_PLACEMENT_NEW
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				//no reallocation: _Val stays valid, construct directly from it
			
 
				+#ifdef B3_USE_PLACEMENT_NEW
			
 
				+				new ( &m_data[m_size] ) T(_Val);
			
 
				+#else
			
 
				+				m_data[size()] = _Val;
			
 
				+#endif //B3_USE_PLACEMENT_NEW
			
 
				+			}
			
 
				+
			
 
				+			m_size++;
			
 
				+		}
			
 
				+
			
 
				+	
			
 
				+		/// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve()
			
 
				+		B3_FORCE_INLINE	int capacity() const
			
 
				+		{	
			
 
				+			return m_capacity;
			
 
				+		}
			
 
				+		
			
 
				+		///Make sure storage for at least _Count elements is allocated; the capacity is never reduced.
			
 
				+		///Existing elements are copied into the new storage and the old storage is released (only if this array owns it).
			
 
				+		///If the allocation fails an error is reported and the array ends up empty with zero capacity.
			
 
				+		B3_FORCE_INLINE	void reserve(int _Count)
			
 
				+		{	// determine new minimum length of allocated storage
			
 
				+			if (capacity() < _Count)
			
 
				+			{	// not enough room, reallocate
			
 
				+				T*	s = (T*)allocate(_Count);
			
 
				+				b3Assert(s);
			
 
				+				if (s==0)
			
 
				+				{
			
 
				+					b3Error("b3AlignedObjectArray reserve out-of-memory\n");
			
 
				+					//the elements cannot be moved anywhere: destroy them before the size is reset,
			
 
				+					//otherwise their destructors would never run
			
 
				+					destroy(0,size());
			
 
				+					_Count=0;
			
 
				+					m_size=0;
			
 
				+				}
			
 
				+				copy(0, size(), s);
			
 
				+
			
 
				+				destroy(0,size());
			
 
				+
			
 
				+				deallocate();
			
 
				+				
			
 
				+				//PCK: added this line
			
 
				+				m_ownsMemory = true;
			
 
				+
			
 
				+				m_data = s;
			
 
				+				
			
 
				+				m_capacity = _Count;
			
 
				+
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		///Default ordering functor for quickSort/heapSort: compares two elements with operator<.
			
 
				+		///operator() is const because the sort routines receive their comparator as a const reference.
			
 
				+		class less
			
 
				+		{
			
 
				+			public:
			
 
				+
			
 
				+				bool operator() ( const T& a, const T& b ) const
			
 
				+				{
			
 
				+					return ( a < b );
			
 
				+				}
			
 
				+		};
			
 
				+	
			
 
				+
			
 
				+		template <typename L>
			
 
				+		void quickSortInternal(const L& CompareFunc,int lo, int hi)
			
 
				+		{
			
 
				+		//  lo is the lower index, hi is the upper index
			
 
				+		//  of the region of array a that is to be sorted
			
 
				+			int i=lo, j=hi;
			
 
				+			T x=m_data[(lo+hi)/2];
			
 
				+
			
 
				+			//  partition
			
 
				+			do
			
 
				+			{    
			
 
				+				while (CompareFunc(m_data[i],x)) 
			
 
				+					i++; 
			
 
				+				while (CompareFunc(x,m_data[j])) 
			
 
				+					j--;
			
 
				+				if (i<=j)
			
 
				+				{
			
 
				+					swap(i,j);
			
 
				+					i++; j--;
			
 
				+				}
			
 
				+			} while (i<=j);
			
 
				+
			
 
				+			//  recursion
			
 
				+			if (lo<j) 
			
 
				+				quickSortInternal( CompareFunc, lo, j);
			
 
				+			if (i<hi) 
			
 
				+				quickSortInternal( CompareFunc, i, hi);
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		template <typename L>
			
 
				+		void quickSort(const L& CompareFunc)
			
 
				+		{
			
 
				+			//don't sort 0 or 1 elements
			
 
				+			if (size()>1)
			
 
				+			{
			
 
				+				quickSortInternal(CompareFunc,0,size()-1);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+		///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
			
 
				+		template <typename L>
			
 
				+		void downHeap(T *pArr, int k, int n, const L& CompareFunc)
			
 
				+		{
			
 
				+			/*  PRE: a[k+1..N] is a heap */
			
 
				+			/* POST:  a[k..N]  is a heap */
			
 
				+			
			
 
				+			T temp = pArr[k - 1];
			
 
				+			/* k has child(s) */
			
 
				+			while (k <= n/2) 
			
 
				+			{
			
 
				+				int child = 2*k;
			
 
				+				
			
 
				+				if ((child < n) && CompareFunc(pArr[child - 1] , pArr[child]))
			
 
				+				{
			
 
				+					child++;
			
 
				+				}
			
 
				+				/* pick larger child */
			
 
				+				if (CompareFunc(temp , pArr[child - 1]))
			
 
				+				{
			
 
				+					/* move child up */
			
 
				+					pArr[k - 1] = pArr[child - 1];
			
 
				+					k = child;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					break;
			
 
				+				}
			
 
				+			}
			
 
				+			pArr[k - 1] = temp;
			
 
				+		} /*downHeap*/
			
 
				+
			
 
				+		///Exchange the elements at index0 and index1.
			
 
				+		///With B3_USE_MEMCPY the raw bytes are exchanged through a temporary buffer; otherwise T's copy constructor and operator= are used.
			
 
				+		///The indices are not range-checked here.
			
 
				+		void	swap(int index0,int index1)
			
 
				+		{
			
 
				+#ifdef B3_USE_MEMCPY
			
 
				+			char	temp[sizeof(T)];
			
 
				+			memcpy(temp,&m_data[index0],sizeof(T));
			
 
				+			memcpy(&m_data[index0],&m_data[index1],sizeof(T));
			
 
				+			memcpy(&m_data[index1],temp,sizeof(T));
			
 
				+#else
			
 
				+			T temp = m_data[index0];
			
 
				+			m_data[index0] = m_data[index1];
			
 
				+			m_data[index1] = temp;
			
 
				+#endif //B3_USE_MEMCPY
			
 
				+
			
 
				+		}
			
 
				+
			
 
				+	template <typename L>
			
 
				+	void heapSort(const L& CompareFunc)
			
 
				+	{
			
 
				+		/* sort a[0..N-1],  N.B. 0 to N-1 */
			
 
				+		int k;
			
 
				+		int n = m_size;
			
 
				+		for (k = n/2; k > 0; k--) 
			
 
				+		{
			
 
				+			downHeap(m_data, k, n, CompareFunc);
			
 
				+		}
			
 
				+
			
 
				+		/* a[1..N] is now a heap */
			
 
				+		while ( n>=1 ) 
			
 
				+		{
			
 
				+			swap(0,n-1); /* largest of a[0..n-1] */
			
 
				+
			
 
				+
			
 
				+			n = n - 1;
			
 
				+			/* restore a[1..i-1] heap */
			
 
				+			downHeap(m_data, 1, n, CompareFunc);
			
 
				+		} 
			
 
				+	}
			
 
				+
			
 
				+	///non-recursive binary search, assumes the array is sorted in ascending order (uses operator> and operator< of T)
			
 
				+	///returns the index of an element equal to key, or size() when no such element exists
			
 
				+	int	findBinarySearch(const T& key) const
			
 
				+	{
			
 
				+		int first = 0;
			
 
				+		int last = size()-1;
			
 
				+
			
 
				+		//assume sorted array
			
 
				+		while (first <= last) {
			
 
				+			//first + (last-first)/2 gives the same mid point as (first+last)/2 but cannot overflow on very large arrays
			
 
				+			int mid = first + (last - first) / 2;
			
 
				+			if (key > m_data[mid])
			
 
				+				first = mid + 1;  // repeat search in top half.
			
 
				+			else if (key < m_data[mid])
			
 
				+				last = mid - 1; // repeat search in bottom half.
			
 
				+			else
			
 
				+				return mid;     // found it. return position
			
 
				+		}
			
 
				+		return size();    // failed to find key
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	int	findLinearSearch(const T& key) const
			
 
				+	{
			
 
				+		int index=size();
			
 
				+		int i;
			
 
				+
			
 
				+		for (i=0;i<size();i++)
			
 
				+		{
			
 
				+			if (m_data[i] == key)
			
 
				+			{
			
 
				+				index = i;
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+		return index;
			
 
				+	}
			
 
				+
			
 
				+	void	remove(const T& key)
			
 
				+	{
			
 
				+
			
 
				+		int findIndex = findLinearSearch(key);
			
 
				+		if (findIndex<size())
			
 
				+		{
			
 
				+			swap( findIndex,size()-1);
			
 
				+			pop_back();
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	//PCK: whole function
			
 
				+	void initializeFromBuffer(void *buffer, int size, int capacity)
			
 
				+	{
			
 
				+		clear();
			
 
				+		m_ownsMemory = false;
			
 
				+		m_data = (T*)buffer;
			
 
				+		m_size = size;
			
 
				+		m_capacity = capacity;
			
 
				+	}
			
 
				+
			
 
				+	void copyFromArray(const b3AlignedObjectArray& otherArray)
			
 
				+	{
			
 
				+		int otherSize = otherArray.size();
			
 
				+		resize (otherSize);
			
 
				+		otherArray.copy(0, otherSize, m_data);
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_OBJECT_ARRAY__
			
--- a/include/Bullet3Common/b3CommandLineArgs.h
+++ b/include/Bullet3Common/b3CommandLineArgs.h
@@ -0,0 +1,93 @@
 
				+#ifndef COMMAND_LINE_ARGS_H
			
 
				+#define COMMAND_LINE_ARGS_H
			
 
				+
			
 
				+/******************************************************************************
			
 
				+ * Command-line parsing
			
 
				+ ******************************************************************************/
			
 
				+#include <algorithm>
			
 
				+#include <cstdlib>
			
 
				+#include <cstring>
			
 
				+#include <map>
			
 
				+#include <sstream>
			
 
				+#include <string>
			
 
				+class b3CommandLineArgs
			
 
				+{
			
 
				+protected:
			
 
				+
			
 
				+	std::map<std::string, std::string> pairs;
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	// Constructor
			
 
				+	b3CommandLineArgs(int argc, char **argv)
			
 
				+	{
			
 
				+		using namespace std;
			
 
				+
			
 
				+	    for (int i = 1; i < argc; i++)
			
 
				+	    {
			
 
				+	        string arg = argv[i];
			
 
				+
			
 
				+	        if ((arg[0] != '-') || (arg[1] != '-')) {
			
 
				+	        	continue;
			
 
				+	        }
			
 
				+
			
 
				+        	string::size_type pos;
			
 
				+		    string key, val;
			
 
				+	        if ((pos = arg.find( '=')) == string::npos) {
			
 
				+	        	key = string(arg, 2, arg.length() - 2);
			
 
				+	        	val = "";
			
 
				+	        } else {
			
 
				+	        	key = string(arg, 2, pos - 2);
			
 
				+	        	val = string(arg, pos + 1, arg.length() - 1);
			
 
				+	        }
			
 
				+        	pairs[key] = val;
			
 
				+	    }
			
 
				+	}
			
 
				+
			
 
				+	// Returns true when an argument named arg_name was stored by the constructor
			
 
				+	// (i.e. "--arg_name" or "--arg_name=value" appeared on the command line).
			
 
				+	bool CheckCmdLineFlag(const char* arg_name)
			
 
				+	{
			
 
				+		const bool present = (pairs.find(arg_name) != pairs.end());
			
 
				+		return present;
			
 
				+	}
			
 
				+
			
 
				+	template <typename T>
			
 
				+	void GetCmdLineArgument(const char *arg_name, T &val);
			
 
				+
			
 
				+	int ParsedArgc()
			
 
				+	{
			
 
				+		return pairs.size();
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+template <typename T>
			
 
				+inline void b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
			
 
				+{
			
 
				+	using namespace std;
			
 
				+	map<string, string>::iterator itr;
			
 
				+	if ((itr = pairs.find(arg_name)) != pairs.end()) {
			
 
				+		istringstream strstream(itr->second);
			
 
				+		strstream >> val;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+///Specialization for C strings: on success val receives a malloc()-allocated, zero-terminated copy of the
			
 
				+///argument's value, which the caller must release with free(). val is set to NULL when the argument is
			
 
				+///absent or when the allocation fails.
			
 
				+template <>
			
 
				+inline void b3CommandLineArgs::GetCmdLineArgument<char*>(const char* arg_name, char* &val)
			
 
				+{
			
 
				+	using namespace std;
			
 
				+	val = NULL;
			
 
				+	map<string, string>::iterator itr = pairs.find(arg_name);
			
 
				+	if (itr == pairs.end()) {
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	const string& s = itr->second;	//bind by reference, no temporary copy of the value is needed
			
 
				+	char* buffer = (char*) malloc(sizeof(char) * (s.length() + 1));
			
 
				+	if (buffer != NULL) {
			
 
				+		//only copy when malloc succeeded; strcpy into a null pointer is undefined behaviour
			
 
				+		std::strcpy(buffer, s.c_str());
			
 
				+		val = buffer;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif //COMMAND_LINE_ARGS_H
			
--- a/include/Bullet3Common/b3FileUtils.h
+++ b/include/Bullet3Common/b3FileUtils.h
@@ -0,0 +1,137 @@
 
				+#ifndef B3_FILE_UTILS_H
			
 
				+#define B3_FILE_UTILS_H
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include "b3Scalar.h"
			
 
				+#include <stddef.h>//ptrdiff_h
			
 
				+#include <string.h>
			
 
				+
			
 
				+struct b3FileUtils
			
 
				+{
			
 
				+	b3FileUtils()
			
 
				+	{
			
 
				+	}
			
 
				+	virtual ~b3FileUtils()
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	///Looks for orgFileName as given and then under a fixed list of data directories relative to the
			
 
				+	///current working directory. On success the path that could be opened is written to relativeFileName
			
 
				+	///and true is returned. maxRelativeFileNameMaxLen is the size of the relativeFileName buffer in bytes;
			
 
				+	///candidates that do not fit (including the terminating zero) are skipped instead of overflowing the buffer.
			
 
				+	bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen)
			
 
				+	{
			
 
				+		if (maxRelativeFileNameMaxLen <= 0)
			
 
				+		{
			
 
				+			return false;
			
 
				+		}
			
 
				+		const size_t bufferSize = (size_t)maxRelativeFileNameMaxLen;
			
 
				+		const size_t orgLen = strlen(orgFileName);
			
 
				+
			
 
				+		FILE* f = fopen(orgFileName,"rb");
			
 
				+		if (f)
			
 
				+		{
			
 
				+			//printf("original file found: [%s]\n", orgFileName);
			
 
				+			fclose(f);
			
 
				+			if (orgLen >= bufferSize)
			
 
				+			{
			
 
				+				return false;	//found, but the name cannot be reported without overflowing the buffer
			
 
				+			}
			
 
				+			//memmove tolerates callers that pass the same buffer for both arguments
			
 
				+			memmove(relativeFileName, orgFileName, orgLen + 1);
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				+		//printf("Trying various directories, relative to current working directory\n");
			
 
				+		const char* prefix[]={"./","./data/","../data/","../../data/","../../../data/","../../../../data/"};
			
 
				+		const int numPrefixes = sizeof(prefix)/sizeof(const char*);
			
 
				+
			
 
				+		for (int i=0;i<numPrefixes;i++)
			
 
				+		{
			
 
				+			const size_t prefixLen = strlen(prefix[i]);
			
 
				+			if (prefixLen + orgLen >= bufferSize)
			
 
				+			{
			
 
				+				continue;	//would not fit, try the next (possibly shorter) prefix
			
 
				+			}
			
 
				+			memcpy(relativeFileName, prefix[i], prefixLen);
			
 
				+			memcpy(relativeFileName + prefixLen, orgFileName, orgLen + 1);	//+1 copies the terminating zero
			
 
				+			f = fopen(relativeFileName,"rb");
			
 
				+			if (f)
			
 
				+			{
			
 
				+				fclose(f);
			
 
				+				return true;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	///Returns a pointer into name just past the last occurrence of pattern,
			
 
				+	///or name itself when pattern does not occur (or is empty).
			
 
				+	static const char* strip2(const char* name, const char* pattern)
			
 
				+	{
			
 
				+		size_t const patlen = strlen(pattern);
			
 
				+		if (patlen == 0)
			
 
				+		{
			
 
				+			//strstr matches an empty pattern at every position; advancing by zero would never terminate
			
 
				+			return name;
			
 
				+		}
			
 
				+		const char * oriptr = name;
			
 
				+		const char * patloc = strstr(oriptr, pattern);
			
 
				+		// skip past every occurrence of the pattern in the original string
			
 
				+		while (patloc != 0)
			
 
				+		{
			
 
				+			oriptr = patloc + patlen;
			
 
				+			patloc = strstr(oriptr, pattern);
			
 
				+		}
			
 
				+		return oriptr;
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+	static void extractPath(const char* fileName, char* path, int maxPathLength)
			
 
				+	{
			
 
				+		const char* stripped = strip2(fileName, "/");
			
 
				+		stripped = strip2(stripped, "\\");
			
 
				+
			
 
				+		ptrdiff_t len = stripped-fileName;
			
 
				+		b3Assert((len+1)<maxPathLength);
			
 
				+
			
 
				+		if (len && ((len+1)<maxPathLength))
			
 
				+		{
			
 
				+
			
 
				+			for (int i=0;i<len;i++)
			
 
				+			{
			
 
				+				path[i] = fileName[i];
			
 
				+			}
			
 
				+			path[len]=0;
			
 
				+		} else
			
 
				+		{
			
 
				+			b3Assert(maxPathLength>0);
			
 
				+			if (maxPathLength>0)
			
 
				+			{
			
 
				+				path[0] = 0;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	static char toLowerChar(const char t)
			
 
				+	{
			
 
				+		if (t>=(char)'A' && t<=(char)'Z')
			
 
				+			return t + ((char)'a' - (char)'A');
			
 
				+		else
			
 
				+			return t;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	///Converts the letters 'A'..'Z' of the zero-terminated string str to lower case, in place.
			
 
				+	static void toLower(char* str)
			
 
				+	{
			
 
				+		//size_t: the result of strlen does not necessarily fit in an int
			
 
				+		const size_t len=strlen(str);
			
 
				+		for (size_t i=0;i<len;i++)
			
 
				+		{
			
 
				+			str[i] = toLowerChar(str[i]);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	/*static const char* strip2(const char* name, const char* pattern)
			
 
				+	{
			
 
				+		size_t const patlen = strlen(pattern);
			
 
				+		size_t patcnt = 0;
			
 
				+		const char * oriptr;
			
 
				+		const char * patloc;
			
 
				+		// find how many times the pattern occurs in the original string
			
 
				+		for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
			
 
				+		{
			
 
				+			patcnt++;
			
 
				+		}
			
 
				+		return oriptr;
			
 
				+	}
			
 
				+	*/
			
 
				+
			
 
				+};
			
 
				+#endif //B3_FILE_UTILS_H
			
--- a/include/Bullet3Common/b3HashMap.h
+++ b/include/Bullet3Common/b3HashMap.h
@@ -0,0 +1,450 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_HASH_MAP_H
			
 
				+#define B3_HASH_MAP_H
			
 
				+
			
 
				+#include "b3AlignedObjectArray.h"
			
 
				+
			
 
				+///very basic hashable string implementation, compatible with b3HashMap
			
 
				+struct b3HashString
			
 
				+{
			
 
				+	const char* m_string;
			
 
				+	unsigned int	m_hash;
			
 
				+
			
 
				+	B3_FORCE_INLINE	unsigned int getHash()const
			
 
				+	{
			
 
				+		return m_hash;
			
 
				+	}
			
 
				+
			
 
				+	///Stores the pointer name (the characters are not copied) and precomputes the hash of the zero-terminated string it points to.
			
 
				+	///name is dereferenced unconditionally, so it must not be null.
			
 
				+	b3HashString(const char* name)
			
 
				+		:m_string(name)
			
 
				+	{
			
 
				+		/* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
			
 
				+		static const unsigned int  InitialFNV = 2166136261u;
			
 
				+		static const unsigned int FNVMultiple = 16777619u;
			
 
				+
			
 
				+		/* Fowler / Noll / Vo (FNV) Hash */
			
 
				+		unsigned int hash = InitialFNV;
			
 
				+		
			
 
				+		/* each character is xor-ed into the hash first, then the hash is multiplied */
			
 
				+		/* NOTE(review): m_string[i] is a plain char; where char is signed, bytes >= 0x80 are sign-extended */
			
 
				+		/* before the xor and touch more than the low 8 bits - confirm whether hashes must agree across platforms */
			
 
				+		for(int i = 0; m_string[i]; i++)
			
 
				+		{
			
 
				+			hash = hash ^ (m_string[i]);       /* xor  the low 8 bits */
			
 
				+			hash = hash * FNVMultiple;  /* multiply by the magic number */
			
 
				+		}
			
 
				+		m_hash = hash;
			
 
				+	}
			
 
				+
			
 
				+	int portableStringCompare(const char* src,	const char* dst) const
			
 
				+	{
			
 
				+			int ret = 0 ;
			
 
				+
			
 
				+			while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
			
 
				+					++src, ++dst;
			
 
				+
			
 
				+			if ( ret < 0 )
			
 
				+					ret = -1 ;
			
 
				+			else if ( ret > 0 )
			
 
				+					ret = 1 ;
			
 
				+
			
 
				+			return( ret );
			
 
				+	}
			
 
				+
			
 
				+	bool equals(const b3HashString& other) const
			
 
				+	{
			
 
				+		return (m_string == other.m_string) ||
			
 
				+			(0==portableStringCompare(m_string,other.m_string));
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+const int B3_HASH_NULL=0xffffffff;
			
 
				+
			
 
				+
			
 
				+class b3HashInt
			
 
				+{
			
 
				+	int	m_uid;
			
 
				+public:
			
 
				+	b3HashInt(int uid)	:m_uid(uid)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	int	getUid1() const
			
 
				+	{
			
 
				+		return m_uid;
			
 
				+	}
			
 
				+
			
 
				+	void	setUid1(int uid)
			
 
				+	{
			
 
				+		m_uid = uid;
			
 
				+	}
			
 
				+
			
 
				+	bool equals(const b3HashInt& other) const
			
 
				+	{
			
 
				+		return getUid1() == other.getUid1();
			
 
				+	}
			
 
				+	//to our success
			
 
				+	B3_FORCE_INLINE	unsigned int getHash()const
			
 
				+	{
			
 
				+		int key = m_uid;
			
 
				+		// Thomas Wang's hash
			
 
				+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
			
 
				+		return key;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+class b3HashPtr
			
 
				+{
			
 
				+
			
 
				+	union
			
 
				+	{
			
 
				+		const void*	m_pointer;
			
 
				+		int	m_hashValues[2];
			
 
				+	};
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	b3HashPtr(const void* ptr)
			
 
				+		:m_pointer(ptr)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	const void*	getPointer() const
			
 
				+	{
			
 
				+		return m_pointer;
			
 
				+	}
			
 
				+
			
 
				+	bool equals(const b3HashPtr& other) const
			
 
				+	{
			
 
				+		return getPointer() == other.getPointer();
			
 
				+	}
			
 
				+
			
 
				+	//to our success
			
 
				+	B3_FORCE_INLINE	unsigned int getHash()const
			
 
				+	{
			
 
				+		const bool VOID_IS_8 = ((sizeof(void*)==8));
			
 
				+		
			
 
				+		int key = VOID_IS_8? m_hashValues[0]+m_hashValues[1] : m_hashValues[0];
			
 
				+	
			
 
				+		// Thomas Wang's hash
			
 
				+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
			
 
				+		return key;
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+};
			
 
				+
			
 
				+
			
 
				+template <class Value>
			
 
				+class b3HashKeyPtr
			
 
				+{
			
 
				+        int     m_uid;
			
 
				+public:
			
 
				+
			
 
				+        b3HashKeyPtr(int uid)    :m_uid(uid)
			
 
				+        {
			
 
				+        }
			
 
				+
			
 
				+        int     getUid1() const
			
 
				+        {
			
 
				+                return m_uid;
			
 
				+        }
			
 
				+
			
 
				+        bool equals(const b3HashKeyPtr<Value>& other) const
			
 
				+        {
			
 
				+                return getUid1() == other.getUid1();
			
 
				+        }
			
 
				+
			
 
				+        //to our success
			
 
				+        B3_FORCE_INLINE       unsigned int getHash()const
			
 
				+        {
			
 
				+                int key = m_uid;
			
 
				+                // Thomas Wang's hash
			
 
				+                key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
			
 
				+                return key;
			
 
				+        }
			
 
				+
			
 
				+        
			
 
				+};
			
 
				+
			
 
				+
			
 
				+template <class Value>
			
 
				+class b3HashKey
			
 
				+{
			
 
				+	int	m_uid;
			
 
				+public:
			
 
				+
			
 
				+	b3HashKey(int uid)	:m_uid(uid)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	int	getUid1() const
			
 
				+	{
			
 
				+		return m_uid;
			
 
				+	}
			
 
				+
			
 
				+	bool equals(const b3HashKey<Value>& other) const
			
 
				+	{
			
 
				+		return getUid1() == other.getUid1();
			
 
				+	}
			
 
				+	//to our success
			
 
				+	B3_FORCE_INLINE	unsigned int getHash()const
			
 
				+	{
			
 
				+		int key = m_uid;
			
 
				+		// Thomas Wang's hash
			
 
				+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
			
 
				+		return key;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+///The b3HashMap template class implements a generic and lightweight hashmap.
				+///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp
				+///
				+///Keys and values live in two parallel arrays (m_keyArray / m_valueArray).
				+///m_hashTable stores, per bucket, the index of the first entry of that bucket's chain and
				+///m_next links every entry to the next entry of the same bucket; B3_HASH_NULL ends a chain.
				+///The bucket of a key is key.getHash() masked with (m_valueArray.capacity() - 1).
				+///NOTE(review): that mask presumably relies on the capacity being a power of two - confirm against b3AlignedObjectArray.
				+///The Key type has to provide getHash() and equals().
				+template <class Key, class Value>
				+class b3HashMap
				+{
				+
				+protected:
				+	b3AlignedObjectArray<int>		m_hashTable;	// bucket -> index of first entry in the chain
				+	b3AlignedObjectArray<int>		m_next;			// entry index -> next entry index in the same chain
				+	
				+	b3AlignedObjectArray<Value>		m_valueArray;
				+	b3AlignedObjectArray<Key>		m_keyArray;
				+
				+	/// Brings m_hashTable / m_next up to the capacity of m_valueArray and rebuilds the chains.
				+	/// Only the entries with an index below the previous hash-table size are re-linked here;
				+	/// insert() links the entry it has just appended itself.
				+	void	growTables(const Key& /*key*/)
				+	{
				+		int newCapacity = m_valueArray.capacity();
				+
				+		if (m_hashTable.size() < newCapacity)
				+		{
				+			//grow hashtable and next table
				+			int curHashtableSize = m_hashTable.size();
				+
				+			m_hashTable.resize(newCapacity);
				+			m_next.resize(newCapacity);
				+
				+			int i;
				+
				+			// reset every bucket and every link before re-inserting
				+			for (i = 0; i < newCapacity; ++i)
				+			{
				+				m_hashTable[i] = B3_HASH_NULL;
				+				m_next[i] = B3_HASH_NULL;
				+			}
				+
				+			for(i=0;i<curHashtableSize;i++)
				+			{
				+				int	hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity()-1);	// New hash value with new mask
				+				m_next[i] = m_hashTable[hashValue];
				+				m_hashTable[hashValue] = i;
				+			}
				+
				+		}
				+	}
				+
				+	public:
				+
				+	/// Stores value under key. If the key is already present its value is overwritten,
				+	/// otherwise the pair is appended and linked at the head of its bucket chain.
				+	void insert(const Key& key, const Value& value) {
				+		int hash = key.getHash() & (m_valueArray.capacity()-1);
				+
				+		//replace value if the key is already there
				+		int index = findIndex(key);
				+		if (index != B3_HASH_NULL)
				+		{
				+			m_valueArray[index]=value;
				+			return;
				+		}
				+
				+		int count = m_valueArray.size();
				+		int oldCapacity = m_valueArray.capacity();
				+		m_valueArray.push_back(value);
				+		m_keyArray.push_back(key);
				+
				+		int newCapacity = m_valueArray.capacity();
				+		if (oldCapacity < newCapacity)
				+		{
				+			growTables(key);
				+			//hash with new capacity
				+			hash = key.getHash() & (m_valueArray.capacity()-1);
				+		}
				+		m_next[count] = m_hashTable[hash];
				+		m_hashTable[hash] = count;
				+	}
				+
				+	/// Removes the pair stored under key, if any. The last pair of the arrays is moved
				+	/// into the freed slot, so the index of that last pair changes.
				+	void remove(const Key& key) {
				+
				+		int hash = key.getHash() & (m_valueArray.capacity()-1);
				+
				+		int pairIndex = findIndex(key);
				+		
				+		if (pairIndex ==B3_HASH_NULL)
				+		{
				+			return;
				+		}
				+
				+		// Remove the pair from the hash table.
				+		int index = m_hashTable[hash];
				+		b3Assert(index != B3_HASH_NULL);
				+
				+		// walk the chain to find the predecessor of the pair
				+		int previous = B3_HASH_NULL;
				+		while (index != pairIndex)
				+		{
				+			previous = index;
				+			index = m_next[index];
				+		}
				+
				+		if (previous != B3_HASH_NULL)
				+		{
				+			b3Assert(m_next[previous] == pairIndex);
				+			m_next[previous] = m_next[pairIndex];
				+		}
				+		else
				+		{
				+			// the pair was the head of its chain
				+			m_hashTable[hash] = m_next[pairIndex];
				+		}
				+
				+		// We now move the last pair into spot of the
				+		// pair being removed. We need to fix the hash
				+		// table indices to support the move.
				+
				+		int lastPairIndex = m_valueArray.size() - 1;
				+
				+		// If the removed pair is the last pair, we are done.
				+		if (lastPairIndex == pairIndex)
				+		{
				+			m_valueArray.pop_back();
				+			m_keyArray.pop_back();
				+			return;
				+		}
				+
				+		// Remove the last pair from the hash table.
				+		int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity()-1);
				+
				+		index = m_hashTable[lastHash];
				+		b3Assert(index != B3_HASH_NULL);
				+
				+		previous = B3_HASH_NULL;
				+		while (index != lastPairIndex)
				+		{
				+			previous = index;
				+			index = m_next[index];
				+		}
				+
				+		if (previous != B3_HASH_NULL)
				+		{
				+			b3Assert(m_next[previous] == lastPairIndex);
				+			m_next[previous] = m_next[lastPairIndex];
				+		}
				+		else
				+		{
				+			m_hashTable[lastHash] = m_next[lastPairIndex];
				+		}
				+
				+		// Copy the last pair into the remove pair's spot.
				+		m_valueArray[pairIndex] = m_valueArray[lastPairIndex];
				+		m_keyArray[pairIndex] = m_keyArray[lastPairIndex];
				+
				+		// Insert the last pair into the hash table
				+		m_next[pairIndex] = m_hashTable[lastHash];
				+		m_hashTable[lastHash] = pairIndex;
				+
				+		m_valueArray.pop_back();
				+		m_keyArray.pop_back();
				+
				+	}
				+
				+
				+	/// Number of stored pairs.
				+	int size() const
				+	{
				+		return m_valueArray.size();
				+	}
				+
				+	/// Returns the value stored at the given array index, or NULL when the
				+	/// index is outside [0, size()). An out-of-range index also trips b3Assert.
				+	const Value* getAtIndex(int index) const
				+	{
				+		b3Assert(index < m_valueArray.size());
				+		b3Assert(index >= 0);
				+		if (index >= 0 && index < m_valueArray.size())
				+		{
				+			return &m_valueArray[index];
				+		}
				+		return NULL;
				+	}
				+
				+	/// Returns the value stored at the given array index, or NULL when the
				+	/// index is outside [0, size()). An out-of-range index also trips b3Assert.
				+	Value* getAtIndex(int index)
				+	{
				+		b3Assert(index < m_valueArray.size());
				+		b3Assert(index >= 0);
				+		if (index >= 0 && index < m_valueArray.size())
				+		{
				+			return &m_valueArray[index];
				+		}
				+		return NULL;
				+	}
				+
				+	/// Same as find(): pointer to the value stored under key, or NULL. Never inserts.
				+	Value* operator[](const Key& key) {
				+		return find(key);
				+	}
				+
				+	/// Returns a pointer to the value stored under key, or NULL when the key is absent.
				+	const Value*	find(const Key& key) const
				+	{
				+		int index = findIndex(key);
				+		if (index == B3_HASH_NULL)
				+		{
				+			return NULL;
				+		}
				+		return &m_valueArray[index];
				+	}
				+
				+	/// Returns a pointer to the value stored under key, or NULL when the key is absent.
				+	Value*	find(const Key& key)
				+	{
				+		int index = findIndex(key);
				+		if (index == B3_HASH_NULL)
				+		{
				+			return NULL;
				+		}
				+		return &m_valueArray[index];
				+	}
				+
				+
				+	/// Returns the array index of the pair stored under key, or B3_HASH_NULL when absent.
				+	int	findIndex(const Key& key) const
				+	{
				+		unsigned int hash = key.getHash() & (m_valueArray.capacity()-1);
				+
				+		// also covers the empty map, where the hash table has no buckets yet
				+		if (hash >= (unsigned int)m_hashTable.size())
				+		{
				+			return B3_HASH_NULL;
				+		}
				+
				+		int index = m_hashTable[hash];
				+		while ((index != B3_HASH_NULL) && key.equals(m_keyArray[index]) == false)
				+		{
				+			index = m_next[index];
				+		}
				+		return index;
				+	}
				+
				+	/// Clears all four internal arrays.
				+	void	clear()
				+	{
				+		m_hashTable.clear();
				+		m_next.clear();
				+		m_valueArray.clear();
				+		m_keyArray.clear();
				+	}
				+
				+};
			
 
				+
			
 
				+#endif //B3_HASH_MAP_H
			
--- a/include/Bullet3Common/b3Logging.h
+++ b/include/Bullet3Common/b3Logging.h
@@ -0,0 +1,69 @@
 
				+
			
 
				+#ifndef B3_LOGGING_H
			
 
				+#define B3_LOGGING_H
			
 
				+
			
 
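				+///The multi-statement macros below are wrapped in do { ... } while(0) so that each one behaves as a single statement, e.g. in "if (condition) b3Warning("test"); else {...}" (a bare { ... } block followed by ';' would break the else)
				+///You can also customize the message by uncommenting a different line below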
			
 
				+#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__)
			
 
				+//#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0)
			
 
				+//#define b3Printf b3OutputPrintfVarArgsInternal
			
 
				+//#define b3Printf(...) printf(__VA_ARGS__)
			
 
				+//#define b3Printf(...)
			
 
				+
			
 
				+#define b3Warning(...) do {b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n",__FILE__,__LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__); }while(0)
			
 
				+#define b3Error(...) do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n",__FILE__,__LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__); } while(0)
			
 
				+
			
 
				+
			
 
				+#ifndef B3_NO_PROFILE
			
 
				+
			
 
				+void b3EnterProfileZone(const char* name);
			
 
				+void b3LeaveProfileZone();
			
 
				+
			
 
				+/// Scoped profiling helper: the constructor calls b3EnterProfileZone(name)
				+/// and the destructor calls b3LeaveProfileZone().
				+class	b3ProfileZone
				+{
				+public:
				+	b3ProfileZone(const char* name) { b3EnterProfileZone( name ); }
				+
				+	~b3ProfileZone() { b3LeaveProfileZone(); }
				+};
			
 
				+
			
 
				+#define	B3_PROFILE( name )			b3ProfileZone __profile( name )
			
 
				+
			
 
				+#else //B3_NO_PROFILE
			
 
				+
			
 
				+#define	B3_PROFILE( name )
			
 
				+#define b3StartProfile(a)
			
 
				+#define b3StopProfile
			
 
				+
			
 
				+#endif //#ifndef B3_NO_PROFILE
			
 
				+
			
 
				+
			
 
				+///Function types of the user-installable hooks; the b3SetCustom*Func setters below take pointers to them
				+typedef void (b3PrintfFunc)(const char* msg);
				+typedef void (b3WarningMessageFunc)(const char* msg);
				+typedef void (b3ErrorMessageFunc)(const char* msg);
				+typedef void (b3EnterProfileZoneFunc)(const char* msg);
				+typedef void (b3LeaveProfileZoneFunc)();
				+
				+///The developer can route b3Printf output using their own implementation
				+void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
				+void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
				+void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);
				+
				+///Set custom profile zone functions (zones can be nested)
				+void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc);
				+void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc);
				+
				+///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version)
				+///They take a printf-style format string followed by variadic arguments, as the b3Printf/b3Warning/b3Error macros use them
				+void b3OutputPrintfVarArgsInternal(const char *str, ...);
				+void b3OutputWarningMessageVarArgsInternal(const char *str, ...);
				+void b3OutputErrorMessageVarArgsInternal(const char *str, ...);
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif//B3_LOGGING_H
			
--- a/include/Bullet3Common/b3Matrix3x3.h
+++ b/include/Bullet3Common/b3Matrix3x3.h
@@ -0,0 +1,1362 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef	B3_MATRIX3x3_H
			
 
				+#define B3_MATRIX3x3_H
			
 
				+
			
 
				+#include "b3Vector3.h"
			
 
				+#include "b3Quaternion.h"
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#ifdef B3_USE_SSE
				+//const __m128 B3_ATTRIBUTE_ALIGNED16(b3v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
				+// -0.0f in the first lane only: setRotation XORs vectors with this to change the sign of their first element
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
				+#endif
				+
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+// The three rows of the identity matrix (fourth lane zero), used by setRotation, setIdentity and getIdentity
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
				+#endif
				+
				+// b3Matrix3x3Data names the double or float data struct, depending on B3_USE_DOUBLE_PRECISION
				+#ifdef B3_USE_DOUBLE_PRECISION
				+#define b3Matrix3x3Data	b3Matrix3x3DoubleData 
				+#else
				+#define b3Matrix3x3Data	b3Matrix3x3FloatData
				+#endif //B3_USE_DOUBLE_PRECISION
			
 
				+
			
 
				+
			
 
				+/**@brief The b3Matrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with b3Quaternion, b3Transform and b3Vector3.
			
 
				+* Make sure to only include a pure orthogonal matrix without scaling. */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3Matrix3x3 {
			
 
				+
			
 
				+	///Data storage for the matrix, each vector is a row of the matrix
			
 
				+	b3Vector3 m_el[3];
			
 
				+
			
 
				+public:
			
 
				+	/** @brief No initialization constructor: the rows in m_el are not assigned here */
				+	b3Matrix3x3 () {}
			
 
				+
			
 
				+	//		explicit b3Matrix3x3(const b3Scalar *m) { setFromOpenGLSubMatrix(m); }
			
 
				+
			
 
				+	/**@brief Constructor from Quaternion; forwards to setRotation(q) */
				+	explicit b3Matrix3x3(const b3Quaternion& q)
				+	{
				+		setRotation(q);
				+	}
			
 
				+	/*
			
 
				+	template <typename b3Scalar>
			
 
				+	Matrix3x3(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{ 
			
 
				+	setEulerYPR(yaw, pitch, roll);
			
 
				+	}
			
 
				+	*/
			
 
				+	/** @brief Constructor with row major formatting: (xx, xy, xz) is the first row,
				+	*  (yx, yy, yz) the second and (zx, zy, zz) the third */
				+	b3Matrix3x3(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz,
				+		const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz,
				+		const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
				+	{ 
				+		// same three row assignments that setValue() performs, in the same order
				+		m_el[0].setValue(xx, xy, xz);
				+		m_el[1].setValue(yx, yy, yz);
				+		m_el[2].setValue(zx, zy, zz);
				+	}
			
 
				+
			
 
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+	// Construct from three SIMD registers, one per row
				+	B3_FORCE_INLINE b3Matrix3x3 (const b3SimdFloat4 v0, const b3SimdFloat4 v1, const b3SimdFloat4 v2 ) 
				+	{
				+        m_el[0].mVec128 = v0;
				+        m_el[1].mVec128 = v1;
				+        m_el[2].mVec128 = v2;
				+	}
				+
				+	// Construct from three row vectors (this overload is only declared in the SIMD branch)
				+	B3_FORCE_INLINE b3Matrix3x3 (const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2 ) 
				+	{
				+        m_el[0] = v0;
				+        m_el[1] = v1;
				+        m_el[2] = v2;
				+	}
				+
				+	// Copy constructor
				+	// Copies each row through its mVec128 member
				+	B3_FORCE_INLINE b3Matrix3x3(const b3Matrix3x3& rhs)
				+	{
				+		m_el[0].mVec128 = rhs.m_el[0].mVec128;
				+		m_el[1].mVec128 = rhs.m_el[1].mVec128;
				+		m_el[2].mVec128 = rhs.m_el[2].mVec128;
				+	}
				+
				+	// Assignment Operator
				+	// Copies each row through its mVec128 member and returns *this
				+	B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& m) 
				+	{
				+		m_el[0].mVec128 = m.m_el[0].mVec128;
				+		m_el[1].mVec128 = m.m_el[1].mVec128;
				+		m_el[2].mVec128 = m.m_el[2].mVec128;
				+		
				+		return *this;
				+	}
				+
				+#else
				+
				+	/** @brief Copy constructor: copies the three rows with b3Vector3 assignment */
				+	B3_FORCE_INLINE b3Matrix3x3 (const b3Matrix3x3& other)
				+	{
				+		m_el[0] = other.m_el[0];
				+		m_el[1] = other.m_el[1];
				+		m_el[2] = other.m_el[2];
				+	}
				+    
				+	/** @brief Assignment Operator: copies the three rows and returns *this */
				+	B3_FORCE_INLINE b3Matrix3x3& operator=(const b3Matrix3x3& other)
				+	{
				+		m_el[0] = other.m_el[0];
				+		m_el[1] = other.m_el[1];
				+		m_el[2] = other.m_el[2];
				+		return *this;
				+	}
				+
				+#endif
			
 
				+
			
 
				+	/** @brief Get a column of the matrix as a vector, built from element i of each of the three rows
				+	*  @param i Column number 0 indexed */
				+	B3_FORCE_INLINE b3Vector3 getColumn(int i) const
				+	{
				+		const b3Scalar top = m_el[0][i];
				+		const b3Scalar middle = m_el[1][i];
				+		const b3Scalar bottom = m_el[2][i];
				+		return b3MakeVector3(top, middle, bottom);
				+	}
			
 
				+
			
 
				+
			
 
				+	/** @brief Get a row of the matrix as a vector (const reference into the matrix storage)
				+	*  @param i Row number 0 indexed; checked with b3FullAssert to be 0, 1 or 2 */
				+	B3_FORCE_INLINE const b3Vector3& getRow(int i) const
				+	{
				+		b3FullAssert(i >= 0 && i <= 2);
				+		const b3Vector3& row = m_el[i];
				+		return row;
				+	}
			
 
				+
			
 
				+	/** @brief Get a mutable reference to a row of the matrix as a vector 
				+	*  @param i Row number 0 indexed; checked with b3FullAssert to be 0, 1 or 2 */
				+	B3_FORCE_INLINE b3Vector3&  operator[](int i)
				+	{ 
				+		b3FullAssert(i >= 0 && i <= 2);
				+		b3Vector3& row = m_el[i];
				+		return row;
				+	}
			
 
				+
			
 
				+	/** @brief Get a const reference to a row of the matrix as a vector 
				+	*  @param i Row number 0 indexed; checked with b3FullAssert to be 0, 1 or 2 */
				+	B3_FORCE_INLINE const b3Vector3& operator[](int i) const
				+	{
				+		b3FullAssert(i >= 0 && i <= 2);
				+		const b3Vector3& row = m_el[i];
				+		return row;
				+	}
			
 
				+
			
 
				+	/** @brief Multiply by the target matrix on the right
				+	*  @param m Rotation matrix to be applied 
				+	* Equivalent to this = this * m */
				+	b3Matrix3x3& operator*=(const b3Matrix3x3& m); 
				+
				+	/** @brief Adds the target matrix on the right
				+	*  @param m matrix to be applied 
				+	* Equivalent to this = this + m */
				+	b3Matrix3x3& operator+=(const b3Matrix3x3& m); 
				+
				+	/** @brief Subtracts the target matrix on the right
				+	*  @param m matrix to be applied 
				+	* Equivalent to this = this - m */
				+	b3Matrix3x3& operator-=(const b3Matrix3x3& m); 
			
 
				+
			
 
				+	/** @brief Set from the rotational part of a 4x4 OpenGL matrix
				+	*  @param m A pointer to the beginning of the array of scalars; entries
				+	*  m[r], m[r + 4] and m[r + 8] become row r of this matrix (r = 0..2) */
				+	void setFromOpenGLSubMatrix(const b3Scalar *m)
				+	{
				+		for (int row = 0; row < 3; ++row)
				+		{
				+			m_el[row].setValue(m[row], m[row + 4], m[row + 8]);
				+		}
				+	}
			
 
				+	/** @brief Set the values of the matrix explicitly (row major)
				+	*  @param xx Top left
				+	*  @param xy Top Middle
				+	*  @param xz Top Right
				+	*  @param yx Middle Left
				+	*  @param yy Middle Middle
				+	*  @param yz Middle Right
				+	*  @param zx Bottom Left
				+	*  @param zy Bottom Middle
				+	*  @param zz Bottom Right*/
				+	void setValue(const b3Scalar& xx, const b3Scalar& xy, const b3Scalar& xz, 
				+		const b3Scalar& yx, const b3Scalar& yy, const b3Scalar& yz, 
				+		const b3Scalar& zx, const b3Scalar& zy, const b3Scalar& zz)
				+	{
				+		b3Vector3& topRow = m_el[0];
				+		b3Vector3& middleRow = m_el[1];
				+		b3Vector3& bottomRow = m_el[2];
				+
				+		// rows are written top to bottom, one setValue call per row
				+		topRow.setValue(xx, xy, xz);
				+		middleRow.setValue(yx, yy, yz);
				+		bottomRow.setValue(zx, zy, zz);
				+	}
			
 
				+
			
 
				+	/** @brief Set the matrix from a quaternion
				+	*  @param q The Quaternion to match
				+	*
				+	*  Every product term is scaled by s = 2 / q.length2(), so the squared length of q
				+	*  is divided out. A zero-length quaternion trips b3FullAssert (and would divide by
				+	*  zero when that assert is compiled out). */  
				+	void setRotation(const b3Quaternion& q) 
				+	{
				+		b3Scalar d = q.length2();
				+		b3FullAssert(d != b3Scalar(0.0));
				+		b3Scalar s = b3Scalar(2.0) / d;
				+    
				+    #if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+        // SSE path: the three rows are accumulated in V1, V2, V3 from shuffled and
				+        // sign-flipped copies of q, scaled by s, and the identity rows are added last.
				+        // NOTE(review): presumably yields the same values as the scalar branch below - confirm.
				+        __m128	vs, Q = q.get128();
				+		__m128i Qi = b3CastfTo128i(Q);
				+        __m128	Y, Z;
				+        __m128	V1, V2, V3;
				+        __m128	V11, V21, V31;
				+        __m128	NQ = _mm_xor_ps(Q, b3vMzeroMask);
				+		__m128i NQi = b3CastfTo128i(NQ);
				+        
				+        V1 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,2,3)));	// Y X Z W
				+		V2 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(0,0,1,3));     // -X -X  Y  W
				+        V3 = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(2,1,0,3)));	// Z Y X W
				+        V1 = _mm_xor_ps(V1, b3vMPPP);	//	change the sign of the first element
				+			
				+        V11	= b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,1,0,3)));	// Y Y X W
				+		V21 = _mm_unpackhi_ps(Q, Q);                    //  Z  Z  W  W
				+		V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(0,2,0,3));	//  X  Z -X -W
				+
				+		V2 = V2 * V1;	//
				+		V1 = V1 * V11;	//
				+		V3 = V3 * V31;	//
				+
				+        V11 = _mm_shuffle_ps(NQ, Q, B3_SHUFFLE(2,3,1,3));	//	-Z -W  Y  W
				+		V11 = V11 * V21;	//
				+        V21 = _mm_xor_ps(V21, b3vMPPP);	//	change the sign of the first element
				+		V31 = _mm_shuffle_ps(Q, NQ, B3_SHUFFLE(3,3,1,3));	//	 W  W -Y -W
				+        V31 = _mm_xor_ps(V31, b3vMPPP);	//	change the sign of the first element
				+		Y = b3CastiTo128f(_mm_shuffle_epi32 (NQi, B3_SHUFFLE(3,2,0,3)));	// -W -Z -X -W
				+		Z = b3CastiTo128f(_mm_shuffle_epi32 (Qi, B3_SHUFFLE(1,0,1,3)));	//  Y  X  Y  W
				+
				+		vs = _mm_load_ss(&s);
				+		V21 = V21 * Y;
				+		V31 = V31 * Z;
				+
				+		V1 = V1 + V11;
				+        V2 = V2 + V21;
				+        V3 = V3 + V31;
				+
				+        vs = b3_splat3_ps(vs, 0);
				+            //	s ready
				+        V1 = V1 * vs;
				+        V2 = V2 * vs;
				+        V3 = V3 * vs;
				+        
				+        // add the identity rows
				+        V1 = V1 + b3v1000;
				+        V2 = V2 + b3v0100;
				+        V3 = V3 + b3v0010;
				+        
				+        m_el[0] = b3MakeVector3(V1); 
				+        m_el[1] = b3MakeVector3(V2);
				+        m_el[2] = b3MakeVector3(V3);
				+    #else    
				+		// Scalar path: pairwise products of the quaternion components, each already scaled by s
				+		b3Scalar xs = q.getX() * s,   ys = q.getY() * s,   zs = q.getZ() * s;
				+		b3Scalar wx = q.getW() * xs,  wy = q.getW() * ys,  wz = q.getW() * zs;
				+		b3Scalar xx = q.getX() * xs,  xy = q.getX() * ys,  xz = q.getX() * zs;
				+		b3Scalar yy = q.getY() * ys,  yz = q.getY() * zs,  zz = q.getZ() * zs;
				+		setValue(
				+            b3Scalar(1.0) - (yy + zz), xy - wz, xz + wy,
				+			xy + wz, b3Scalar(1.0) - (xx + zz), yz - wx,
				+			xz - wy, yz + wx, b3Scalar(1.0) - (xx + yy));
				+	#endif
				+    }
			
 
				+
			
 
				+
			
 
				+	/** @brief Set the matrix from yaw, pitch and roll angles by forwarding to setEulerZYX(roll, pitch, yaw)
				+	*  @param yaw Yaw, passed on as eulerZ (rotation about the Z axis)
				+	*  @param pitch Pitch, passed on as eulerY (rotation about the Y axis)
				+	*  @param roll Roll, passed on as eulerX (rotation about the X axis)
				+	*/
				+	void setEulerYPR(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll) 
				+	{
				+		setEulerZYX(roll, pitch, yaw);
				+	}
			
 
				+
			
 
				+	/** @brief Set the matrix from euler angles YPR around ZYX axes
				+	* @param eulerX Roll about X axis
				+	* @param eulerY Pitch around Y axis
				+	* @param eulerZ Yaw about Z axis
				+	* 
				+	* These angles are used to produce a rotation matrix. The euler
				+	* angles are applied in ZYX order. I.e a vector is first rotated 
				+	* about X then Y and then Z
				+	**/
				+	void setEulerZYX(b3Scalar eulerX,b3Scalar eulerY,b3Scalar eulerZ) { 
				+		///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
				+		const b3Scalar cosX = b3Cos(eulerX);
				+		const b3Scalar cosY = b3Cos(eulerY);
				+		const b3Scalar cosZ = b3Cos(eulerZ);
				+		const b3Scalar sinX = b3Sin(eulerX);
				+		const b3Scalar sinY = b3Sin(eulerY);
				+		const b3Scalar sinZ = b3Sin(eulerZ);
				+
				+		// products of the X and Z terms that appear more than once below
				+		const b3Scalar cosXcosZ = cosX * cosZ;
				+		const b3Scalar cosXsinZ = cosX * sinZ;
				+		const b3Scalar sinXcosZ = sinX * cosZ;
				+		const b3Scalar sinXsinZ = sinX * sinZ;
				+
				+		setValue(cosY * cosZ, sinY * sinXcosZ - cosXsinZ, sinY * cosXcosZ + sinXsinZ,
				+			cosY * sinZ, sinY * sinXsinZ + cosXcosZ, sinY * cosXsinZ - sinXcosZ,
				+			-sinY,       cosY * sinX,                cosY * cosX);
				+	}
			
 
				+
			
 
				+	/**@brief Set the matrix to the identity (ones on the diagonal, zeros elsewhere) */
				+	void setIdentity()
				+	{ 
				+#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
				+		// SIMD builds copy the precomputed identity rows
				+		m_el[0] = b3MakeVector3(b3v1000);
				+		m_el[1] = b3MakeVector3(b3v0100);
				+		m_el[2] = b3MakeVector3(b3v0010);
				+#else
				+		const b3Scalar one(1.0);
				+		const b3Scalar zero(0.0);
				+		setValue(one, zero, zero,
				+			zero, one, zero,
				+			zero, zero, one);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return a reference to a function-local static identity matrix */
				+	static const b3Matrix3x3&	getIdentity()
				+	{
				+#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) || defined(B3_USE_NEON)
				+        // SIMD builds construct it from the precomputed identity rows
				+        static const b3Matrix3x3 
				+        identityMatrix(b3v1000, b3v0100, b3v0010);
				+#else
				+		static const b3Matrix3x3 
				+        identityMatrix(
				+            b3Scalar(1.0), b3Scalar(0.0), b3Scalar(0.0), 
				+			b3Scalar(0.0), b3Scalar(1.0), b3Scalar(0.0), 
				+			b3Scalar(0.0), b3Scalar(0.0), b3Scalar(1.0));
				+#endif
				+		return identityMatrix;
				+	}
			
 
				+
			
 
				+	/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
				+	* @param m The array to be filled; twelve scalars m[0..11] are written. In the scalar
				+	* branch each group of four holds one column of this matrix followed by a zero
				+	* (m[3], m[7] and m[11] are set to 0).
				+	* NOTE(review): the SIMD branches store through a vector-pointer cast of m, which
				+	* presumably requires m to be 16-byte aligned and b3Scalar to be float - confirm. */
				+	void getOpenGLSubMatrix(b3Scalar *m) const 
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+        __m128 v0 = m_el[0].mVec128;
				+        __m128 v1 = m_el[1].mVec128;
				+        __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
				+        __m128 *vm = (__m128 *)m;
				+        __m128 vT;
				+        
				+        v2 = _mm_and_ps(v2, b3vFFF0fMask);  //  x2 y2 z2 0
				+        
				+        vT = _mm_unpackhi_ps(v0, v1);	//	z0 z1 * *
				+        v0 = _mm_unpacklo_ps(v0, v1);	//	x0 x1 y0 y1
				+
				+        v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0
				+        v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
				+        v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT)));	// z0 z1 z2 0
				+
				+        // three vector stores cover m[0..11]
				+        vm[0] = v0;
				+        vm[1] = v1;
				+        vm[2] = v2;
				+#elif defined(B3_USE_NEON)
				+        // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
				+        static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
				+        float32x4_t *vm = (float32x4_t *)m;
				+        float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
				+        float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
				+        float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
				+        float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
				+        float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
				+        float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       // z0 z1 z2  0
				+
				+        // three vector stores cover m[0..11]
				+        vm[0] = v0;
				+        vm[1] = v1;
				+        vm[2] = v2;
				+#else
				+		// x components of the three rows, then a zero
				+		m[0]  = b3Scalar(m_el[0].getX()); 
				+		m[1]  = b3Scalar(m_el[1].getX());
				+		m[2]  = b3Scalar(m_el[2].getX());
				+		m[3]  = b3Scalar(0.0); 
				+		// y components of the three rows, then a zero
				+		m[4]  = b3Scalar(m_el[0].getY());
				+		m[5]  = b3Scalar(m_el[1].getY());
				+		m[6]  = b3Scalar(m_el[2].getY());
				+		m[7]  = b3Scalar(0.0); 
				+		// z components of the three rows, then a zero
				+		m[8]  = b3Scalar(m_el[0].getZ()); 
				+		m[9]  = b3Scalar(m_el[1].getZ());
				+		m[10] = b3Scalar(m_el[2].getZ());
				+		m[11] = b3Scalar(0.0); 
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Get the matrix represented as a quaternion 
				+	* @param q The quaternion which will be set
				+	*
				+	* When the trace (sum of the diagonal) is positive, the w component is derived from
				+	* trace + 1. Otherwise the component belonging to the largest diagonal element is
				+	* derived first and the remaining ones follow from off-diagonal sums/differences. */
				+	void getRotation(b3Quaternion& q) const
				+	{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+        b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
				+        b3Scalar s, x;
				+        
				+        // the four unscaled components are written as scalars and read back as one SIMD vector
				+        union {
				+            b3SimdFloat4 vec;
				+            b3Scalar f[4];
				+        } temp;
				+        
				+        if (trace > b3Scalar(0.0)) 
				+        {
				+            x = trace + b3Scalar(1.0);
				+
				+            temp.f[0]=m_el[2].getY() - m_el[1].getZ();
				+            temp.f[1]=m_el[0].getZ() - m_el[2].getX();
				+            temp.f[2]=m_el[1].getX() - m_el[0].getY();
				+            temp.f[3]=x;
				+            //temp.f[3]= s * b3Scalar(0.5);
				+        } 
				+        else 
				+        {
				+            // i is the index of the largest diagonal element, j and k are the two following indices (cyclic)
				+            int i, j, k;
				+            if(m_el[0].getX() < m_el[1].getY()) 
				+            { 
				+                if( m_el[1].getY() < m_el[2].getZ() )
				+                    { i = 2; j = 0; k = 1; }
				+                else
				+                    { i = 1; j = 2; k = 0; }
				+            }
				+            else
				+            {
				+                if( m_el[0].getX() < m_el[2].getZ())
				+                    { i = 2; j = 0; k = 1; }
				+                else
				+                    { i = 0; j = 1; k = 2; }
				+            }
				+
				+            x = m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0);
				+
				+            temp.f[3] = (m_el[k][j] - m_el[j][k]);
				+            temp.f[j] = (m_el[j][i] + m_el[i][j]);
				+            temp.f[k] = (m_el[k][i] + m_el[i][k]);
				+            temp.f[i] = x;
				+            //temp.f[i] = s * b3Scalar(0.5);
				+        }
				+
				+        // all four components are scaled by 0.5 / sqrt(x) in one multiplication
				+        s = b3Sqrt(x);
				+        q.set128(temp.vec);
				+        s = b3Scalar(0.5) / s;
				+
				+        q *= s;
				+#else    
				+		b3Scalar trace = m_el[0].getX() + m_el[1].getY() + m_el[2].getZ();
				+
				+		// temp[0..2] receive x, y, z and temp[3] receives w (see the setValue call at the end)
				+		b3Scalar temp[4];
				+
				+		if (trace > b3Scalar(0.0)) 
				+		{
				+			b3Scalar s = b3Sqrt(trace + b3Scalar(1.0));
				+			temp[3]=(s * b3Scalar(0.5));
				+			s = b3Scalar(0.5) / s;
				+
				+			temp[0]=((m_el[2].getY() - m_el[1].getZ()) * s);
				+			temp[1]=((m_el[0].getZ() - m_el[2].getX()) * s);
				+			temp[2]=((m_el[1].getX() - m_el[0].getY()) * s);
				+		} 
				+		else 
				+		{
				+			// i is the index of the largest diagonal element, j and k are the two following indices (cyclic)
				+			int i = m_el[0].getX() < m_el[1].getY() ? 
				+				(m_el[1].getY() < m_el[2].getZ() ? 2 : 1) :
				+				(m_el[0].getX() < m_el[2].getZ() ? 2 : 0); 
				+			int j = (i + 1) % 3;  
				+			int k = (i + 2) % 3;
				+
				+			b3Scalar s = b3Sqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + b3Scalar(1.0));
				+			temp[i] = s * b3Scalar(0.5);
				+			s = b3Scalar(0.5) / s;
				+
				+			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
				+			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
				+			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
				+		}
				+		q.setValue(temp[0],temp[1],temp[2],temp[3]);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Get the matrix represented as yaw, pitch and roll angles, roundtrip with setEulerYPR
				+	* (setEulerYPR forwards to setEulerZYX(roll, pitch, yaw), i.e. roll is about X, pitch about Y, yaw about Z)
				+	* @param yaw Yaw around Z axis, computed as atan2 of row 1 x and row 0 x
				+	* @param pitch Pitch around Y axis, computed as asin of the negated row 2 x
				+	* @param roll Roll around X axis, computed as atan2 of row 2 y and row 2 z */	
				+	void getEulerYPR(b3Scalar& yaw, b3Scalar& pitch, b3Scalar& roll) const
				+	{
				+
				+		// first use the normal calculus
				+		yaw = b3Scalar(b3Atan2(m_el[1].getX(), m_el[0].getX()));
				+		pitch = b3Scalar(b3Asin(-m_el[2].getX()));
				+		roll = b3Scalar(b3Atan2(m_el[2].getY(), m_el[2].getZ()));
				+
				+		// on pitch = +/-HalfPI
				+		// NOTE(review): exact floating-point comparison - this branch only runs when b3Asin returned exactly +/-B3_HALF_PI
				+		if (b3Fabs(pitch)==B3_HALF_PI)
				+		{
				+			// shift yaw and roll by pi towards zero / away from the sign they currently have
				+			if (yaw>0)
				+				yaw-=B3_PI;
				+			else
				+				yaw+=B3_PI;
				+
				+			if (roll>0)
				+				roll-=B3_PI;
				+			else
				+				roll+=B3_PI;
				+		}
				+	};
			
 
				+
			
 
				+
			
 
				+	/**@brief Get the matrix represented as euler angles around ZYX
				+	* The angles describe this matrix as Rz(yaw) * Ry(pitch) * Rx(roll), which is the
				+	* convention the regular (non-singular) branch below extracts.
				+	* At the pitch singularity (gimbal lock) yaw and roll cannot be separated: yaw is
				+	* reported as 0, roll carries the whole remaining rotation and both solutions are equal.
				+	* @param yaw Yaw around Z axis
				+	* @param pitch Pitch around Y axis
				+	* @param roll around X axis 
				+	* @param solution_number Which of the two possible solutions to return: 1 selects the first, any other value the second */	
				+	void getEulerZYX(b3Scalar& yaw, b3Scalar& pitch, b3Scalar& roll, unsigned int solution_number = 1) const
				+	{
				+		struct Euler
				+		{
				+			b3Scalar yaw;
				+			b3Scalar pitch;
				+			b3Scalar roll;
				+		};
				+
				+		Euler euler_out;
				+		Euler euler_out2; //second solution
				+
				+		// Check that pitch is not at a singularity
				+		if (b3Fabs(m_el[2].getX()) >= 1)
				+		{
				+			// Gimbal lock: pin yaw to zero and let roll absorb the rest
				+			euler_out.yaw = 0;
				+			euler_out2.yaw = 0;
				+
				+			// m_el[2].getX() equals -sin(pitch), exactly as in the regular branch
				+			if (m_el[2].getX() < 0)  //gimbal locked up, sin(pitch) == +1
				+			{
				+				// here row 0 is (0, sin(roll - yaw), cos(roll - yaw))
				+				b3Scalar delta = b3Atan2(m_el[0].getY(), m_el[0].getZ());
				+				euler_out.pitch = B3_PI / b3Scalar(2.0);
				+				euler_out2.pitch = B3_PI / b3Scalar(2.0);
				+				euler_out.roll = euler_out.yaw + delta;
				+				euler_out2.roll = euler_out.yaw + delta;
				+			}
				+			else // gimbal locked down, sin(pitch) == -1
				+			{
				+				// here row 0 is (0, -sin(roll + yaw), -cos(roll + yaw))
				+				b3Scalar delta = b3Atan2(-m_el[0].getY(), -m_el[0].getZ());
				+				euler_out.pitch = -B3_PI / b3Scalar(2.0);
				+				euler_out2.pitch = -B3_PI / b3Scalar(2.0);
				+				euler_out.roll = -euler_out.yaw + delta;
				+				euler_out2.roll = -euler_out.yaw + delta;
				+			}
				+		}
				+		else
				+		{
				+			// two pitch values share the same sine: pitch and PI - pitch
				+			euler_out.pitch = - b3Asin(m_el[2].getX());
				+			euler_out2.pitch = B3_PI - euler_out.pitch;
				+
				+			// dividing by cos(pitch) keeps the atan2 arguments in the right quadrant for each solution
				+			euler_out.roll = b3Atan2(m_el[2].getY()/b3Cos(euler_out.pitch), 
				+				m_el[2].getZ()/b3Cos(euler_out.pitch));
				+			euler_out2.roll = b3Atan2(m_el[2].getY()/b3Cos(euler_out2.pitch), 
				+				m_el[2].getZ()/b3Cos(euler_out2.pitch));
				+
				+			euler_out.yaw = b3Atan2(m_el[1].getX()/b3Cos(euler_out.pitch), 
				+				m_el[0].getX()/b3Cos(euler_out.pitch));
				+			euler_out2.yaw = b3Atan2(m_el[1].getX()/b3Cos(euler_out2.pitch), 
				+				m_el[0].getX()/b3Cos(euler_out2.pitch));
				+		}
				+
				+		if (solution_number == 1)
				+		{ 
				+			yaw = euler_out.yaw; 
				+			pitch = euler_out.pitch;
				+			roll = euler_out.roll;
				+		}
				+		else
				+		{ 
				+			yaw = euler_out2.yaw; 
				+			pitch = euler_out2.pitch;
				+			roll = euler_out2.roll;
				+		}
				+	}
			
 
				+
			
 
				+	/**@brief Build a scaled copy of the matrix 
				+	* @param s Scaling vector: its x, y and z scale column 0, 1 and 2 respectively
				+	* @return The scaled copy; this matrix itself is not modified */
				+	b3Matrix3x3 scaled(const b3Vector3& s) const
				+	{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+		return b3Matrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
				+#else		
				+		// one scale factor per column
				+		const b3Scalar sx = s.getX();
				+		const b3Scalar sy = s.getY();
				+		const b3Scalar sz = s.getZ();
				+		return b3Matrix3x3(
				+			m_el[0].getX() * sx, m_el[0].getY() * sy, m_el[0].getZ() * sz,
				+			m_el[1].getX() * sx, m_el[1].getY() * sy, m_el[1].getZ() * sz,
				+			m_el[2].getX() * sx, m_el[2].getY() * sy, m_el[2].getZ() * sz);
				+#endif
				+	}
			
 
				+
			
 
				+	/**@brief Return the determinant of the matrix */
			
 
				+	b3Scalar            determinant() const;
			
 
				+	/**@brief Return the adjoint of the matrix */
			
 
				+	b3Matrix3x3 adjoint() const;
			
 
				+	/**@brief Return the matrix with all values non negative */
			
 
				+	b3Matrix3x3 absolute() const;
			
 
				+	/**@brief Return the transpose of the matrix */
			
 
				+	b3Matrix3x3 transpose() const;
			
 
				+	/**@brief Return the inverse of the matrix */
			
 
				+	b3Matrix3x3 inverse() const; 
			
 
				+	/**@brief Return the transpose of this matrix multiplied by m, i.e. transpose(this) * m */
				+	b3Matrix3x3 transposeTimes(const b3Matrix3x3& m) const;
				+	b3Matrix3x3 timesTranspose(const b3Matrix3x3& m) const; /**< Return this matrix multiplied by the transpose of m, i.e. this * transpose(m) */
			
 
				+
			
 
				+	/**@brief Dot product of column 0 of this matrix with v
				+	* @param v The vector to project
				+	* @return The x component of transpose(this) * v */
				+	B3_FORCE_INLINE b3Scalar tdotx(const b3Vector3& v) const 
				+	{
				+		return m_el[0].getX() * v.getX() +
				+			m_el[1].getX() * v.getY() +
				+			m_el[2].getX() * v.getZ();
				+	}
			
 
				+	/**@brief Dot product of column 1 of this matrix with v
				+	* @param v The vector to project
				+	* @return The y component of transpose(this) * v */
				+	B3_FORCE_INLINE b3Scalar tdoty(const b3Vector3& v) const 
				+	{
				+		return m_el[0].getY() * v.getX() +
				+			m_el[1].getY() * v.getY() +
				+			m_el[2].getY() * v.getZ();
				+	}
			
 
				+	/**@brief Dot product of column 2 of this matrix with v
				+	* @param v The vector to project
				+	* @return The z component of transpose(this) * v */
				+	B3_FORCE_INLINE b3Scalar tdotz(const b3Vector3& v) const 
				+	{
				+		return m_el[0].getZ() * v.getX() +
				+			m_el[1].getZ() * v.getY() +
				+			m_el[2].getZ() * v.getZ();
				+	}
			
 
				+
			
 
				+
			
 
				+	/**@brief diagonalizes this matrix by the Jacobi method.
				+	* This matrix is overwritten by the result of the rotations applied to it.
				+	* @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
				+	* coordinate system, i.e., old_this = rot * new_this * rot^T. 
				+	* @param threshold Relative tolerance for the off-diagonal elements, see maxSteps
				+	* @param maxSteps Upper bound on the number of Jacobi rotations. The iteration stops when all off-diagonal elements are less than the threshold multiplied 
				+	* by the sum of the absolute values of the diagonal, or when maxSteps have been executed. 
				+	* 
				+	* Note that this matrix is assumed to be symmetric. 
				+	*/
				+	void diagonalize(b3Matrix3x3& rot, b3Scalar threshold, int maxSteps)
				+	{
				+		rot.setIdentity();
				+		for (int step = maxSteps; step > 0; step--)
				+		{
				+			// find off-diagonal element [p][q] with largest magnitude
				+			int p = 0;
				+			int q = 1;
				+			int r = 2; // r is the remaining index, neither p nor q
				+			b3Scalar max = b3Fabs(m_el[0][1]);
				+			b3Scalar v = b3Fabs(m_el[0][2]);
				+			if (v > max)
				+			{
				+				q = 2;
				+				r = 1;
				+				max = v;
				+			}
				+			v = b3Fabs(m_el[1][2]);
				+			if (v > max)
				+			{
				+				p = 1;
				+				q = 2;
				+				r = 0;
				+				max = v;
				+			}
				+
				+			b3Scalar t = threshold * (b3Fabs(m_el[0][0]) + b3Fabs(m_el[1][1]) + b3Fabs(m_el[2][2])); // convergence bound; t is reused below for the rotation
				+			if (max <= t)
				+			{
				+				if (max <= B3_EPSILON * t)
				+				{
				+					return; // largest off-diagonal element is negligible: stop without another rotation
				+				}
				+				step = 1; // within tolerance: finish this rotation, then the loop decrement ends the iteration
				+			}
				+
				+			// compute Jacobi rotation J which leads to a zero for element [p][q] 
				+			b3Scalar mpq = m_el[p][q];
				+			b3Scalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
				+			b3Scalar theta2 = theta * theta;
				+			b3Scalar cos;
				+			b3Scalar sin;
				+			if (theta2 * theta2 < b3Scalar(10 / B3_EPSILON))
				+			{
				+				t = (theta >= 0) ? 1 / (theta + b3Sqrt(1 + theta2))
				+					: 1 / (theta - b3Sqrt(1 + theta2));
				+				cos = 1 / b3Sqrt(1 + t * t);
				+				sin = cos * t; // t is sin / cos, i.e. the tangent of the rotation angle
				+			}
				+			else
				+			{
				+				// approximation for large theta-value, i.e., a nearly diagonal matrix
				+				t = 1 / (theta * (2 + b3Scalar(0.5) / theta2));
				+				cos = 1 - b3Scalar(0.5) * t * t;
				+				sin = cos * t;
				+			}
				+
				+			// apply rotation to matrix (this = J^T * this * J)
				+			m_el[p][q] = m_el[q][p] = 0;
				+			m_el[p][p] -= t * mpq;
				+			m_el[q][q] += t * mpq;
				+			b3Scalar mrp = m_el[r][p]; // read both old values before either entry is overwritten
				+			b3Scalar mrq = m_el[r][q];
				+			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq; // both mirrored entries are written to keep the matrix symmetric
				+			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
				+
				+			// apply rotation to rot (rot = rot * J)
				+			for (int i = 0; i < 3; i++)
				+			{
				+				b3Vector3& row = rot[i];
				+				mrp = row[p];
				+				mrq = row[q];
				+				row[p] = cos * mrp - sin * mrq;
				+				row[q] = cos * mrq + sin * mrp;
				+			}
				+		}
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+	/**@brief Calculate the matrix cofactor: a 2x2 determinant built from two rows and two columns
				+	* @param r1 The first row to use for calculating the cofactor
				+	* @param c1 The first column to use for calculating the cofactor
				+	* @param r2 The second row to use for calculating the cofactor
				+	* @param c2 The second column to use for calculating the cofactor
				+	* @return m[r1][c1] * m[r2][c2] - m[r1][c2] * m[r2][c1]
				+	* See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
				+	*/
				+	b3Scalar cofac(int r1, int c1, int r2, int c2) const 
				+	{
				+		const b3Vector3& firstRow = m_el[r1];
				+		const b3Vector3& secondRow = m_el[r2];
				+		return firstRow[c1] * secondRow[c2] - firstRow[c2] * secondRow[c1];
				+	}
			
 
				+
			
 
				+	void	serialize(struct	b3Matrix3x3Data& dataOut) const;
			
 
				+
			
 
				+	void	serializeFloat(struct	b3Matrix3x3FloatData& dataOut) const;
			
 
				+
			
 
				+	void	deSerialize(const struct	b3Matrix3x3Data& dataIn);
			
 
				+
			
 
				+	void	deSerializeFloat(const struct	b3Matrix3x3FloatData& dataIn);
			
 
				+
			
 
				+	void	deSerializeDouble(const struct	b3Matrix3x3DoubleData& dataIn);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+/**@brief Multiply this matrix in place by m on the right (this = this * m)
				+ * @param m The right-hand factor
				+ * @return Reference to this matrix, which now holds the product */
				+B3_FORCE_INLINE b3Matrix3x3& 
				+b3Matrix3x3::operator*=(const b3Matrix3x3& m)
				+{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+    __m128 rv00, rv01, rv02;
				+    __m128 rv10, rv11, rv12;
				+    __m128 rv20, rv21, rv22;
				+    __m128 mv0, mv1, mv2;
				+
				+    rv02 = m_el[0].mVec128; // rvN2 temporarily hold the untouched rows of this matrix
				+    rv12 = m_el[1].mVec128;
				+    rv22 = m_el[2].mVec128;
				+
				+    mv0 = _mm_and_ps(m[0].mVec128, b3vFFF0fMask); // rows of m ANDed with the xyz mask
				+    mv1 = _mm_and_ps(m[1].mVec128, b3vFFF0fMask); 
				+    mv2 = _mm_and_ps(m[2].mVec128, b3vFFF0fMask); 
				+    
				+    // row 0 of this: element 0, 1, 2 weights row 0, 1, 2 of m
				+    rv00 = b3_splat_ps(rv02, 0);
				+    rv01 = b3_splat_ps(rv02, 1);
				+    rv02 = b3_splat_ps(rv02, 2); // overwritten last because it is the source of all three splats
				+    
				+    rv00 = _mm_mul_ps(rv00, mv0);
				+    rv01 = _mm_mul_ps(rv01, mv1);
				+    rv02 = _mm_mul_ps(rv02, mv2);
				+    
				+    // row 1 of this, same scheme
				+    rv10 = b3_splat_ps(rv12, 0);
				+    rv11 = b3_splat_ps(rv12, 1);
				+    rv12 = b3_splat_ps(rv12, 2);
				+    
				+    rv10 = _mm_mul_ps(rv10, mv0);
				+    rv11 = _mm_mul_ps(rv11, mv1);
				+    rv12 = _mm_mul_ps(rv12, mv2);
				+    
				+    // row 2 of this, same scheme
				+    rv20 = b3_splat_ps(rv22, 0);
				+    rv21 = b3_splat_ps(rv22, 1);
				+    rv22 = b3_splat_ps(rv22, 2);
				+    
				+    rv20 = _mm_mul_ps(rv20, mv0);
				+    rv21 = _mm_mul_ps(rv21, mv1);
				+    rv22 = _mm_mul_ps(rv22, mv2);
				+
				+    rv00 = _mm_add_ps(rv00, rv01); // sum the three weighted rows of m for each result row
				+    rv10 = _mm_add_ps(rv10, rv11);
				+    rv20 = _mm_add_ps(rv20, rv21);
				+
				+    m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
				+    m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
				+    m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
				+
				+#elif defined(B3_USE_NEON)
				+
				+    float32x4_t rv0, rv1, rv2;
				+    float32x4_t v0, v1, v2;
				+    float32x4_t mv0, mv1, mv2;
				+
				+    v0 = m_el[0].mVec128; // rows of this matrix
				+    v1 = m_el[1].mVec128;
				+    v2 = m_el[2].mVec128;
				+
				+    mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask); // rows of m ANDed with the xyz mask
				+    mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask); 
				+    mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask); 
				+    
				+    rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0); // row 0 of m scaled by the x of each row of this
				+    rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
				+    rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
				+    
				+    rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1); // accumulate row 1 of m scaled by y
				+    rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
				+    rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
				+    
				+    rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0); // accumulate row 2 of m scaled by z
				+    rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
				+    rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
				+
				+    m_el[0].mVec128 = rv0;
				+    m_el[1].mVec128 = rv1;
				+    m_el[2].mVec128 = rv2;
				+#else    
				+	setValue(
				+        m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
				+		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
				+		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2])); // element (i,j) = row i of this dotted with column j of m
				+#endif
				+	return *this;
				+}
			
 
				+
			
 
				+/**@brief Add m to this matrix element by element, in place
				+ * @param m The matrix to add
				+ * @return Reference to this matrix after the addition */
				+B3_FORCE_INLINE b3Matrix3x3& 
				+b3Matrix3x3::operator+=(const b3Matrix3x3& m)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+    m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
				+    m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
				+    m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
				+#else
				+	// rows of the right-hand operand
				+	const b3Vector3& rhs0 = m.m_el[0];
				+	const b3Vector3& rhs1 = m.m_el[1];
				+	const b3Vector3& rhs2 = m.m_el[2];
				+	setValue(
				+		m_el[0][0]+rhs0[0], m_el[0][1]+rhs0[1], m_el[0][2]+rhs0[2],
				+		m_el[1][0]+rhs1[0], m_el[1][1]+rhs1[1], m_el[1][2]+rhs1[2],
				+		m_el[2][0]+rhs2[0], m_el[2][1]+rhs2[1], m_el[2][2]+rhs2[2]);
				+#endif
				+	return *this;
				+}
			
 
				+/**@brief Multiply every element of a matrix by a scalar
				+ * @param m The matrix to scale
				+ * @param k The scale factor
				+ * @return A new matrix holding the elements of m multiplied by k */
				+B3_FORCE_INLINE b3Matrix3x3
				+operator*(const b3Matrix3x3& m, const b3Scalar & k)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+    __m128 vk = b3_splat_ps(_mm_load_ss((float *)&k), 0x80); // NOTE(review): presumably spreads k over the x, y, z lanes; the effect of the 0x80 selector depends on how b3_splat_ps builds its shuffle mask - confirm against its definition
				+    return b3Matrix3x3(
				+                _mm_mul_ps(m[0].mVec128, vk), 
				+                _mm_mul_ps(m[1].mVec128, vk), 
				+                _mm_mul_ps(m[2].mVec128, vk)); 
				+#elif defined(B3_USE_NEON)
				+    return b3Matrix3x3(
				+                vmulq_n_f32(m[0].mVec128, k),
				+                vmulq_n_f32(m[1].mVec128, k),
				+                vmulq_n_f32(m[2].mVec128, k)); 
				+#else
				+	return b3Matrix3x3(
				+		m[0].getX()*k,m[0].getY()*k,m[0].getZ()*k,
				+		m[1].getX()*k,m[1].getY()*k,m[1].getZ()*k,
				+		m[2].getX()*k,m[2].getY()*k,m[2].getZ()*k);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Element-wise sum of two matrices
				+ * @param m1 Left operand
				+ * @param m2 Right operand
				+ * @return A new matrix whose elements are the sums of the matching elements of m1 and m2 */
				+B3_FORCE_INLINE b3Matrix3x3 
				+operator+(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+	return b3Matrix3x3(
				+        m1[0].mVec128 + m2[0].mVec128,
				+        m1[1].mVec128 + m2[1].mVec128,
				+        m1[2].mVec128 + m2[2].mVec128);
				+#else
				+	// one line per result row
				+	return b3Matrix3x3(
				+        m1[0][0]+m2[0][0], m1[0][1]+m2[0][1], m1[0][2]+m2[0][2],
				+        m1[1][0]+m2[1][0], m1[1][1]+m2[1][1], m1[1][2]+m2[1][2],
				+        m1[2][0]+m2[2][0], m1[2][1]+m2[2][1], m1[2][2]+m2[2][2]);
				+#endif    
				+}
			
 
				+
			
 
				+/**@brief Element-wise difference of two matrices
				+ * @param m1 The matrix to subtract from
				+ * @param m2 The matrix to subtract
				+ * @return A new matrix whose elements are those of m1 minus the matching elements of m2 */
				+B3_FORCE_INLINE b3Matrix3x3 
				+operator-(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+	return b3Matrix3x3(
				+        m1[0].mVec128 - m2[0].mVec128,
				+        m1[1].mVec128 - m2[1].mVec128,
				+        m1[2].mVec128 - m2[2].mVec128);
				+#else
				+	// one line per result row
				+	return b3Matrix3x3(
				+        m1[0][0]-m2[0][0], m1[0][1]-m2[0][1], m1[0][2]-m2[0][2],
				+        m1[1][0]-m2[1][0], m1[1][1]-m2[1][1], m1[1][2]-m2[1][2],
				+        m1[2][0]-m2[2][0], m1[2][1]-m2[2][1], m1[2][2]-m2[2][2]);
				+#endif
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Subtract m from this matrix element by element, in place
				+ * @param m The matrix to subtract
				+ * @return Reference to this matrix after the subtraction */
				+B3_FORCE_INLINE b3Matrix3x3& 
				+b3Matrix3x3::operator-=(const b3Matrix3x3& m)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+    m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
				+    m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
				+    m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
				+#else
				+	// rows of the right-hand operand
				+	const b3Vector3& rhs0 = m.m_el[0];
				+	const b3Vector3& rhs1 = m.m_el[1];
				+	const b3Vector3& rhs2 = m.m_el[2];
				+	setValue(
				+		m_el[0][0]-rhs0[0], m_el[0][1]-rhs0[1], m_el[0][2]-rhs0[2],
				+		m_el[1][0]-rhs1[0], m_el[1][1]-rhs1[1], m_el[1][2]-rhs1[2],
				+		m_el[2][0]-rhs2[0], m_el[2][1]-rhs2[1], m_el[2][2]-rhs2[2]);
				+#endif
				+	return *this;
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Return the determinant of the matrix, obtained by passing its three rows to b3Triple */
				+B3_FORCE_INLINE b3Scalar 
				+b3Matrix3x3::determinant() const
				+{ 
				+	const b3Matrix3x3& self = *this;
				+	return b3Triple(self[0], self[1], self[2]);
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Return a copy of the matrix with every element replaced by its absolute value */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::absolute() const
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+    return b3Matrix3x3(
				+            _mm_and_ps(m_el[0].mVec128, b3vAbsfMask),
				+            _mm_and_ps(m_el[1].mVec128, b3vAbsfMask),
				+            _mm_and_ps(m_el[2].mVec128, b3vAbsfMask));
				+#elif defined(B3_USE_NEON)
				+    return b3Matrix3x3(
				+            (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, b3v3AbsMask),
				+            (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, b3v3AbsMask),
				+            (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, b3v3AbsMask));
				+#else	
				+	const b3Vector3& top = m_el[0];
				+	const b3Vector3& middle = m_el[1];
				+	const b3Vector3& bottom = m_el[2];
				+	return b3Matrix3x3(
				+            b3Fabs(top.getX()), b3Fabs(top.getY()), b3Fabs(top.getZ()),
				+            b3Fabs(middle.getX()), b3Fabs(middle.getY()), b3Fabs(middle.getZ()),
				+            b3Fabs(bottom.getX()), b3Fabs(bottom.getY()), b3Fabs(bottom.getZ()));
				+#endif
				+}
			
 
				+/**@brief Return the transpose of the matrix (rows and columns swapped)
				+ * The SIMD variants produce a zero in the w lane of every result row, as their lane comments show. */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::transpose() const 
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+    __m128 v0 = m_el[0].mVec128;
				+    __m128 v1 = m_el[1].mVec128;
				+    __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
				+    __m128 vT;
				+    
				+    v2 = _mm_and_ps(v2, b3vFFF0fMask);  //  x2 y2 z2 0
				+    
				+    vT = _mm_unpackhi_ps(v0, v1);	//	z0 z1 * *
				+    v0 = _mm_unpacklo_ps(v0, v1);	//	x0 x1 y0 y1
				+
				+    v1 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0 (must run before v0 is overwritten on the next line)
				+    v0 = _mm_shuffle_ps(v0, v2, B3_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
				+    v2 = b3CastdTo128f(_mm_move_sd(b3CastfTo128d(v2), b3CastfTo128d(vT)));	// z0 z1 z2 0
				+
				+
				+    return b3Matrix3x3( v0, v1, v2 );
				+#elif defined(B3_USE_NEON)
				+    // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
				+    static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
				+    float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
				+    float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
				+    float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] ); // x0 x1 x2 0
				+    float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] ); // y0 y1 y2 0
				+    float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask ); // z2 0
				+    float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       // z0 z1 z2  0
				+    return b3Matrix3x3( v0, v1, v2 ); 
				+#else
				+	return b3Matrix3x3( m_el[0].getX(), m_el[1].getX(), m_el[2].getX(),
				+                        m_el[0].getY(), m_el[1].getY(), m_el[2].getY(),
				+                        m_el[0].getZ(), m_el[1].getZ(), m_el[2].getZ());
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the adjoint of the matrix, assembled from nine 2x2 cofactors (see cofac) */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::adjoint() const 
				+{
				+	// first result row
				+	const b3Scalar a00 = cofac(1, 1, 2, 2);
				+	const b3Scalar a01 = cofac(0, 2, 2, 1);
				+	const b3Scalar a02 = cofac(0, 1, 1, 2);
				+	// second result row
				+	const b3Scalar a10 = cofac(1, 2, 2, 0);
				+	const b3Scalar a11 = cofac(0, 0, 2, 2);
				+	const b3Scalar a12 = cofac(0, 2, 1, 0);
				+	// third result row
				+	const b3Scalar a20 = cofac(1, 0, 2, 1);
				+	const b3Scalar a21 = cofac(0, 1, 2, 0);
				+	const b3Scalar a22 = cofac(0, 0, 1, 1);
				+
				+	return b3Matrix3x3(a00, a01, a02,
				+		a10, a11, a12,
				+		a20, a21, a22);
				+}
			
 
				+
			
 
				+/**@brief Return the inverse of the matrix, computed as its cofactors divided by the determinant
				+ * b3FullAssert checks that the determinant is not zero. */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::inverse() const
				+{
				+	// these three cofactors serve twice: for the determinant and as the first result column
				+	const b3Vector3 firstColumn = b3MakeVector3(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
				+	const b3Scalar det = (*this)[0].dot(firstColumn);
				+	b3FullAssert(det != b3Scalar(0.0));
				+	const b3Scalar invDet = b3Scalar(1.0) / det;
				+
				+	return b3Matrix3x3(
				+		firstColumn.getX() * invDet, cofac(0, 2, 2, 1) * invDet, cofac(0, 1, 1, 2) * invDet,
				+		firstColumn.getY() * invDet, cofac(0, 0, 2, 2) * invDet, cofac(0, 2, 1, 0) * invDet,
				+		firstColumn.getZ() * invDet, cofac(0, 1, 2, 0) * invDet, cofac(0, 0, 1, 1) * invDet);
				+}
			
 
				+/**@brief Return the transpose of this matrix multiplied by m, i.e. transpose(this) * m
				+ * @param m The right-hand factor
				+ * @return A new matrix; neither operand is modified */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::transposeTimes(const b3Matrix3x3& m) const
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+    // zeros w
				+//    static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
				+    __m128 row = m_el[0].mVec128;
				+    __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, b3vFFF0fMask );
				+    __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, b3vFFF0fMask);
				+    __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, b3vFFF0fMask );
				+    __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0)); // result row j starts as row 0 of m scaled by element j of row 0 of this
				+    __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
				+    __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
				+    row = m_el[1].mVec128;
				+    r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0))); // add row 1 of m scaled by the elements of row 1 of this
				+    r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
				+    r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
				+    row = m_el[2].mVec128;
				+    r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0))); // add row 2 of m scaled by the elements of row 2 of this
				+    r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
				+    r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
				+    return b3Matrix3x3( r0, r1, r2 );
				+
				+#elif defined B3_USE_NEON
				+    // zeros w
				+    static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
				+    float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
				+    float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
				+    float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
				+    float32x4_t row = m_el[0].mVec128;
				+    float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0); // same accumulation scheme as the SSE branch
				+    float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
				+    float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
				+    row = m_el[1].mVec128;
				+    r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
				+    r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
				+    r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
				+    row = m_el[2].mVec128;
				+    r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
				+    r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
				+    r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
				+    return b3Matrix3x3( r0, r1, r2 );
				+#else
				+    return b3Matrix3x3(
				+		m_el[0].getX() * m[0].getX() + m_el[1].getX() * m[1].getX() + m_el[2].getX() * m[2].getX(), // element (i,j) = column i of this dotted with column j of m
				+		m_el[0].getX() * m[0].getY() + m_el[1].getX() * m[1].getY() + m_el[2].getX() * m[2].getY(),
				+		m_el[0].getX() * m[0].getZ() + m_el[1].getX() * m[1].getZ() + m_el[2].getX() * m[2].getZ(),
				+		m_el[0].getY() * m[0].getX() + m_el[1].getY() * m[1].getX() + m_el[2].getY() * m[2].getX(),
				+		m_el[0].getY() * m[0].getY() + m_el[1].getY() * m[1].getY() + m_el[2].getY() * m[2].getY(),
				+		m_el[0].getY() * m[0].getZ() + m_el[1].getY() * m[1].getZ() + m_el[2].getY() * m[2].getZ(),
				+		m_el[0].getZ() * m[0].getX() + m_el[1].getZ() * m[1].getX() + m_el[2].getZ() * m[2].getX(),
				+		m_el[0].getZ() * m[0].getY() + m_el[1].getZ() * m[1].getY() + m_el[2].getZ() * m[2].getY(),
				+		m_el[0].getZ() * m[0].getZ() + m_el[1].getZ() * m[1].getZ() + m_el[2].getZ() * m[2].getZ());
				+#endif
				+}
			
 
				+/**@brief Return this matrix multiplied by the transpose of m, i.e. this * transpose(m)
				+ * @param m The matrix whose transpose is the right-hand factor
				+ * @return A new matrix; neither operand is modified */
				+B3_FORCE_INLINE b3Matrix3x3 
				+b3Matrix3x3::timesTranspose(const b3Matrix3x3& m) const
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+    __m128 a0 = m_el[0].mVec128;
				+    __m128 a1 = m_el[1].mVec128;
				+    __m128 a2 = m_el[2].mVec128;
				+    
				+    b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
				+    __m128 mx = mT[0].mVec128;
				+    __m128 my = mT[1].mVec128;
				+    __m128 mz = mT[2].mVec128;
				+    
				+    __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00)); // result row i starts as row 0 of mT scaled by element 0 of row i of this
				+    __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
				+    __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
				+    r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55))); // add row 1 of mT scaled by element 1
				+    r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
				+    r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
				+    r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa))); // add row 2 of mT scaled by element 2
				+    r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
				+    r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
				+    return b3Matrix3x3( r0, r1, r2);
				+            
				+#elif defined B3_USE_NEON
				+    float32x4_t a0 = m_el[0].mVec128;
				+    float32x4_t a1 = m_el[1].mVec128;
				+    float32x4_t a2 = m_el[2].mVec128;
				+    
				+    b3Matrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
				+    float32x4_t mx = mT[0].mVec128;
				+    float32x4_t my = mT[1].mVec128;
				+    float32x4_t mz = mT[2].mVec128;
				+    
				+    float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0); // same accumulation scheme as the SSE branch
				+    float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
				+    float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
				+    r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
				+    r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
				+    r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
				+    r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
				+    r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
				+    r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
				+    return b3Matrix3x3( r0, r1, r2 );
				+    
				+#else
				+	return b3Matrix3x3(
				+		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]), // element (i,j) = row i of this dotted with row j of m
				+		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
				+		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Multiply a matrix by a column vector (m * v)
				+ * @param m The matrix
				+ * @param v The vector
				+ * @return A vector whose component i is row i of m dotted with v */
				+B3_FORCE_INLINE b3Vector3 
				+operator*(const b3Matrix3x3& m, const b3Vector3& v) 
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))|| defined (B3_USE_NEON)
				+    return v.dot3(m[0], m[1], m[2]);
				+#else
				+	const b3Scalar x = m[0].dot(v);
				+	const b3Scalar y = m[1].dot(v);
				+	const b3Scalar z = m[2].dot(v);
				+	return b3MakeVector3(x, y, z);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Multiply a row vector by a matrix (v * m)
				+ * @param v The vector, used as a row vector on the left
				+ * @param m The matrix
				+ * @return A vector whose component j is v dotted with column j of m */
				+B3_FORCE_INLINE b3Vector3
				+operator*(const b3Vector3& v, const b3Matrix3x3& m)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+
				+    const __m128 vv = v.mVec128;
				+
				+    __m128 c0 = b3_splat_ps( vv, 0);
				+    __m128 c1 = b3_splat_ps( vv, 1);
				+    __m128 c2 = b3_splat_ps( vv, 2);
				+
				+    c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, b3vFFF0fMask) ); // each row of m (ANDed with the xyz mask) is weighted by one splat of v
				+    c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, b3vFFF0fMask) );
				+    c0 = _mm_add_ps(c0, c1);
				+    c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, b3vFFF0fMask) );
				+    
				+    return b3MakeVector3(_mm_add_ps(c0, c2));
				+#elif defined(B3_USE_NEON)
				+    const float32x4_t vv = v.mVec128;
				+    const float32x2_t vlo = vget_low_f32(vv);
				+    const float32x2_t vhi = vget_high_f32(vv);
				+
				+    float32x4_t c0, c1, c2;
				+
				+    c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, b3vFFF0Mask);
				+    c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, b3vFFF0Mask);
				+    c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, b3vFFF0Mask);
				+
				+    c0 = vmulq_lane_f32(c0, vlo, 0); // row 0 of m scaled by lane 0 of v
				+    c1 = vmulq_lane_f32(c1, vlo, 1); // row 1 of m scaled by lane 1 of v
				+    c2 = vmulq_lane_f32(c2, vhi, 0); // row 2 of m scaled by lane 2 of v
				+    c0 = vaddq_f32(c0, c1);
				+    c0 = vaddq_f32(c0, c2);
				+    
				+    return b3MakeVector3(c0);
				+#else
				+	return b3MakeVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Multiply two matrices.
				+ * Row i of the result is m1[i][0]*m2[0] + m1[i][1]*m2[1] + m1[i][2]*m2[2]. */
				+B3_FORCE_INLINE b3Matrix3x3
				+operator*(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+
				+	const __m128 lhs0 = m1[0].mVec128;
				+	const __m128 lhs1 = m1[1].mVec128;
				+	const __m128 lhs2 = m1[2].mVec128;
				+
				+	// Rows of m2, each ANDed with b3vFFF0fMask before use.
				+	const __m128 rhs0 = _mm_and_ps(m2[0].mVec128, b3vFFF0fMask);
				+	const __m128 rhs1 = _mm_and_ps(m2[1].mVec128, b3vFFF0fMask);
				+	const __m128 rhs2 = _mm_and_ps(m2[2].mVec128, b3vFFF0fMask);
				+
				+	// Sum of the first two terms of every result row.
				+	__m128 out0 = _mm_add_ps(_mm_mul_ps(b3_splat_ps(lhs0, 0), rhs0), _mm_mul_ps(b3_splat_ps(lhs0, 1), rhs1));
				+	__m128 out1 = _mm_add_ps(_mm_mul_ps(b3_splat_ps(lhs1, 0), rhs0), _mm_mul_ps(b3_splat_ps(lhs1, 1), rhs1));
				+	__m128 out2 = _mm_add_ps(_mm_mul_ps(b3_splat_ps(lhs2, 0), rhs0), _mm_mul_ps(b3_splat_ps(lhs2, 1), rhs1));
				+
				+	// The third term is accumulated last, as (t0 + t1) + t2.
				+	out0 = _mm_add_ps(out0, _mm_mul_ps(b3_splat_ps(lhs0, 2), rhs2));
				+	out1 = _mm_add_ps(out1, _mm_mul_ps(b3_splat_ps(lhs1, 2), rhs2));
				+	out2 = _mm_add_ps(out2, _mm_mul_ps(b3_splat_ps(lhs2, 2), rhs2));
				+
				+	return b3Matrix3x3(out0, out1, out2);
				+
				+#elif defined(B3_USE_NEON)
				+
				+	const float32x4_t lhs0 = m1[0].mVec128;
				+	const float32x4_t lhs1 = m1[1].mVec128;
				+	const float32x4_t lhs2 = m1[2].mVec128;
				+
				+	// Rows of m2, each ANDed with b3vFFF0Mask before use.
				+	const float32x4_t rhs0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, b3vFFF0Mask);
				+	const float32x4_t rhs1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, b3vFFF0Mask);
				+	const float32x4_t rhs2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, b3vFFF0Mask);
				+
				+	// One multiply followed by two multiply-accumulates per result row.
				+	float32x4_t out0 = vmulq_lane_f32(rhs0, vget_low_f32(lhs0), 0);
				+	out0 = vmlaq_lane_f32(out0, rhs1, vget_low_f32(lhs0), 1);
				+	out0 = vmlaq_lane_f32(out0, rhs2, vget_high_f32(lhs0), 0);
				+
				+	float32x4_t out1 = vmulq_lane_f32(rhs0, vget_low_f32(lhs1), 0);
				+	out1 = vmlaq_lane_f32(out1, rhs1, vget_low_f32(lhs1), 1);
				+	out1 = vmlaq_lane_f32(out1, rhs2, vget_high_f32(lhs1), 0);
				+
				+	float32x4_t out2 = vmulq_lane_f32(rhs0, vget_low_f32(lhs2), 0);
				+	out2 = vmlaq_lane_f32(out2, rhs1, vget_low_f32(lhs2), 1);
				+	out2 = vmlaq_lane_f32(out2, rhs2, vget_high_f32(lhs2), 0);
				+
				+	return b3Matrix3x3(out0, out1, out2);
				+
				+#else
				+	// Scalar fallback: every entry comes from m2.tdotx/tdoty/tdotz applied to a row of m1.
				+	const b3Vector3& row0 = m1[0];
				+	const b3Vector3& row1 = m1[1];
				+	const b3Vector3& row2 = m1[2];
				+	return b3Matrix3x3(
				+		m2.tdotx(row0), m2.tdoty(row0), m2.tdotz(row0),
				+		m2.tdotx(row1), m2.tdoty(row1), m2.tdotz(row1),
				+		m2.tdotx(row2), m2.tdoty(row2), m2.tdotz(row2));
				+#endif
				+}
			
 
				+
			
 
				+/*
			
 
				+B3_FORCE_INLINE b3Matrix3x3 b3MultTransposeLeft(const b3Matrix3x3& m1, const b3Matrix3x3& m2) {
			
 
				+return b3Matrix3x3(
			
 
				+m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
			
 
				+m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
			
 
				+m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
			
 
				+m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
			
 
				+m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
			
 
				+m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
			
 
				+m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
			
 
				+m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
			
 
				+m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+/**@brief Equality operator between two matrices
				+* Returns true only when all nine elements of m1 compare equal to the matching elements of m2.
				+* Only the x, y and z lanes of each row take part in the comparison.  */
				+B3_FORCE_INLINE bool operator==(const b3Matrix3x3& m1, const b3Matrix3x3& m2)
				+{
				+#if (defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+
				+    __m128 c0, c1, c2;
				+
				+    // Lane-wise compare of each row pair; a lane is all-ones where the values are equal.
				+    c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
				+    c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
				+    c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
				+
				+    // A lane survives the AND only if it matched in all three rows.
				+    c0 = _mm_and_ps(c0, c1);
				+    c0 = _mm_and_ps(c0, c2);
				+
				+    // _mm_movemask_ps yields one bit per lane (4 bits). Bit 3 belongs to the fourth lane,
				+    // which the scalar path below never inspects, so it is masked out here. Comparing the
				+    // raw mask with 0x7 reported matrices as different whenever their fourth lanes happened
				+    // to be equal as well (mask 0xF).
				+    return (0x7 == (_mm_movemask_ps((__m128)c0) & 0x7));
				+#else
				+	return
				+    (   m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
				+		m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
				+		m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
				+#endif
				+}
			
 
				+
			
 
				+/// Serialization record for b3Matrix3x3: one b3Vector3FloatData per m_el entry of the matrix.
				+struct b3Matrix3x3FloatData
				+{
				+	b3Vector3FloatData	m_el[3];
				+};
			
 
				+
			
 
				+/// Serialization record for b3Matrix3x3: one b3Vector3DoubleData per m_el entry of the matrix.
				+struct b3Matrix3x3DoubleData
				+{
				+	b3Vector3DoubleData	m_el[3];
				+};
			
 
				+
			
 
				+
			
 
				+	
			
 
				+
			
 
				+/// Writes m_el[0..2] into dataOut.m_el[0..2], in index order, through b3Vector3::serialize.
				+B3_FORCE_INLINE	void	b3Matrix3x3::serialize(struct	b3Matrix3x3Data& dataOut) const
				+{
				+	m_el[0].serialize(dataOut.m_el[0]);
				+	m_el[1].serialize(dataOut.m_el[1]);
				+	m_el[2].serialize(dataOut.m_el[2]);
				+}
			
 
				+
			
 
				+/// Writes m_el[0..2] into dataOut.m_el[0..2], in index order, through serializeFloat.
				+B3_FORCE_INLINE	void	b3Matrix3x3::serializeFloat(struct	b3Matrix3x3FloatData& dataOut) const
				+{
				+	m_el[0].serializeFloat(dataOut.m_el[0]);
				+	m_el[1].serializeFloat(dataOut.m_el[1]);
				+	m_el[2].serializeFloat(dataOut.m_el[2]);
				+}
			
 
				+
			
 
				+
			
 
				+/// Reads m_el[0..2] back from dataIn.m_el[0..2], in index order, through deSerialize.
				+B3_FORCE_INLINE	void	b3Matrix3x3::deSerialize(const struct	b3Matrix3x3Data& dataIn)
				+{
				+	m_el[0].deSerialize(dataIn.m_el[0]);
				+	m_el[1].deSerialize(dataIn.m_el[1]);
				+	m_el[2].deSerialize(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+/// Reads m_el[0..2] back from dataIn.m_el[0..2], in index order, through deSerializeFloat.
				+B3_FORCE_INLINE	void	b3Matrix3x3::deSerializeFloat(const struct	b3Matrix3x3FloatData& dataIn)
				+{
				+	m_el[0].deSerializeFloat(dataIn.m_el[0]);
				+	m_el[1].deSerializeFloat(dataIn.m_el[1]);
				+	m_el[2].deSerializeFloat(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+/// Reads m_el[0..2] back from dataIn.m_el[0..2], in index order, through deSerializeDouble.
				+B3_FORCE_INLINE	void	b3Matrix3x3::deSerializeDouble(const struct	b3Matrix3x3DoubleData& dataIn)
				+{
				+	m_el[0].deSerializeDouble(dataIn.m_el[0]);
				+	m_el[1].deSerializeDouble(dataIn.m_el[1]);
				+	m_el[2].deSerializeDouble(dataIn.m_el[2]);
				+}
			
 
				+
			
 
				+#endif //B3_MATRIX3x3_H
			
 
				+
			
--- a/include/Bullet3Common/b3MinMax.h
+++ b/include/Bullet3Common/b3MinMax.h
@@ -0,0 +1,71 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GEN_MINMAX_H
			
 
				+#define B3_GEN_MINMAX_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+
			
 
				+/// Returns a reference to a when a < b, otherwise a reference to b
				+/// (b is therefore the result when the two compare equal).
				+template <class T>
				+B3_FORCE_INLINE const T& b3Min(const T& a, const T& b)
				+{
				+	if (a < b)
				+	{
				+		return a;
				+	}
				+	return b;
				+}
			
 
				+
			
 
				+/// Returns a reference to a when a > b, otherwise a reference to b
				+/// (b is therefore the result when the two compare equal).
				+template <class T>
				+B3_FORCE_INLINE const T& b3Max(const T& a, const T& b)
				+{
				+	if (a > b)
				+	{
				+		return a;
				+	}
				+	return b;
				+}
			
 
				+
			
 
				+/// Returns a reference to lb when a < lb, to ub when ub < a, and to a otherwise.
				+/// The lower bound is tested first.
				+template <class T>
				+B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub)
				+{
				+	if (a < lb)
				+	{
				+		return lb;
				+	}
				+	if (ub < a)
				+	{
				+		return ub;
				+	}
				+	return a;
				+}
			
 
				+
			
 
				+/// Lowers a to b in place; a is left untouched unless b < a.
				+template <class T>
				+B3_FORCE_INLINE void b3SetMin(T& a, const T& b)
				+{
				+	if (!(b < a))
				+	{
				+		return;
				+	}
				+	a = b;
				+}
			
 
				+
			
 
				+/// Raises a to b in place; a is left untouched unless a < b.
				+template <class T>
				+B3_FORCE_INLINE void b3SetMax(T& a, const T& b)
				+{
				+	if (!(a < b))
				+	{
				+		return;
				+	}
				+	a = b;
				+}
			
 
				+
			
 
				+/// Clamps a in place: a becomes lb when a < lb, otherwise ub when ub < a.
				+/// The lower bound is tested first and wins when both conditions would hold.
				+template <class T>
				+B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub)
				+{
				+	if (a < lb)
				+	{
				+		a = lb;
				+		return;
				+	}
				+	if (ub < a)
				+	{
				+		a = ub;
				+	}
				+}
			
 
				+
			
 
				+#endif //B3_GEN_MINMAX_H
			
--- a/include/Bullet3Common/b3PoolAllocator.h
+++ b/include/Bullet3Common/b3PoolAllocator.h
@@ -0,0 +1,121 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+// Guard renamed from _BT_POOL_ALLOCATOR_H: identifiers that start with an underscore followed by
				+// an uppercase letter are reserved for the implementation, and the B3_ prefix matches the guards
				+// of the sibling Bullet3Common headers.
				+#ifndef B3_POOL_ALLOCATOR_H
				+#define B3_POOL_ALLOCATOR_H
				+
				+#include "b3Scalar.h"
				+#include "b3AlignedAllocator.h"
				+
				+///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately.
				+///All slots have the same size (elemSize). Free slots form a singly linked list that is threaded
				+///through the slots themselves: the first sizeof(void*) bytes of a free slot hold the address of
				+///the next free slot, and the last free slot holds 0.
				+class b3PoolAllocator
				+{
				+	int				m_elemSize;		// size of one slot in bytes
				+	int				m_maxElements;	// number of slots in the pool (0 when no pool could be set up)
				+	int				m_freeCount;	// number of slots currently on the free list
				+	void*			m_firstFree;	// head of the free list, 0 when the pool is exhausted
				+	unsigned char*	m_pool;			// start of the slot storage, 0 when no pool could be set up
				+
				+	// The pool owns m_pool and releases it in the destructor, so a copy would free the same
				+	// storage twice. Copying is disabled: declared private and intentionally never defined.
				+	b3PoolAllocator(const b3PoolAllocator&);
				+	b3PoolAllocator& operator=(const b3PoolAllocator&);
				+
				+public:
				+
				+	/**@brief Create a pool of maxElements slots of elemSize bytes each.
				+	 * @param elemSize Size of one slot in bytes; must be at least sizeof(void*) so a free slot can hold its link
				+	 * @param maxElements Number of slots
				+	 * When the arguments cannot describe a usable pool (maxElements <= 0, elemSize too small)
				+	 * or the allocation fails, the allocator is left empty: getMaxCount() and getFreeCount()
				+	 * return 0 and allocate() returns 0. */
				+	b3PoolAllocator(int elemSize, int maxElements)
				+		:m_elemSize(elemSize),
				+		m_maxElements(maxElements),
				+		m_freeCount(0),
				+		m_firstFree(0),
				+		m_pool(0)
				+	{
				+		const bool slotHoldsLink = m_elemSize >= static_cast<int>(sizeof(void*));
				+		b3Assert(slotHoldsLink);
				+
				+		if (slotHoldsLink && m_maxElements > 0)
				+		{
				+			m_pool = (unsigned char*) b3AlignedAlloc( static_cast<unsigned int>(m_elemSize*m_maxElements),16);
				+		}
				+
				+		if (!m_pool)
				+		{
				+			// No storage: keep every counter consistent with an empty pool instead of
				+			// walking (and writing through) a list that does not exist.
				+			m_maxElements = 0;
				+			return;
				+		}
				+
				+		// Thread the free list through the slots: slot i links to slot i+1, the last one to 0.
				+		unsigned char* p = m_pool;
				+		m_firstFree = p;
				+		m_freeCount = m_maxElements;
				+		for (int i = 1; i < m_maxElements; ++i)
				+		{
				+			*(void**)p = (p + m_elemSize);
				+			p += m_elemSize;
				+		}
				+		*(void**)p = 0;
				+	}
				+
				+	/**@brief Release the slot storage; pointers handed out by allocate() become invalid. */
				+	~b3PoolAllocator()
				+	{
				+		if (m_pool)
				+		{
				+			b3AlignedFree( m_pool);
				+		}
				+	}
				+
				+	/**@brief Number of slots currently available */
				+	int	getFreeCount() const
				+	{
				+		return m_freeCount;
				+	}
				+
				+	/**@brief Number of slots currently handed out */
				+	int getUsedCount() const
				+	{
				+		return m_maxElements - m_freeCount;
				+	}
				+
				+	/**@brief Total number of slots in the pool */
				+	int getMaxCount() const
				+	{
				+		return m_maxElements;
				+	}
				+
				+	/**@brief Take one slot from the pool.
				+	 * @param size Requested size in bytes; only checked (by assertion) against the slot size
				+	 * @return Pointer to the slot, or 0 when no free slot is left */
				+	void*	allocate(int size)
				+	{
				+		// release mode fix
				+		(void)size;
				+		b3Assert(!size || size<=m_elemSize);
				+		b3Assert(m_freeCount>0);
				+		if (!m_firstFree)
				+		{
				+			// Exhausted pool: the assertion above is compiled out in release builds,
				+			// so report failure rather than dereferencing a null list head.
				+			return 0;
				+		}
				+		void* result = m_firstFree;
				+		m_firstFree = *(void**)m_firstFree;
				+		--m_freeCount;
				+		return result;
				+	}
				+
				+	/**@brief Check whether ptr is non-null and lies inside the pool's storage range */
				+	bool validPtr(void* ptr) const
				+	{
				+		if (ptr) {
				+			if (((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize))
				+			{
				+				return true;
				+			}
				+		}
				+		return false;
				+	}
				+
				+	/**@brief Return a slot to the pool; a null ptr is ignored.
				+	 * @param ptr Slot previously obtained from allocate() (range-checked by assertion only) */
				+	void	freeMemory(void* ptr)
				+	{
				+		 if (ptr) {
				+            b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);
				+
				+            // Push the slot on the front of the free list.
				+            *(void**)ptr = m_firstFree;
				+            m_firstFree = ptr;
				+            ++m_freeCount;
				+        }
				+	}
				+
				+	/**@brief Size of one slot in bytes */
				+	int	getElementSize() const
				+	{
				+		return m_elemSize;
				+	}
				+
				+	/**@brief Start address of the slot storage */
				+	unsigned char*	getPoolAddress()
				+	{
				+		return m_pool;
				+	}
				+
				+	/**@brief Start address of the slot storage (read-only) */
				+	const unsigned char*	getPoolAddress() const
				+	{
				+		return m_pool;
				+	}
				+
				+};
				+
				+#endif //B3_POOL_ALLOCATOR_H
			
--- a/include/Bullet3Common/b3QuadWord.h
+++ b/include/Bullet3Common/b3QuadWord.h
@@ -0,0 +1,245 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_SIMD_QUADWORD_H
			
 
				+#define B3_SIMD_QUADWORD_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+#include "b3MinMax.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#if defined (__CELLOS_LV2) && defined (__SPU__)
			
 
				+#include <altivec.h>
			
 
				+#endif
			
 
				+
			
 
				+/**@brief b3QuadWord stores four b3Scalar values; it is the base class of b3Quaternion
				+ * (and, according to the original header note, of b3Vector3).
				+ * The class is declared through B3_ATTRIBUTE_ALIGNED16 unless USE_LIBSPE2 is defined: some issues
				+ * under PS3 Linux with the IBM 2.1 SDK gcc compiler prevent using an aligned quadword there.
				+ */
				+#ifdef USE_LIBSPE2
				+class b3QuadWord
				+#else
				+B3_ATTRIBUTE_ALIGNED16(class) b3QuadWord
				+#endif
				+{
				+protected:
				+
				+#if defined (__SPU__) && defined (__CELLOS_LV2__)
				+	// Cell SPU build: the storage stays protected and has no named x/y/z/w view.
				+	union {
				+		vec_float4 mVec128;
				+		b3Scalar	m_floats[4];
				+	};
				+public:
				+	/**@brief Return all four values as one vector register */
				+	vec_float4	get128() const
				+	{
				+		return mVec128;
				+	}
				+
				+#elif defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+public:
				+	// SIMD build: the same 4 scalars are visible as a register, an array and named fields.
				+	union {
				+		b3SimdFloat4 mVec128;
				+		b3Scalar	m_floats[4];
				+		struct {b3Scalar x,y,z,w;};
				+	};
				+public:
				+	/**@brief Return all four values as one SIMD register */
				+	B3_FORCE_INLINE	b3SimdFloat4	get128() const
				+	{
				+		return mVec128;
				+	}
				+	/**@brief Overwrite all four values from one SIMD register */
				+	B3_FORCE_INLINE	void	set128(b3SimdFloat4 v128)
				+	{
				+		mVec128 = v128;
				+	}
				+
				+#else
				+public:
				+	// Scalar build: the 4 scalars are visible as an array and as named fields.
				+	union
				+	{
				+		b3Scalar	m_floats[4];
				+		struct {b3Scalar x,y,z,w;};
				+	};
				+
				+#endif // storage selection
				+
				+	public:
				+
				+	/**@brief No initialization constructor */
				+	B3_FORCE_INLINE b3QuadWord()
				+	{
				+	}
				+
				+	/**@brief Three argument constructor (zeros w)
				+	 * @param x Value of x
				+	 * @param y Value of y
				+	 * @param z Value of z
				+	 */
				+	B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
				+	{
				+		m_floats[0] = _x;
				+		m_floats[1] = _y;
				+		m_floats[2] = _z;
				+		m_floats[3] = 0.0f;
				+	}
				+
				+	/**@brief Initializing constructor
				+	 * @param x Value of x
				+	 * @param y Value of y
				+	 * @param z Value of z
				+	 * @param w Value of w
				+	 */
				+	B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
				+	{
				+		m_floats[0] = _x;
				+		m_floats[1] = _y;
				+		m_floats[2] = _z;
				+		m_floats[3] = _w;
				+	}
				+
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+
				+	/**@brief Construct from a SIMD register holding all four values */
				+	B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec)
				+	{
				+		mVec128 = vec;
				+	}
				+
				+	/**@brief Copy constructor; copies the whole register at once */
				+	B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs)
				+	{
				+		mVec128 = rhs.mVec128;
				+	}
				+
				+	/**@brief Assignment operator; copies the whole register at once */
				+	B3_FORCE_INLINE b3QuadWord&
				+	operator=(const b3QuadWord& v)
				+	{
				+		mVec128 = v.mVec128;
				+		return *this;
				+	}
				+
				+#endif
				+
				+	/**@brief Return the x value */
				+	B3_FORCE_INLINE const b3Scalar& getX() const
				+	{
				+		return m_floats[0];
				+	}
				+	/**@brief Return the y value */
				+	B3_FORCE_INLINE const b3Scalar& getY() const
				+	{
				+		return m_floats[1];
				+	}
				+	/**@brief Return the z value */
				+	B3_FORCE_INLINE const b3Scalar& getZ() const
				+	{
				+		return m_floats[2];
				+	}
				+	/**@brief Set the x value */
				+	B3_FORCE_INLINE void	setX(b3Scalar _x)
				+	{
				+		m_floats[0] = _x;
				+	}
				+	/**@brief Set the y value */
				+	B3_FORCE_INLINE void	setY(b3Scalar _y)
				+	{
				+		m_floats[1] = _y;
				+	}
				+	/**@brief Set the z value */
				+	B3_FORCE_INLINE void	setZ(b3Scalar _z)
				+	{
				+		m_floats[2] = _z;
				+	}
				+	/**@brief Set the w value */
				+	B3_FORCE_INLINE void	setW(b3Scalar _w)
				+	{
				+		m_floats[3] = _w;
				+	}
				+
				+	///operator b3Scalar*() replaces the former operator[] overloads, using implicit conversion.
				+	///operator== and operator!= below were added to avoid pointer comparisons.
				+	B3_FORCE_INLINE	operator       b3Scalar *()
				+	{
				+		return m_floats;
				+	}
				+	B3_FORCE_INLINE	operator const b3Scalar *() const
				+	{
				+		return m_floats;
				+	}
				+
				+	/**@brief Compare all four values; true only when every one of them compares equal */
				+	B3_FORCE_INLINE	bool	operator==(const b3QuadWord& other) const
				+	{
				+#ifdef B3_USE_SSE
				+		// One bit per lane; all four bits set means every lane matched.
				+		const int equalLanes = _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128));
				+		return (0xf == equalLanes);
				+#else
				+		for (int i = 0; i < 4; ++i)
				+		{
				+			if (!(m_floats[i] == other.m_floats[i]))
				+			{
				+				return false;
				+			}
				+		}
				+		return true;
				+#endif
				+	}
				+
				+	/**@brief Negation of operator== */
				+	B3_FORCE_INLINE	bool	operator!=(const b3QuadWord& other) const
				+	{
				+		return !operator==(other);
				+	}
				+
				+	/**@brief Set x,y,z and zero w
				+	 * @param x Value of x
				+	 * @param y Value of y
				+	 * @param z Value of z
				+	 */
				+	B3_FORCE_INLINE void 	setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
				+	{
				+		setValue(_x, _y, _z, b3Scalar(0.f));
				+	}
				+
				+	/**@brief Set the values
				+	 * @param x Value of x
				+	 * @param y Value of y
				+	 * @param z Value of z
				+	 * @param w Value of w
				+	 */
				+	B3_FORCE_INLINE void	setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
				+	{
				+		m_floats[0]=_x;
				+		m_floats[1]=_y;
				+		m_floats[2]=_z;
				+		m_floats[3]=_w;
				+	}
				+
				+	/**@brief Set each element to the max of the current values and the values of another b3QuadWord
				+	 * @param other The other b3QuadWord to compare with
				+	 */
				+	B3_FORCE_INLINE void	setMax(const b3QuadWord& other)
				+	{
				+#if defined(B3_USE_SSE)
				+		mVec128 = _mm_max_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vmaxq_f32(mVec128, other.mVec128);
				+#else
				+		for (int i = 0; i < 4; ++i)
				+		{
				+			b3SetMax(m_floats[i], other.m_floats[i]);
				+		}
				+#endif
				+	}
				+
				+	/**@brief Set each element to the min of the current values and the values of another b3QuadWord
				+	 * @param other The other b3QuadWord to compare with
				+	 */
				+	B3_FORCE_INLINE void	setMin(const b3QuadWord& other)
				+	{
				+#if defined(B3_USE_SSE)
				+		mVec128 = _mm_min_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vminq_f32(mVec128, other.mVec128);
				+#else
				+		for (int i = 0; i < 4; ++i)
				+		{
				+			b3SetMin(m_floats[i], other.m_floats[i]);
				+		}
				+#endif
				+	}
				+
				+};
			
 
				+
			
 
				+#endif //B3_SIMD_QUADWORD_H
			
--- a/include/Bullet3Common/b3Quaternion.h
+++ b/include/Bullet3Common/b3Quaternion.h
@@ -0,0 +1,893 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_SIMD__QUATERNION_H_
			
 
				+#define B3_SIMD__QUATERNION_H_
			
 
				+
			
 
				+
			
 
				+#include "b3Vector3.h"
			
 
				+#include "b3QuadWord.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#if defined(B3_USE_SSE)
				+
				+// All four lanes set to 1.0f.
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
				+
				+#endif
				+
				+#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
				+
				+// Sign-bit patterns: -0.0f has only the sign bit set, 0.0f has no bit set.
				+// b3vQInv carries the sign bit in the x, y and z lanes, b3vPPPM only in the w lane.
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, 0.0f};
				+const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {0.0f, 0.0f, 0.0f, -0.0f};
				+
				+#endif
			
 
				+
			
 
				+/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
			
 
				+class b3Quaternion : public b3QuadWord {
			
 
				+public:
			
 
				+  /**@brief No initialization constructor */
			
 
				+	b3Quaternion() {}
			
 
				+
			
 
				+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))|| defined(B3_USE_NEON) 
			
 
				+	// Set Vector 
			
 
				+	B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
			
 
				+	{
			
 
				+		mVec128 = vec;
			
 
				+	}
			
 
				+
			
 
				+	// Copy constructor
			
 
				+	B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
			
 
				+	{
			
 
				+		mVec128 = rhs.mVec128;
			
 
				+	}
			
 
				+
			
 
				+	// Assignment Operator
			
 
				+	B3_FORCE_INLINE b3Quaternion& 
			
 
				+	operator=(const b3Quaternion& v) 
			
 
				+	{
			
 
				+		mVec128 = v.mVec128;
			
 
				+		
			
 
				+		return *this;
			
 
				+	}
			
 
				+	
			
 
				+#endif
			
 
				+
			
 
				+	//		template <typename b3Scalar>
			
 
				+	//		explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
			
 
				+  /**@brief Constructor from scalars */
			
 
				+	b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) 
			
 
				+		: b3QuadWord(_x, _y, _z, _w) 
			
 
				+	{
			
 
				+		//b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
			
 
				+	}
			
 
				+  /**@brief Axis angle Constructor
			
 
				+   * @param axis The axis which the rotation is around
			
 
				+   * @param angle The magnitude of the rotation around the axis (Radians) */
			
 
				+	b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle) 
			
 
				+	{ 
			
 
				+		setRotation(_axis, _angle); 
			
 
				+	}
			
 
				+  /**@brief Constructor from Euler angles
			
 
				+   * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
			
 
				+   * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
			
 
				+   * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
			
 
				+	b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{ 
			
 
				+#ifndef B3_EULER_DEFAULT_ZYX
			
 
				+		setEuler(yaw, pitch, roll); 
			
 
				+#else
			
 
				+		setEulerZYX(yaw, pitch, roll); 
			
 
				+#endif 
			
 
				+	}
			
 
				+  /**@brief Set the rotation using axis angle notation 
			
 
				+   * @param axis The axis around which to rotate
			
 
				+   * @param angle The magnitude of the rotation in Radians */
			
 
				+	void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
			
 
				+	{
			
 
				+		b3Scalar d = axis.length();
			
 
				+		b3Assert(d != b3Scalar(0.0));
			
 
				+		b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
			
 
				+		setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s, 
			
 
				+			b3Cos(_angle * b3Scalar(0.5)));
			
 
				+	}
			
 
				+  /**@brief Set the quaternion using Euler angles
			
 
				+   * @param yaw Angle around Y
			
 
				+   * @param pitch Angle around X
			
 
				+   * @param roll Angle around Z */
			
 
				+	void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{
			
 
				+		b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);  
			
 
				+		b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);  
			
 
				+		b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);  
			
 
				+		b3Scalar cosYaw = b3Cos(halfYaw);
			
 
				+		b3Scalar sinYaw = b3Sin(halfYaw);
			
 
				+		b3Scalar cosPitch = b3Cos(halfPitch);
			
 
				+		b3Scalar sinPitch = b3Sin(halfPitch);
			
 
				+		b3Scalar cosRoll = b3Cos(halfRoll);
			
 
				+		b3Scalar sinRoll = b3Sin(halfRoll);
			
 
				+		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
			
 
				+			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
			
 
				+			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
			
 
				+			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
			
 
				+	}
			
 
				+  /**@brief Set the quaternion using euler angles 
			
 
				+   * @param yaw Angle around Z
			
 
				+   * @param pitch Angle around Y
			
 
				+   * @param roll Angle around X */
			
 
				+	void setEulerZYX(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
			
 
				+	{
			
 
				+		b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);  
			
 
				+		b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);  
			
 
				+		b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);  
			
 
				+		b3Scalar cosYaw = b3Cos(halfYaw);
			
 
				+		b3Scalar sinYaw = b3Sin(halfYaw);
			
 
				+		b3Scalar cosPitch = b3Cos(halfPitch);
			
 
				+		b3Scalar sinPitch = b3Sin(halfPitch);
			
 
				+		b3Scalar cosRoll = b3Cos(halfRoll);
			
 
				+		b3Scalar sinRoll = b3Sin(halfRoll);
			
 
				+		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
			
 
				+                         cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
			
 
				+                         cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
			
 
				+                         cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
			
 
				+	}
			
 
				+  /**@brief Add two quaternions
			
 
				+   * @param q The quaternion to add to this one */
			
 
				+	B3_FORCE_INLINE	b3Quaternion& operator+=(const b3Quaternion& q)
			
 
				+	{
			
 
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		mVec128 = _mm_add_ps(mVec128, q.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vaddq_f32(mVec128, q.mVec128);
			
 
				+#else	
			
 
				+		m_floats[0] += q.getX(); 
			
 
				+        m_floats[1] += q.getY(); 
			
 
				+        m_floats[2] += q.getZ(); 
			
 
				+        m_floats[3] += q.m_floats[3];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Subtract out a quaternion
			
 
				+   * @param q The quaternion to subtract from this one */
			
 
				+	b3Quaternion& operator-=(const b3Quaternion& q) 
			
 
				+	{
			
 
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vsubq_f32(mVec128, q.mVec128);
			
 
				+#else	
			
 
				+		m_floats[0] -= q.getX(); 
			
 
				+        m_floats[1] -= q.getY(); 
			
 
				+        m_floats[2] -= q.getZ(); 
			
 
				+        m_floats[3] -= q.m_floats[3];
			
 
				+#endif
			
 
				+        return *this;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Scale this quaternion
			
 
				+   * @param s The scalar to scale by */
			
 
				+	b3Quaternion& operator*=(const b3Scalar& s)
			
 
				+	{
			
 
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
			
 
				+		vs = b3_pshufd_ps(vs, 0);	//	(S S S S)
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vs);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vmulq_n_f32(mVec128, s);
			
 
				+#else
			
 
				+		m_floats[0] *= s; 
			
 
				+        m_floats[1] *= s; 
			
 
				+        m_floats[2] *= s; 
			
 
				+        m_floats[3] *= s;
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Multiply this quaternion by q on the right
			
 
				+   * @param q The other quaternion 
			
 
				+   * Equivalent to this = this * q */
			
 
				+	b3Quaternion& operator*=(const b3Quaternion& q)
			
 
				+	{
			
 
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		__m128 vQ2 = q.get128();
			
 
				+		
			
 
				+		__m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0,1,2,0));
			
 
				+		__m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));
			
 
				+		
			
 
				+		A1 = A1 * B1;
			
 
				+		
			
 
				+		__m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1,2,0,1));
			
 
				+		__m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
			
 
				+		
			
 
				+		A2 = A2 * B2;
			
 
				+		
			
 
				+		B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2,0,1,2));
			
 
				+		B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
			
 
				+		
			
 
				+		B1 = B1 * B2;	//	A3 *= B3
			
 
				+		
			
 
				+		mVec128 = b3_splat_ps(mVec128, 3);	//	A0
			
 
				+		mVec128 = mVec128 * vQ2;	//	A0 * B0
			
 
				+		
			
 
				+		A1 = A1 + A2;	//	AB12
			
 
				+		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3 
			
 
				+		A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
			
 
				+		mVec128 = mVec128+ A1;	//	AB03 + AB12
			
 
				+
			
 
				+#elif defined(B3_USE_NEON)     
			
 
				+
			
 
				+        float32x4_t vQ1 = mVec128;
			
 
				+        float32x4_t vQ2 = q.get128();
			
 
				+        float32x4_t A0, A1, B1, A2, B2, A3, B3;
			
 
				+        float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
			
 
				+        
			
 
				+        {
			
 
				+        float32x2x2_t tmp;
			
 
				+        tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
			
 
				+        vQ1zx = tmp.val[0];
			
 
				+
			
 
				+        tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
			
 
				+        vQ2zx = tmp.val[0];
			
 
				+        }
			
 
				+        vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
			
 
				+
			
 
				+        vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
			
 
				+
			
 
				+        vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
			
 
				+        vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
			
 
				+
			
 
				+        A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
			
 
				+        B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
			
 
				+
			
 
				+        A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
			
 
				+        B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
			
 
				+
			
 
				+        A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
			
 
				+        B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
			
 
				+
			
 
				+        A1 = vmulq_f32(A1, B1);
			
 
				+        A2 = vmulq_f32(A2, B2);
			
 
				+        A3 = vmulq_f32(A3, B3);	//	A3 *= B3
			
 
				+        A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
			
 
				+
			
 
				+        A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
			
 
				+        A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
			
 
				+        
			
 
				+        //	change the sign of the last element
			
 
				+        A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
			
 
				+        A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
			
 
				+        
			
 
				+        mVec128 = A0;
			
 
				+#else
			
 
				+		setValue(
			
 
				+            m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
			
 
				+			m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
			
 
				+			m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
			
 
				+			m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+  /**@brief Four-component dot product of this quaternion with another
				+   * @param q The other quaternion
				+   * @return x*q.x + y*q.y + z*q.z + w*q.w */
				+	b3Scalar dot(const b3Quaternion& q) const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		// Lane-wise products, then a horizontal add folded into lane 0
				+		__m128 acc = _mm_mul_ps(mVec128, q.mVec128);
				+		__m128 fold = _mm_movehl_ps(acc, acc);
				+		acc = _mm_add_ps(acc, fold);
				+		fold = _mm_shuffle_ps(acc, acc, 0x55);
				+		acc = _mm_add_ss(acc, fold);
				+
				+		return _mm_cvtss_f32(acc);
				+#elif defined(B3_USE_NEON)
				+		const float32x4_t products = vmulq_f32(mVec128, q.mVec128);
				+		float32x2_t pairSum = vpadd_f32(vget_low_f32(products), vget_high_f32(products));
				+		pairSum = vpadd_f32(pairSum, pairSum);
				+		return vget_lane_f32(pairSum, 0);
				+#else
				+		// Scalar fallback: accumulate in x, y, z, w order
				+		b3Scalar sum = m_floats[0] * q.getX();
				+		sum += m_floats[1] * q.getY();
				+		sum += m_floats[2] * q.getZ();
				+		sum += m_floats[3] * q.m_floats[3];
				+		return sum;
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the squared length of the quaternion (its dot product with itself) */
				+	b3Scalar length2() const
				+	{
				+		const b3Quaternion& self = *this;
				+		return self.dot(self);
				+	}
			
 
				+
			
 
				+  /**@brief Return the length of the quaternion, i.e. the square root of length2() */
				+	b3Scalar length() const
				+	{
				+		const b3Scalar squared = length2();
				+		return b3Sqrt(squared);
				+	}
			
 
				+
			
 
				+  /**@brief Normalize this quaternion in place
				+   * Afterwards x^2 + y^2 + z^2 + w^2 = 1.
				+   * @return Reference to this quaternion */
				+	b3Quaternion& normalize()
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		// Squared length folded into lane 0
				+		__m128 acc = _mm_mul_ps(mVec128, mVec128);
				+		__m128 fold = _mm_movehl_ps(acc, acc);
				+		acc = _mm_add_ps(acc, fold);
				+		fold = _mm_shuffle_ps(acc, acc, 0x55);
				+		acc = _mm_add_ss(acc, fold);
				+
				+		// 1 / sqrt(length2), broadcast to all lanes and applied
				+		acc = _mm_sqrt_ss(acc);
				+		acc = _mm_div_ss(b3vOnes, acc);
				+		acc = b3_pshufd_ps(acc, 0); // splat
				+		mVec128 = _mm_mul_ps(mVec128, acc);
				+
				+		return *this;
				+#else
				+		// Non-SSE builds divide by the length via operator/=
				+		const b3Scalar len = length();
				+		*this /= len;
				+		return *this;
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return a copy of this quaternion with every component multiplied by s
				+   * @param s The scale factor
				+   * @return The scaled quaternion; this object is not modified */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator*(const b3Scalar& s) const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		__m128 factor = _mm_load_ss(&s);	//	(S 0 0 0)
				+		factor = b3_pshufd_ps(factor, 0x00);	//	(S S S S)
				+
				+		const __m128 scaled = _mm_mul_ps(mVec128, factor);
				+		return b3Quaternion(scaled);
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vmulq_n_f32(mVec128, s));
				+#else
				+		const b3Scalar sx = getX() * s;
				+		const b3Scalar sy = getY() * s;
				+		const b3Scalar sz = getZ() * s;
				+		const b3Scalar sw = m_floats[3] * s;
				+		return b3Quaternion(sx, sy, sz, sw);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return an inversely scaled version of this quaternion
				+   * @param s The divisor; asserted to be non-zero
				+   * @return This quaternion multiplied by 1 / s */
				+	b3Quaternion operator/(const b3Scalar& s) const
				+	{
				+		b3Assert(s != b3Scalar(0.0));
				+		const b3Scalar reciprocal = b3Scalar(1.0) / s;
				+		return (*this) * reciprocal;
				+	}
			
 
				+
			
 
				+  /**@brief Divide this quaternion by a scalar in place
				+   * @param s The divisor; asserted to be non-zero
				+   * @return Reference to this quaternion after multiplying by 1 / s */
				+	b3Quaternion& operator/=(const b3Scalar& s)
				+	{
				+		b3Assert(s != b3Scalar(0.0));
				+		const b3Scalar reciprocal = b3Scalar(1.0) / s;
				+		*this *= reciprocal;
				+		return *this;
				+	}
			
 
				+
			
 
				+  /**@brief Return a normalized copy of this quaternion (this / length()); this object is not modified */
				+	b3Quaternion normalized() const
				+	{
				+		const b3Scalar len = length();
				+		return (*this) / len;
				+	}
			
 
				+  /**@brief Return the angle between this quaternion and the other
				+   * @param q The other quaternion
				+   * @return acos of the dot product divided by the product of both lengths;
				+   * that product is asserted to be non-zero */
				+	b3Scalar angle(const b3Quaternion& q) const
				+	{
				+		const b3Scalar lengthProduct = b3Sqrt(length2() * q.length2());
				+		b3Assert(lengthProduct != b3Scalar(0.0));
				+		const b3Scalar cosine = dot(q) / lengthProduct;
				+		return b3Acos(cosine);
				+	}
			
 
				+  /**@brief Return the angle of rotation represented by this quaternion, computed as 2 * acos(w) */
				+	b3Scalar getAngle() const
				+	{
				+		return b3Scalar(2.) * b3Acos(m_floats[3]);
				+	}
			
 
				+
			
 
				+	/**@brief Return the axis of the rotation represented by this quaternion
				+	 * @return (x, y, z) divided by sqrt(1 - w*w), or the arbitrary axis (1, 0, 0)
				+	 * when 1 - w*w is below 10 * B3_EPSILON and the division would be unstable */
				+	b3Vector3 getAxis() const
				+	{
				+		const b3Scalar sinSquared = 1.f-m_floats[3]*m_floats[3];
				+
				+		// Guard against dividing by (nearly) zero
				+		if (sinSquared < b3Scalar(10.) * B3_EPSILON)
				+		{
				+			return b3MakeVector3(1.0, 0.0, 0.0);  // Arbitrary
				+		}
				+
				+		const b3Scalar invSin = 1.f/b3Sqrt(sinSquared);
				+		return b3MakeVector3(m_floats[0] * invSin, m_floats[1] * invSin, m_floats[2] * invSin);
				+	}
			
 
				+
			
 
				+	/**@brief Return the inverse of this quaternion
				+	 * The scalar path negates x, y and z and keeps w; no renormalization is done.
				+	 * The SIMD paths XOR with b3vQInv (presumably the matching sign mask - confirm). */
				+	b3Quaternion inverse() const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		const __m128 flipped = _mm_xor_ps(mVec128, b3vQInv);
				+		return b3Quaternion(flipped);
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
				+#else
				+		const b3Scalar nx = -m_floats[0];
				+		const b3Scalar ny = -m_floats[1];
				+		const b3Scalar nz = -m_floats[2];
				+		return b3Quaternion(nx, ny, nz, m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the component-wise sum of this quaternion and the other
				+   * @param q2 The other quaternion
				+   * @return A new quaternion; neither operand is modified */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator+(const b3Quaternion& q2) const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		const __m128 lanes = _mm_add_ps(mVec128, q2.mVec128);
				+		return b3Quaternion(lanes);
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
				+#else
				+		return b3Quaternion(
				+			getX() + q2.getX(),
				+			getY() + q2.getY(),
				+			getZ() + q2.getZ(),
				+			m_floats[3] + q2.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the component-wise difference between this quaternion and the other
				+   * @param q2 The quaternion to subtract
				+   * @return A new quaternion; neither operand is modified */
				+	B3_FORCE_INLINE b3Quaternion
				+	operator-(const b3Quaternion& q2) const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		const __m128 lanes = _mm_sub_ps(mVec128, q2.mVec128);
				+		return b3Quaternion(lanes);
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
				+#else
				+		return b3Quaternion(
				+			getX() - q2.getX(),
				+			getY() - q2.getY(),
				+			getZ() - q2.getZ(),
				+			m_floats[3] - q2.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the negative of this quaternion
				+   * All four components, including w, have their sign flipped. */
				+	B3_FORCE_INLINE b3Quaternion operator-() const
				+	{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		const __m128 flipped = _mm_xor_ps(mVec128, b3vMzeroMask);
				+		return b3Quaternion(flipped);
				+#elif defined(B3_USE_NEON)
				+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask) );
				+#else
				+		return b3Quaternion(-getX(), -getY(), -getZ(), -m_floats[3]);
				+#endif
				+	}
			
 
				+  /**@brief Of qd and -qd, return the one farther from this quaternion
				+   * @param qd The candidate quaternion
				+   * @return qd when |this - qd|^2 is strictly greater than |this + qd|^2, otherwise -qd */
				+	B3_FORCE_INLINE b3Quaternion farthest( const b3Quaternion& qd) const
				+	{
				+		const b3Quaternion diff = *this - qd;
				+		const b3Quaternion sum = *this + qd;
				+		const bool qdIsFarther = diff.dot(diff) > sum.dot(sum);
				+		if (qdIsFarther)
				+		{
				+			return qd;
				+		}
				+		return (-qd);
				+	}
			
 
				+
			
 
				+	/**@brief Of qd and -qd, return the one nearer to this quaternion
				+	 * @param qd The candidate quaternion
				+	 * @return qd when |this - qd|^2 is strictly less than |this + qd|^2, otherwise -qd */
				+	B3_FORCE_INLINE b3Quaternion nearest( const b3Quaternion& qd) const
				+	{
				+		const b3Quaternion diff = *this - qd;
				+		const b3Quaternion sum = *this + qd;
				+		const bool qdIsNearer = diff.dot(diff) < sum.dot(sum);
				+		if (qdIsNearer)
				+		{
				+			return qd;
				+		}
				+		return (-qd);
				+	}
			
 
				+
			
 
				+
			
 
				+  /**@brief Spherical linear interpolation between this quaternion and another
				+   * @param q The other quaternion to interpolate with
				+   * @param t The interpolation ratio. If t = 0 the result is this, if t = 1 the result is q.
				+   * @return The interpolated quaternion; this is returned unchanged when the
				+   * normalized dot product does not lie strictly between -1 and 1.
				+   * Slerp interpolates assuming constant velocity.  */
				+	b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
				+	{
				+		const b3Scalar magnitude = b3Sqrt(length2() * q.length2());
				+		b3Assert(magnitude > b3Scalar(0));
				+
				+		const b3Scalar product = dot(q) / magnitude;
				+
				+		// Nothing to interpolate when the inputs are (anti)parallel
				+		if (!(b3Fabs(product) < b3Scalar(1)))
				+		{
				+			return *this;
				+		}
				+
				+		// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
				+		const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
				+
				+		const b3Scalar theta = b3Acos(sign * product);
				+		const b3Scalar weightQ = b3Sin(sign * t * theta);
				+		const b3Scalar invSinTheta = b3Scalar(1.0) / b3Sin(theta);
				+		const b3Scalar weightThis = b3Sin((b3Scalar(1.0) - t) * theta);
				+
				+		const b3Scalar rx = (m_floats[0] * weightThis + q.getX() * weightQ) * invSinTheta;
				+		const b3Scalar ry = (m_floats[1] * weightThis + q.getY() * weightQ) * invSinTheta;
				+		const b3Scalar rz = (m_floats[2] * weightThis + q.getZ() * weightQ) * invSinTheta;
				+		const b3Scalar rw = (m_floats[3] * weightThis + q.m_floats[3] * weightQ) * invSinTheta;
				+		return b3Quaternion(rx, ry, rz, rw);
				+	}
			
 
				+
			
 
				+	/**@brief Return a reference to a shared identity quaternion (0, 0, 0, 1) */
				+	static const b3Quaternion&	getIdentity()
				+	{
				+		static const b3Quaternion kIdentity(
				+			b3Scalar(0.),
				+			b3Scalar(0.),
				+			b3Scalar(0.),
				+			b3Scalar(1.));
				+		return kIdentity;
				+	}
			
 
				+
			
 
				+	/**@brief Return a const reference to the w component, stored in m_floats[3] */
				+	B3_FORCE_INLINE const b3Scalar& getW() const
				+	{
				+		return m_floats[3];
				+	}
			
 
				+
			
 
				+	
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/**@brief Return the product of two quaternions
				+ * @param q1 The left operand
				+ * @param q2 The right operand
				+ * @return q1 * q2 as a new quaternion; neither operand is modified.
				+ * Three implementations are selected at compile time: SSE, NEON and a
				+ * scalar fallback. The scalar branch spells out the formula. */
				+B3_FORCE_INLINE b3Quaternion
				+operator*(const b3Quaternion& q1, const b3Quaternion& q2) 
				+{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	__m128 vQ1 = q1.get128();
				+	__m128 vQ2 = q2.get128();
				+	__m128 A0, A1, B1, A2, B2;
				+    
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
				+
				+	A1 = A1 * B1;
				+	
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext 
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
				+
				+	A2 = A2 * B2;
				+
				+	B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext   (B1/B2 are reused as the third operand pair)
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
				+	
				+	B1 = B1 * B2;	//	A3 *= B3
				+
				+	A0 = b3_splat_ps(vQ1, 3);	//	A0
				+	A0 = A0 * vQ2;	//	A0 * B0
				+
				+	A1 = A1 + A2;	//	AB12
				+	A0 =  A0 - B1;	//	AB03 = AB0 - AB3 
				+	
				+    A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
				+	A0 = A0 + A1;	//	AB03 + AB12
				+	
				+	return b3Quaternion(A0);
				+
				+#elif defined(B3_USE_NEON)     
				+
				+	float32x4_t vQ1 = q1.get128();
				+	float32x4_t vQ2 = q2.get128();
				+	float32x4_t A0, A1, B1, A2, B2, A3, B3;
				+    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
				+    
				+    {
				+    float32x2x2_t tmp;
				+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
				+    vQ1zx = tmp.val[0];
				+
				+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
				+    vQ2zx = tmp.val[0];
				+    }
				+    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
				+
				+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
				+    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
				+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
				+
				+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
				+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
				+	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
				+
				+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
				+	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
				+	
				+    //	change the sign of the last element
				+    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
				+	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
				+	
				+	return b3Quaternion(A0);
				+
				+#else	// scalar fallback: constructor arguments are x, y, z, w of the product
				+	return b3Quaternion(
				+        q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
				+		q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
				+		q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
				+		q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ()); 
				+#endif
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Quaternion	// product of a quaternion (left) and a 3-vector (right)
				+operator*(const b3Quaternion& q, const b3Vector3& w)	// per the scalar branch, w contributes only x, y, z (no w term)
				+{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	__m128 vQ1 = q.get128();
				+	__m128 vQ2 = w.get128();
				+	__m128 A1, B1, A2, B2, A3, B3;
				+	
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3,3,3,0));
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0,1,2,0));
				+
				+	A1 = A1 * B1;
				+	
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
				+
				+	A2 = A2 * B2;
				+
				+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
				+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
				+	
				+	A3 = A3 * B3;	//	A3 *= B3
				+
				+	A1 = A1 + A2;	//	AB12
				+	A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
				+    A1 = A1 - A3;	//	AB123 = AB12 - AB3 
				+	
				+	return b3Quaternion(A1);
				+    
				+#elif defined(B3_USE_NEON)     
				+
				+	float32x4_t vQ1 = q.get128();
				+	float32x4_t vQ2 = w.get128();
				+	float32x4_t A1, B1, A2, B2, A3, B3;
				+    float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
				+    
				+    vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); 
				+    {
				+    float32x2x2_t tmp;
				+
				+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
				+    vQ2zx = tmp.val[0];
				+
				+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
				+    vQ1zx = tmp.val[0];
				+    }
				+
				+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+    A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X 
				+    B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x 
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
				+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
				+
				+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
				+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
				+
				+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
				+	
				+    //	change the sign of the last element
				+    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
				+	
				+    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
				+	
				+	return b3Quaternion(A1);
				+    
				+#else	// scalar fallback: constructor arguments are x, y, z, w of the product
				+	return b3Quaternion( 
				+         q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
				+		 q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
				+		 q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
				+		-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ()); 
				+#endif
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Quaternion	// product of a 3-vector (left) and a quaternion (right)
				+operator*(const b3Vector3& w, const b3Quaternion& q)	// per the scalar branch, w contributes only x, y, z (no w term)
				+{
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	__m128 vQ1 = w.get128();
				+	__m128 vQ2 = q.get128();
				+	__m128 A1, B1, A2, B2, A3, B3;
				+	
				+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0));  // X Y  z x
				+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));  // W W  W X 
				+
				+	A1 = A1 * B1;
				+	
				+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
				+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
				+
				+	A2 = A2 *B2;
				+
				+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
				+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
				+	
				+	A3 = A3 * B3;	//	A3 *= B3
				+
				+	A1 = A1 + A2;	//	AB12
				+	A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
				+	A1 = A1 - A3;	//	AB123 = AB12 - AB3 
				+	
				+	return b3Quaternion(A1);
				+
				+#elif defined(B3_USE_NEON)     
				+
				+	float32x4_t vQ1 = w.get128();
				+	float32x4_t vQ2 = q.get128();
				+	float32x4_t  A1, B1, A2, B2, A3, B3;
				+    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
				+    
				+    {
				+    float32x2x2_t tmp;
				+   
				+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
				+    vQ1zx = tmp.val[0];
				+
				+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
				+    vQ2zx = tmp.val[0];
				+    }
				+    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
				+
				+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
				+
				+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
				+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
				+
				+    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
				+    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
				+
				+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
				+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
				+
				+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
				+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
				+
				+	A1 = vmulq_f32(A1, B1);
				+	A2 = vmulq_f32(A2, B2);
				+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
				+
				+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
				+	
				+    //	change the sign of the last element
				+    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
				+	
				+    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
				+	
				+	return b3Quaternion(A1);
				+    
				+#else	// scalar fallback: constructor arguments are x, y, z, w of the product
				+	return b3Quaternion( 
				+        +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
				+		+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
				+		+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
				+		-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ()); 
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Calculate the dot product between two quaternions
				+ * Free-function form of q1.dot(q2). */
				+B3_FORCE_INLINE b3Scalar
				+b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
				+{
				+	const b3Scalar result = q1.dot(q2);
				+	return result;
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Return the length of a quaternion
				+ * Free-function form of q.length(). */
				+B3_FORCE_INLINE b3Scalar
				+b3Length(const b3Quaternion& q)
				+{
				+	const b3Scalar result = q.length();
				+	return result;
				+}
			
 
				+
			
 
				+/**@brief Return the angle between two quaternions
				+ * Free-function form of q1.angle(q2). */
				+B3_FORCE_INLINE b3Scalar
				+b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
				+{
				+	const b3Scalar result = q1.angle(q2);
				+	return result;
				+}
			
 
				+
			
 
				+/**@brief Return the inverse of a quaternion
				+ * Free-function form of q.inverse(). */
				+B3_FORCE_INLINE b3Quaternion
				+b3Inverse(const b3Quaternion& q)
				+{
				+	const b3Quaternion result = q.inverse();
				+	return result;
				+}
			
 
				+
			
 
				+/**@brief Return the result of spherical linear interpolation between two quaternions
				+ * @param q1 The first quaternion
				+ * @param q2 The second quaternion
				+ * @param t The ratio between q1 and q2.  t = 0 returns q1, t = 1 returns q2
				+ * Slerp assumes constant velocity between positions.
				+ * Free-function form of q1.slerp(q2, t). */
				+B3_FORCE_INLINE b3Quaternion
				+b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
				+{
				+	const b3Quaternion result = q1.slerp(q2, t);
				+	return result;
				+}
			
 
				+
			
 
				+/**@brief Return the quaternion product rot0 * rot1 (named-function form of operator*) */
				+B3_FORCE_INLINE b3Quaternion
				+b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
				+{
				+	const b3Quaternion product = rot0 * rot1;
				+	return product;
				+}
			
 
				+
			
 
				+/**@brief Return a normalized copy of orn (free-function form of orn.normalized()) */
				+B3_FORCE_INLINE b3Quaternion
				+b3QuatNormalized(const b3Quaternion& orn)
				+{
				+	const b3Quaternion unit = orn.normalized();
				+	return unit;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/**@brief Rotate a vector by a quaternion
				+ * Computes rotation * v * rotation.inverse() and returns the x, y, z part.
				+ * @param rotation The rotation quaternion
				+ * @param v The vector to rotate
				+ * @return The rotated vector */
				+B3_FORCE_INLINE b3Vector3
				+b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
				+{
				+	b3Quaternion rotated = rotation * v;
				+	rotated *= rotation.inverse();
				+#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	// Mask the result with b3vFFF0fMask before building the vector
				+	return b3MakeVector3(_mm_and_ps(rotated.get128(), b3vFFF0fMask));
				+#elif defined(B3_USE_NEON)
				+	// Mask the result with b3vFFF0Mask before building the vector
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)rotated.get128(), b3vFFF0Mask));
				+#else
				+	return b3MakeVector3(rotated.getX(), rotated.getY(), rotated.getZ());
				+#endif
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Quaternion 
			
 
				+b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
				+{
				+	// Builds a quaternion from the cross and dot products of v0 and v1.
				+	const b3Vector3 axis = v0.cross(v1);
				+	const b3Scalar cosAngle = v0.dot(v1);
				+
				+	// Dot product within B3_EPSILON of -1: use a vector from b3PlaneSpace1 with w = 0
				+	if (cosAngle < -1.0 + B3_EPSILON)
				+	{
				+		b3Vector3 normal, unused;
				+		b3PlaneSpace1(v0, normal, unused);
				+		return b3Quaternion(normal.getX(), normal.getY(), normal.getZ(), 0.0f); // just pick any vector that is orthogonal to v0
				+	}
				+
				+	const b3Scalar scale = b3Sqrt((1.0f + cosAngle) * 2.0f);
				+	const b3Scalar invScale = 1.0f / scale;
				+
				+	return b3Quaternion(
				+		axis.getX() * invScale,
				+		axis.getY() * invScale,
				+		axis.getZ() * invScale,
				+		scale * 0.5f);
				+}
			
 
				+
			
 
				+/**@brief Normalize both vectors in place, then return b3ShortestArcQuat(v0, v1)
				+ * @param v0 First vector; modified (normalized) by this call
				+ * @param v1 Second vector; modified (normalized) by this call */
				+B3_FORCE_INLINE b3Quaternion
				+b3ShortestArcQuatNormalize2(b3Vector3& v0,b3Vector3& v1)
				+{
				+	v0.normalize();
				+	v1.normalize();
				+	const b3Quaternion arc = b3ShortestArcQuat(v0, v1);
				+	return arc;
				+}
			
 
				+
			
 
				+#endif //B3_SIMD__QUATERNION_H_
			
 
				+
			
 
				+
			
 
				+
			
--- a/include/Bullet3Common/b3Random.h
+++ b/include/Bullet3Common/b3Random.h
@@ -0,0 +1,50 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GEN_RANDOM_H
			
 
				+#define B3_GEN_RANDOM_H
			
 
				+
			
 
				+#include "b3Scalar.h"
			
 
				+
			
 
				+#ifdef MT19937
			
 
				+
			
 
				+#include <limits.h>
			
 
				+#include <mt19937.h>
			
 
				+
			
 
				+#define B3_RAND_MAX UINT_MAX
			
 
				+
			
 
				+/** Seed the generator (MT19937 build): forwards the seed to init_genrand(). */
				+B3_FORCE_INLINE void b3Srand(unsigned int seed)
				+{
				+	init_genrand(seed);
				+}
			
 
				+/** Next random value (MT19937 build): returns genrand_int32() as unsigned int. */
				+B3_FORCE_INLINE unsigned int b3rand()
				+{
				+	return genrand_int32();
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#define B3_RAND_MAX RAND_MAX
			
 
				+
			
 
				+/** Seed the generator (default build): forwards the seed to srand(). */
				+B3_FORCE_INLINE void b3Srand(unsigned int seed)
				+{
				+	srand(seed);
				+}
			
 
				+/** Next random value (default build): returns rand() converted to unsigned int. */
				+B3_FORCE_INLINE unsigned int b3rand()
				+{
				+	return rand();
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+/** Return a random scalar between minRange and maxRange.
				+ *  b3rand() is divided by (B3_RAND_MAX + 1), and the quotient is then scaled
				+ *  by (maxRange - minRange) and offset by minRange.
				+ *  NOTE(review): if b3Scalar is single precision the quotient may round up to
				+ *  exactly 1, so maxRange itself could be returned - confirm whether callers
				+ *  rely on a half-open range. */
				+inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange)
				+{
				+	const b3Scalar span = maxRange - minRange;
				+	return (b3rand() / (b3Scalar(B3_RAND_MAX) + b3Scalar(1.0))) * span + minRange;
				+}
			
 
				+
			
 
				+
			
 
				+#endif //B3_GEN_RANDOM_H
			
 
				+
			
--- a/include/Bullet3Common/b3Scalar.h
+++ b/include/Bullet3Common/b3Scalar.h
@@ -0,0 +1,661 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_SCALAR_H
			
 
				+#define B3_SCALAR_H
			
 
				+
			
 
				+#ifdef B3_MANAGED_CODE
			
 
				+//Aligned data types not supported in managed code
			
 
				+#pragma unmanaged
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+#include <math.h>
			
 
				+#include <stdlib.h>//size_t for MSVC 6.0
			
 
				+#include <float.h>
			
 
				+
			
 
				+//Original repository is at http://github.com/erwincoumans/bullet3
			
 
				+#define B3_BULLET_VERSION 300
			
 
				+///Returns the compile-time version constant B3_BULLET_VERSION (300) of these headers.
			
 
				+inline int	b3GetVersion()
			
 
				+{
			
 
				+	return B3_BULLET_VERSION;
			
 
				+}
			
 
				+
			
 
				+#if defined(DEBUG) || defined (_DEBUG)
			
 
				+#define B3_DEBUG
			
 
				+#endif
			
 
				+
			
 
				+#include "b3Logging.h"//for b3Error
			
 
				+
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+		#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
			
 
				+
			
 
				+			#define B3_FORCE_INLINE inline
			
 
				+			#define B3_ATTRIBUTE_ALIGNED16(a) a
			
 
				+			#define B3_ATTRIBUTE_ALIGNED64(a) a
			
 
				+			#define B3_ATTRIBUTE_ALIGNED128(a) a
			
 
				+		#else
			
 
				+			//#define B3_HAS_ALIGNED_ALLOCATOR
			
 
				+			#pragma warning(disable : 4324) // disable padding warning
			
 
				+//			#pragma warning(disable:4530) // Disable the "exception handling used but not enabled" warning triggered by the MSVC STL.
			
 
				+//			#pragma warning(disable:4996) //Turn off warnings about deprecated C routines
			
 
				+//			#pragma warning(disable:4786) // Disable the "debug name too long" warning
			
 
				+
			
 
				+			#define B3_FORCE_INLINE __forceinline
			
 
				+			#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
			
 
				+			#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
			
 
				+			#define B3_ATTRIBUTE_ALIGNED128(a) __declspec (align(128)) a
			
 
				+		#ifdef _XBOX
			
 
				+			#define B3_USE_VMX128
			
 
				+
			
 
				+			#include <ppcintrinsics.h>
			
 
				+ 			#define B3_HAVE_NATIVE_FSEL
			
 
				+ 			#define b3Fsel(a,b,c) __fsel((a),(b),(c))
			
 
				+		#else
			
 
				+
			
 
				+#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (B3_USE_DOUBLE_PRECISION))
			
 
				+			#define B3_USE_SSE
			
 
				+			#ifdef B3_USE_SSE
			
 
				+			//B3_USE_SSE_IN_API is disabled under Windows by default, because 
			
 
				+			//it makes it harder to integrate Bullet into your application under Windows 
			
 
				+			//(structures embedding Bullet structs/classes need to be 16-byte aligned)
			
 
				+			//with relatively little performance gain
			
 
				+			//If you are not embedding Bullet data in your classes, or you make sure that those classes are aligned on 16-byte boundaries,
			
 
				+			//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
			
 
				+			//#define B3_USE_SSE_IN_API
			
 
				+			#endif //B3_USE_SSE
			
 
				+			#include <emmintrin.h>
			
 
				+#endif
			
 
				+
			
 
				+		#endif//_XBOX
			
 
				+
			
 
				+		#endif //__MINGW32__
			
 
				+
			
 
				+#ifdef B3_DEBUG
			
 
				+	#ifdef _MSC_VER
			
 
				+		#include <stdio.h>
			
 
				+		#define b3Assert(x) { if(!(x)){b3Error("Assert "__FILE__ ":%u ("#x")\n", __LINE__);__debugbreak();	}}
			
 
				+	#else//_MSC_VER
			
 
				+		#include <assert.h>
			
 
				+		#define b3Assert assert
			
 
				+	#endif//_MSC_VER
			
 
				+#else
			
 
				+		#define b3Assert(x)
			
 
				+#endif
			
 
				+		//b3FullAssert is optional, slows down a lot
			
 
				+		#define b3FullAssert(x)
			
 
				+
			
 
				+		#define b3Likely(_c)  _c
			
 
				+		#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+	
			
 
				+#if defined	(__CELLOS_LV2__)
			
 
				+		#define B3_FORCE_INLINE inline __attribute__((always_inline))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
			
 
				+		#ifndef assert
			
 
				+		#include <assert.h>
			
 
				+		#endif
			
 
				+#ifdef B3_DEBUG
			
 
				+#ifdef __SPU__
			
 
				+#include <spu_printf.h>
			
 
				+#define printf spu_printf
			
 
				+	#define b3Assert(x) {if(!(x)){b3Error("Assert "__FILE__ ":%u ("#x")\n", __LINE__);spu_hcmpeq(0,0);}}
			
 
				+#else
			
 
				+	#define b3Assert assert
			
 
				+#endif
			
 
				+	
			
 
				+#else
			
 
				+		#define b3Assert(x)
			
 
				+#endif
			
 
				+		//b3FullAssert is optional, slows down a lot
			
 
				+		#define b3FullAssert(x)
			
 
				+
			
 
				+		#define b3Likely(_c)  _c
			
 
				+		#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+#ifdef USE_LIBSPE2
			
 
				+
			
 
				+		#define B3_FORCE_INLINE __inline
			
 
				+		#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
			
 
				+		#ifndef assert
			
 
				+		#include <assert.h>
			
 
				+		#endif
			
 
				+#ifdef B3_DEBUG
			
 
				+		#define b3Assert assert
			
 
				+#else
			
 
				+		#define b3Assert(x)
			
 
				+#endif
			
 
				+		//b3FullAssert is optional, slows down a lot
			
 
				+		#define b3FullAssert(x)
			
 
				+
			
 
				+
			
 
				+		#define b3Likely(_c)   __builtin_expect((_c), 1)
			
 
				+		#define b3Unlikely(_c) __builtin_expect((_c), 0)
			
 
				+		
			
 
				+
			
 
				+#else
			
 
				+	//non-windows systems
			
 
				+
			
 
				+#if (defined (__APPLE__) && (!defined (B3_USE_DOUBLE_PRECISION)))
			
 
				+    #if defined (__i386__) || defined (__x86_64__)
			
 
				+        #define B3_USE_SSE
			
 
				+		//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
			
 
				+		//if apps run into issues, we will disable the next line
			
 
				+		#define B3_USE_SSE_IN_API
			
 
				+        #ifdef B3_USE_SSE
			
 
				+            // include appropriate SSE level
			
 
				+            #if defined (__SSE4_1__)
			
 
				+                #include <smmintrin.h>
			
 
				+            #elif defined (__SSSE3__)
			
 
				+                #include <tmmintrin.h>
			
 
				+            #elif defined (__SSE3__)
			
 
				+                #include <pmmintrin.h>
			
 
				+            #else
			
 
				+                #include <emmintrin.h>
			
 
				+            #endif
			
 
				+        #endif //B3_USE_SSE
			
 
				+    #elif defined( __armv7__ )
			
 
				+        #ifdef __clang__
			
 
				+            #define B3_USE_NEON 1
			
 
				+
			
 
				+            #if defined B3_USE_NEON && defined (__clang__)
			
 
				+                #include <arm_neon.h>
			
 
				+            #endif//B3_USE_NEON
			
 
				+       #endif //__clang__
			
 
				+    #endif//__arm__
			
 
				+
			
 
				+	#define B3_FORCE_INLINE inline __attribute__ ((always_inline))
			
 
				+///@todo: check out alignment methods for other platforms/compilers
			
 
				+	#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
			
 
				+	#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
			
 
				+	#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
			
 
				+	#ifndef assert
			
 
				+	#include <assert.h>
			
 
				+	#endif
			
 
				+
			
 
				+	#if defined(DEBUG) || defined (_DEBUG)
			
 
				+	 #if defined (__i386__) || defined (__x86_64__)
			
 
				+	#include <stdio.h>
			
 
				+	 #define b3Assert(x)\
			
 
				+	{\
			
 
				+	if(!(x))\
			
 
				+	{\
			
 
				+		b3Error("Assert %s in line %d, file %s\n",#x, __LINE__, __FILE__);\
			
 
				+		asm volatile ("int3");\
			
 
				+	}\
			
 
				+	}
			
 
				+	#else//defined (__i386__) || defined (__x86_64__)
			
 
				+		#define b3Assert assert
			
 
				+	#endif//defined (__i386__) || defined (__x86_64__)
			
 
				+	#else//defined(DEBUG) || defined (_DEBUG)
			
 
				+		#define b3Assert(x)
			
 
				+	#endif//defined(DEBUG) || defined (_DEBUG)
			
 
				+
			
 
				+	//b3FullAssert is optional, slows down a lot
			
 
				+	#define b3FullAssert(x)
			
 
				+	#define b3Likely(_c)  _c
			
 
				+	#define b3Unlikely(_c) _c
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+		#define B3_FORCE_INLINE inline
			
 
				+		///@todo: check out alignment methods for other platforms/compilers
			
 
				+		#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
			
 
				+		#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
			
 
				+		///#define B3_ATTRIBUTE_ALIGNED16(a) a
			
 
				+		///#define B3_ATTRIBUTE_ALIGNED64(a) a
			
 
				+		///#define B3_ATTRIBUTE_ALIGNED128(a) a
			
 
				+		#ifndef assert
			
 
				+		#include <assert.h>
			
 
				+		#endif
			
 
				+
			
 
				+#if defined(DEBUG) || defined (_DEBUG)
			
 
				+		#define b3Assert assert
			
 
				+#else
			
 
				+		#define b3Assert(x)
			
 
				+#endif
			
 
				+
			
 
				+		//b3FullAssert is optional, slows down a lot
			
 
				+		#define b3FullAssert(x)
			
 
				+		#define b3Likely(_c)  _c
			
 
				+		#define b3Unlikely(_c) _c
			
 
				+#endif //__APPLE__ 
			
 
				+
			
 
				+#endif // LIBSPE2
			
 
				+
			
 
				+#endif	//__CELLOS_LV2__
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+///The b3Scalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
			
 
				+#if defined(B3_USE_DOUBLE_PRECISION)
			
 
				+typedef double b3Scalar;
			
 
				+//this number could be bigger in double precision
			
 
				+#define B3_LARGE_FLOAT 1e30
			
 
				+#else
			
 
				+typedef float b3Scalar;
			
 
				+//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
			
 
				+#define B3_LARGE_FLOAT 1e18f
			
 
				+#endif
			
 
				+
			
 
				+#ifdef B3_USE_SSE
			
 
				+typedef __m128 b3SimdFloat4;
			
 
				+#endif//B3_USE_SSE
			
 
				+
			
 
				+#if defined B3_USE_SSE_IN_API && defined (B3_USE_SSE)
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+#ifndef B3_NAN
			
 
				+static int b3NanMask = 0x7F800001;
			
 
				+#define B3_NAN (*(float*)&b3NanMask)
			
 
				+#endif
			
 
				+
			
 
				+#ifndef B3_INFINITY_MASK
			
 
				+static  int b3InfinityMask = 0x7F800000;
			
 
				+#define B3_INFINITY_MASK (*(float*)&b3InfinityMask)
			
 
				+#endif
			
 
				+//Arithmetic operators for __m128 (only compiled for _WIN32 builds with B3_USE_SSE and B3_USE_SSE_IN_API): they forward to _mm_add_ps/_mm_sub_ps/_mm_mul_ps
			
 
				+inline __m128 operator + (const __m128 A, const __m128 B)
			
 
				+{
			
 
				+    return _mm_add_ps(A, B);
			
 
				+}
			
 
				+
			
 
				+inline __m128 operator - (const __m128 A, const __m128 B)
			
 
				+{
			
 
				+    return _mm_sub_ps(A, B);
			
 
				+}
			
 
				+
			
 
				+inline __m128 operator * (const __m128 A, const __m128 B)
			
 
				+{
			
 
				+    return _mm_mul_ps(A, B);
			
 
				+}
			
 
				+
			
 
				+#define b3CastfTo128i(a) (_mm_castps_si128(a))
			
 
				+#define b3CastfTo128d(a) (_mm_castps_pd(a))
			
 
				+#define b3CastiTo128f(a) (_mm_castsi128_ps(a))
			
 
				+#define b3CastdTo128f(a) (_mm_castpd_ps(a))
			
 
				+#define b3CastdTo128i(a) (_mm_castpd_si128(a))
			
 
				+#define b3Assign128(r0,r1,r2,r3) _mm_setr_ps(r0,r1,r2,r3)
			
 
				+
			
 
				+#else//_WIN32
			
 
				+
			
 
				+#define b3CastfTo128i(a) ((__m128i)(a))
			
 
				+#define b3CastfTo128d(a) ((__m128d)(a))
			
 
				+#define b3CastiTo128f(a)  ((__m128) (a))
			
 
				+#define b3CastdTo128f(a) ((__m128) (a))
			
 
				+#define b3CastdTo128i(a) ((__m128i)(a))
			
 
				+#define b3Assign128(r0,r1,r2,r3) (__m128){r0,r1,r2,r3}
			
 
				+#endif//_WIN32
			
 
				+#endif //B3_USE_SSE_IN_API
			
 
				+
			
 
				+#ifdef B3_USE_NEON
			
 
				+#include <arm_neon.h>
			
 
				+//NEON flavour of the SIMD vocabulary: four packed floats plus a brace-initialising b3Assign128
			
 
				+typedef float32x4_t b3SimdFloat4;
			
 
				+//B3_INFINITY is deliberately not defined here: this header defines it unconditionally further down (FLT_MAX/DBL_MAX), so an extra definition only caused a macro redefinition
			
 
				+#define B3_NAN NAN
			
 
				+#define b3Assign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+///Expands to class-scope operator new/delete (scalar, array and placement forms): the allocating forms call b3AlignedAlloc(sizeInBytes,16), the matching deletes call b3AlignedFree, the placement forms just return/ignore the pointer.
			
 
				+#define B3_DECLARE_ALIGNED_ALLOCATOR() \
			
 
				+   B3_FORCE_INLINE void* operator new(size_t sizeInBytes)   { return b3AlignedAlloc(sizeInBytes,16); }   \
			
 
				+   B3_FORCE_INLINE void  operator delete(void* ptr)         { b3AlignedFree(ptr); }   \
			
 
				+   B3_FORCE_INLINE void* operator new(size_t, void* ptr)   { return ptr; }   \
			
 
				+   B3_FORCE_INLINE void  operator delete(void*, void*)      { }   \
			
 
				+   B3_FORCE_INLINE void* operator new[](size_t sizeInBytes)   { return b3AlignedAlloc(sizeInBytes,16); }   \
			
 
				+   B3_FORCE_INLINE void  operator delete[](void* ptr)         { b3AlignedFree(ptr); }   \
			
 
				+   B3_FORCE_INLINE void* operator new[](size_t, void* ptr)   { return ptr; }   \
			
 
				+   B3_FORCE_INLINE void  operator delete[](void*, void*)      { }   \
			
 
				+
			
 
				+
			
 
				+
			
 
				+#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS)
			
 
				+//Double-precision variants (B3_USE_DOUBLE_PRECISION or B3_FORCE_DOUBLE_FUNCTIONS): thin wrappers over the <math.h> double functions
			
 
				+B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x) { return sqrt(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) { if (x<b3Scalar(-1))	x=b3Scalar(-1); if (x>b3Scalar(1))	x=b3Scalar(1); return acos(x); }	//input is clamped to [-1,1] first
			
 
				+B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) { if (x<b3Scalar(-1))	x=b3Scalar(-1); if (x>b3Scalar(1))	x=b3Scalar(1); return asin(x); }	//input is clamped to [-1,1] first
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); }	//arguments are forwarded in order, so the parameter named x is atan2's first (numerator) argument
			
 
				+B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x,b3Scalar y) { return pow(x,y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x,b3Scalar y) { return fmod(x,y); }
			
 
				+
			
 
				+#else
			
 
				+///Single-precision square root: sqrtf by default; with USE_APPROXIMATION an integer-trick estimate of 1/sqrt(y) is refined by five Newton-Raphson steps and multiplied by y.
			
 
				+B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y) 
			
 
				+{ 
			
 
				+#ifdef USE_APPROXIMATION
			
 
				+    double x, z, tempf;
			
 
				+    unsigned int *tfptr = ((unsigned int *)&tempf) + 1;	//second 32-bit word of the double (the high word on little-endian targets); 'unsigned long' is 8 bytes on LP64, which made +1 point past tempf
			
 
				+
			
 
				+	tempf = y;
			
 
				+	*tfptr = (0xbfcdd90a - *tfptr)>>1; /* estimate of 1/sqrt(y) */
			
 
				+	x =  tempf;
			
 
				+	z =  y*b3Scalar(0.5);
			
 
				+	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);         /* iteration formula     */
			
 
				+	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
			
 
				+	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
			
 
				+	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
			
 
				+	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
			
 
				+	return x*y;
			
 
				+#else
			
 
				+	return sqrtf(y); 
			
 
				+#endif
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); }	//single-precision wrappers over the <math.h> float functions follow
			
 
				+B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) { 	//clamps x to [-1,1] before calling acosf
			
 
				+	if (x<b3Scalar(-1))	
			
 
				+		x=b3Scalar(-1); 
			
 
				+	if (x>b3Scalar(1))	
			
 
				+		x=b3Scalar(1);
			
 
				+	return acosf(x); 
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) { 	//clamps x to [-1,1] before calling asinf
			
 
				+	if (x<b3Scalar(-1))	
			
 
				+		x=b3Scalar(-1); 
			
 
				+	if (x>b3Scalar(1))	
			
 
				+		x=b3Scalar(1);
			
 
				+	return asinf(x); 
			
 
				+}
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); }	//arguments are forwarded in order, so the parameter named x is atan2f's first (numerator) argument
			
 
				+B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x,b3Scalar y) { return powf(x,y); }
			
 
				+B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x,b3Scalar y) { return fmodf(x,y); }
			
 
				+	
			
 
				+#endif
			
 
				+
			
 
				+#define B3_2_PI         b3Scalar(6.283185307179586232)
			
 
				+#define B3_PI           (B3_2_PI * b3Scalar(0.5))
			
 
				+#define B3_HALF_PI      (B3_2_PI * b3Scalar(0.25))
			
 
				+#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0))
			
 
				+#define B3_DEGS_PER_RAD  (b3Scalar(360.0) / B3_2_PI)
			
 
				+#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490)
			
 
				+
			
 
				+#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0)/b3Sqrt(b3Scalar(x))))		/* reciprocal square root */
			
 
				+
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define B3_EPSILON      DBL_EPSILON
			
 
				+#define B3_INFINITY     DBL_MAX
			
 
				+#else
			
 
				+#define B3_EPSILON      FLT_EPSILON
			
 
				+#define B3_INFINITY     FLT_MAX
			
 
				+#endif
			
 
				+///Cheap approximation of atan2(y, x) derived from the ratio of (x -/+ |y|) terms; the sign of y is applied at the end. Returns 0 for x == y == 0.
			
 
				+B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x) 
			
 
				+{
			
 
				+	b3Scalar coeff_1 = B3_PI / 4.0f;
			
 
				+	b3Scalar coeff_2 = 3.0f * coeff_1;
			
 
				+	b3Scalar abs_y = b3Fabs(y);
			
 
				+	b3Scalar angle;
			
 
				+	if (x >= 0.0f) {
			
 
				+		b3Scalar r = (x + abs_y == b3Scalar(0.0)) ? b3Scalar(1.0) : (x - abs_y) / (x + abs_y);	//x == y == 0 used to evaluate 0/0 (NaN); r = 1 yields angle 0
			
 
				+		angle = coeff_1 - coeff_1 * r;
			
 
				+	} else {
			
 
				+		b3Scalar r = (x + abs_y) / (abs_y - x);	//denominator is strictly positive here because x < 0
			
 
				+		angle = coeff_2 - coeff_1 * r;
			
 
				+	}
			
 
				+	return (y < 0.0f) ? -angle : angle;
			
 
				+}
			
 
				+///True when |x| is strictly smaller than B3_EPSILON.
			
 
				+B3_FORCE_INLINE bool      b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; }
			
 
				+///True when a lies inside the closed interval [-eps, eps], i.e. a is "equal to zero" within tolerance eps.
			
 
				+B3_FORCE_INLINE bool	b3Equal(b3Scalar a, b3Scalar eps) {
			
 
				+	return (((a) <= eps) && !((a) < -eps));
			
 
				+}
			
 
				+B3_FORCE_INLINE bool	b3GreaterEqual (b3Scalar a, b3Scalar eps) {	//despite the name this is the negation of (a <= eps): true for a > eps, and also when the comparison fails (NaN)
			
 
				+	return (!((a) <= eps));
			
 
				+}
			
 
				+
			
 
				+///Returns 1 when x < 0 and 0 otherwise (so -0.0 and NaN both give 0).
			
 
				+B3_FORCE_INLINE int       b3IsNegative(b3Scalar x) {
			
 
				+    return x < b3Scalar(0.0) ? 1 : 0;
			
 
				+}
			
 
				+///Angle unit conversions: multiply by B3_RADS_PER_DEG (degrees to radians) or B3_DEGS_PER_RAD (radians to degrees).
			
 
				+B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; }
			
 
				+B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; }
			
 
				+
			
 
				+#define B3_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name
			
 
				+///Fallback b3Fsel, compiled only when no macro of that name exists (the _XBOX section maps it to __fsel): yields b when a >= 0, otherwise c.
			
 
				+#ifndef b3Fsel
			
 
				+B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c)
			
 
				+{
			
 
				+	return a >= 0 ? b : c;
			
 
				+}
			
 
				+#endif
			
 
				+#define b3Fsels(a,b,c) (b3Scalar)b3Fsel(a,b,c)
			
 
				+
			
 
				+///Run-time byte-order probe: stores the integer 1 and inspects the byte at its lowest address.
			
 
				+B3_FORCE_INLINE bool b3MachineIsLittleEndian()
			
 
				+{
			
 
				+   long int i = 1;
			
 
				+   const char *p = (const char *) &i;
			
 
				+   if (p[0] == 1)  // Lowest address contains the least significant byte
			
 
				+	   return true;
			
 
				+   else
			
 
				+	   return false;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360
			
 
				+///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html
			
 
				+B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero) 
			
 
				+{
			
 
				+    // Set testNz to 0xFFFFFFFF if condition is nonzero, 0x00000000 if condition is zero
			
 
				+    // A nonzero value or'ed with its two's-complement negation always has the top bit set,
			
 
				+    // while zero or'ed with its negation (which is still zero) has the top bit clear.
			
 
				+    // Everything stays unsigned: negating (int)0x80000000 overflowed, and >> on a negative int is not guaranteed to be arithmetic
			
 
				+    unsigned testNz = 0u - ((condition | (0u - condition)) >> 31);
			
 
				+    unsigned testEqz = ~testNz;
			
 
				+    return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz)); 
			
 
				+}
			
 
				+B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero)	//int overload of the branch-free select: picks the first value when condition != 0
			
 
				+{
			
 
				+    unsigned testNz = 0u - ((condition | (0u - condition)) >> 31);	//all ones when condition != 0, else 0; unsigned arithmetic avoids the signed-negate overflow for 0x80000000
			
 
				+    unsigned testEqz = ~testNz; 
			
 
				+    return static_cast<int>((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
			
 
				+}
			
 
				+B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero)	//float overload: picks the first value when condition != 0
			
 
				+{
			
 
				+#ifdef B3_HAVE_NATIVE_FSEL
			
 
				+    return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero);	//condition-1 is >= 0 exactly when condition >= 1
			
 
				+#else
			
 
				+    return (condition != 0) ? valueIfConditionNonZero : valueIfConditionZero; 
			
 
				+#endif
			
 
				+}
			
 
				+///Exchanges a and b through a temporary copy (T needs copy construction and assignment).
			
 
				+template<typename T> B3_FORCE_INLINE void b3Swap(T& a, T& b)
			
 
				+{
			
 
				+	T tmp = a;
			
 
				+	a = b;
			
 
				+	b = tmp;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+//PCK: endian swapping functions. This overload reverses the four low bytes of val with masks and shifts.
			
 
				+B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val)
			
 
				+{
			
 
				+	return (((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8)  | ((val & 0x000000ff) << 24));
			
 
				+}
			
 
				+//16-bit overload: exchanges the low and the high byte of val.
			
 
				+B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val)
			
 
				+{
			
 
				+	return static_cast<unsigned short>(((val & 0xff00) >> 8) | ((val & 0x00ff) << 8));
			
 
				+}
			
 
				+//Signed convenience overload: converts to unsigned and delegates; note that the result type is unsigned.
			
 
				+B3_FORCE_INLINE unsigned b3SwapEndian(int val)
			
 
				+{
			
 
				+	return b3SwapEndian((unsigned)val);
			
 
				+}
			
 
				+//Signed 16-bit convenience overload: converts to unsigned short and delegates; the result type is unsigned short.
			
 
				+B3_FORCE_INLINE unsigned short b3SwapEndian(short val)
			
 
				+{
			
 
				+	return b3SwapEndian((unsigned short) val);
			
 
				+}
			
 
				+
			
 
				+///b3SwapEndianFloat uses char pointers to swap the endianness
			
 
				+///b3SwapEndianFloat/b3SwapEndianDouble will NOT return a float/double, because the machine might 'correct' invalid floating point values
			
 
				+///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754. 
			
 
				+///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception. 
			
 
				+///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you. 
			
 
				+///so instead of returning a float/double, the float version returns an unsigned int and the double version fills a caller-supplied byte buffer
			
 
				+B3_FORCE_INLINE unsigned int  b3SwapEndianFloat(float d)
			
 
				+{
			
 
				+    unsigned int a = 0;
			
 
				+    unsigned char *dst = (unsigned char *)&a;
			
 
				+    unsigned char *src = (unsigned char *)&d;
			
 
				+
			
 
				+    dst[0] = src[3];	//copy the four bytes of d into a in reverse order
			
 
				+    dst[1] = src[2];
			
 
				+    dst[2] = src[1];
			
 
				+    dst[3] = src[0];
			
 
				+    return a;
			
 
				+}
			
 
				+
			
 
				+// unswap using char pointers: rebuilds a float from the byte-reversed integer produced by b3SwapEndianFloat
			
 
				+B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a) 
			
 
				+{
			
 
				+    float d = 0.0f;
			
 
				+    unsigned char *src = (unsigned char *)&a;
			
 
				+    unsigned char *dst = (unsigned char *)&d;
			
 
				+
			
 
				+    dst[0] = src[3];	//copy the four bytes of a into d in reverse order
			
 
				+    dst[1] = src[2];
			
 
				+    dst[2] = src[1];
			
 
				+    dst[3] = src[0];
			
 
				+
			
 
				+    return d;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// swap using char pointers: writes the eight bytes of d, reversed, into dst (dst must provide at least 8 writable bytes)
			
 
				+B3_FORCE_INLINE void  b3SwapEndianDouble(double d, unsigned char* dst)
			
 
				+{
			
 
				+    unsigned char *src = (unsigned char *)&d;
			
 
				+
			
 
				+    dst[0] = src[7];
			
 
				+    dst[1] = src[6];
			
 
				+    dst[2] = src[5];
			
 
				+    dst[3] = src[4];
			
 
				+    dst[4] = src[3];
			
 
				+    dst[5] = src[2];
			
 
				+    dst[6] = src[1];
			
 
				+    dst[7] = src[0];
			
 
				+
			
 
				+}
			
 
				+
			
 
				+// unswap using char pointers: rebuilds a double from eight byte-reversed bytes (src must provide at least 8 readable bytes)
			
 
				+B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src) 
			
 
				+{
			
 
				+    double d = 0.0;
			
 
				+    unsigned char *dst = (unsigned char *)&d;
			
 
				+
			
 
				+    dst[0] = src[7];
			
 
				+    dst[1] = src[6];
			
 
				+    dst[2] = src[5];
			
 
				+    dst[3] = src[4];
			
 
				+    dst[4] = src[3];
			
 
				+    dst[5] = src[2];
			
 
				+    dst[6] = src[1];
			
 
				+    dst[7] = src[0];
			
 
				+
			
 
				+	return d;
			
 
				+}
			
 
				+
			
 
				+// returns normalized value in range [-B3_PI, B3_PI]: the angle is first reduced with b3Fmod by B3_2_PI, then shifted by one full turn if it is still outside the range
			
 
				+B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians) 
			
 
				+{
			
 
				+	angleInRadians = b3Fmod(angleInRadians, B3_2_PI);
			
 
				+	if(angleInRadians < -B3_PI)
			
 
				+	{
			
 
				+		return angleInRadians + B3_2_PI;
			
 
				+	}
			
 
				+	else if(angleInRadians > B3_PI)
			
 
				+	{
			
 
				+		return angleInRadians - B3_2_PI;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		return angleInRadians;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+///rudimentary class to provide type info: it stores one integer tag supplied at construction and hands it back through getObjectType()
			
 
				+struct b3TypedObject
			
 
				+{
			
 
				+	b3TypedObject(int objectType)	//not explicit, so an int converts implicitly to b3TypedObject
			
 
				+		:m_objectType(objectType)
			
 
				+	{
			
 
				+	}
			
 
				+	int	m_objectType;	//the tag; public and mutable
			
 
				+	inline int getObjectType() const
			
 
				+	{
			
 
				+		return m_objectType;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+  
			
 
				+///align a pointer to the provided alignment, upwards (an already aligned pointer is returned unchanged)
			
 
				+template <typename T>T* b3AlignPointer(T* unalignedPtr, size_t alignment)
			
 
				+{
			
 
				+		
			
 
				+	struct b3ConvertPointerSizeT	//lets the pointer be read and modified as a size_t through a union
			
 
				+	{
			
 
				+		union 
			
 
				+		{
			
 
				+				T* ptr;
			
 
				+				size_t integer;
			
 
				+		};
			
 
				+	};
			
 
				+    b3ConvertPointerSizeT converter;
			
 
				+    //round up: add alignment-1, then clear the low bits; this mask arithmetic is only a correct round-up when alignment is a power of two
			
 
				+    
			
 
				+	const size_t bit_mask = ~(alignment - 1);
			
 
				+    converter.ptr = unalignedPtr;
			
 
				+	converter.integer += alignment-1;
			
 
				+	converter.integer &= bit_mask;
			
 
				+	return converter.ptr;
			
 
				+}
			
 
				+
			
 
				+#endif //B3_SCALAR_H
			
--- a/include/Bullet3Common/b3StackAlloc.h
+++ b/include/Bullet3Common/b3StackAlloc.h
@@ -0,0 +1,116 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+/*
			
 
				+StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson
			
 
				+Nov.2006
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_STACK_ALLOC
			
 
				+#define B3_STACK_ALLOC
			
 
				+
			
 
				+#include "b3Scalar.h" //for b3Assert
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+///The b3Block class is an internal structure for the b3StackAlloc memory allocator.
			
 
				+struct b3Block
			
 
				+{
			
 
				+	b3Block*			previous;	//block that was current when this one was opened (set by b3StackAlloc::beginBlock)
			
 
				+	unsigned char*		address;	//top of the stack right after this header was allocated (set by b3StackAlloc::beginBlock)
			
 
				+};
			
 
				+
			
 
				+///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out)
			
 
				+///It owns one buffer obtained from b3AlignedAlloc(size,16) and hands out consecutive slices of it; beginBlock/endBlock release everything allocated in between.
			
 
				+///NOTE(review): no copy operations are declared although the destructor frees the buffer, so instances must not be copied.
			
 
				+class b3StackAlloc
			
 
				+{
			
 
				+public:
			
 
				+
			
 
				+	b3StackAlloc(unsigned int size)	{ ctor();create(size); }
			
 
				+	~b3StackAlloc()		{ destroy(); }
			
 
				+	
			
 
				+	///(Re)creates the buffer with room for 'size' bytes; does nothing while allocations are still outstanding.
			
 
				+	inline void		create(unsigned int size)
			
 
				+	{
			
 
				+		destroy();
			
 
				+		if(usedsize!=0)
			
 
				+			return;	//destroy() refused to release the buffer; overwriting 'data' would leak it
			
 
				+		data		=  (unsigned char*) b3AlignedAlloc(size,16);
			
 
				+		totalsize	=	data ? size : 0;	//a failed allocation must not advertise capacity
			
 
				+	}
			
 
				+	///Releases the buffer, provided nothing is allocated from it any more (asserts otherwise and keeps the buffer).
			
 
				+	inline void		destroy()
			
 
				+	{
			
 
				+		b3Assert(usedsize==0);
			
 
				+		//Raise(L"StackAlloc is still in use");
			
 
				+
			
 
				+		if(usedsize==0)
			
 
				+		{
			
 
				+			if(!ischild && data)		
			
 
				+				b3AlignedFree(data);
			
 
				+
			
 
				+			data				=	0;
			
 
				+			totalsize			=	0;	//otherwise allocate() would hand out offsets from a null buffer
			
 
				+		}
			
 
				+		
			
 
				+	}
			
 
				+
			
 
				+	///Number of bytes that can still be allocated.
			
 
				+	int	getAvailableMemory() const
			
 
				+	{
			
 
				+		return static_cast<int>(totalsize - usedsize);
			
 
				+	}
			
 
				+
			
 
				+	///Returns 'size' bytes from the top of the stack, or 0 (after asserting) when they do not fit.
			
 
				+	unsigned char*			allocate(unsigned int size)
			
 
				+	{
			
 
				+		if(size<=totalsize-usedsize)	//overflow-safe form of usedsize+size<=totalsize; an exact fit is allowed, matching getAvailableMemory()
			
 
				+		{
			
 
				+			unsigned char*	result(data+usedsize);
			
 
				+			usedsize+=size;
			
 
				+			return(result);
			
 
				+		}
			
 
				+		b3Assert(0);
			
 
				+		//&& (L"Not enough memory"));
			
 
				+		
			
 
				+		return(0);
			
 
				+	}
			
 
				+	///Opens a nested block and makes it current; returns 0 when the block header itself does not fit.
			
 
				+	B3_FORCE_INLINE b3Block*		beginBlock()
			
 
				+	{
			
 
				+		b3Block*	pb = (b3Block*)allocate(sizeof(b3Block));
			
 
				+		if(!pb)
			
 
				+			return(0);	//allocate() failed (and already asserted); never write through a null header
			
 
				+		pb->previous	=	current;
			
 
				+		pb->address		=	data+usedsize;
			
 
				+		current			=	pb;
			
 
				+		return(pb);
			
 
				+	}
			
 
				+	///Closes 'block', which must be the current one, and releases everything allocated since its beginBlock().
			
 
				+	B3_FORCE_INLINE void		endBlock(b3Block* block)
			
 
				+	{
			
 
				+		b3Assert(block==current);
			
 
				+		//Raise(L"Unmatched blocks");
			
 
				+		if(block && block==current)	//a null block (failed beginBlock, or nothing open) is ignored instead of dereferenced
			
 
				+		{
			
 
				+			current		=	block->previous;
			
 
				+			usedsize	=	(unsigned int)((block->address-data)-sizeof(b3Block));
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+private:
			
 
				+	///Puts every member into the "no buffer" state; called by the constructor before create().
			
 
				+	void		ctor()
			
 
				+	{
			
 
				+		data		=	0;
			
 
				+		totalsize	=	0;
			
 
				+		usedsize	=	0;
			
 
				+		current		=	0;
			
 
				+		ischild		=	false;
			
 
				+	}
			
 
				+	unsigned char*		data;	//start of the buffer (0 when none)
			
 
				+	unsigned int		totalsize;	//capacity of the buffer in bytes
			
 
				+	unsigned int		usedsize;	//bytes currently handed out, i.e. offset of the stack top
			
 
				+	b3Block*	current;	//innermost open block (0 when none)
			
 
				+	bool		ischild;	//when true destroy() does not free 'data'; only ever set to false in the visible code
			
 
				+};
			
 
				+
			
 
				+#endif //B3_STACK_ALLOC
			
--- a/include/Bullet3Common/b3Transform.h
+++ b/include/Bullet3Common/b3Transform.h
@@ -0,0 +1,304 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_TRANSFORM_H
			
 
				+#define B3_TRANSFORM_H
			
 
				+
			
 
				+
			
 
				+#include "b3Matrix3x3.h"
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3TransformData b3TransformDoubleData
			
 
				+#else
			
 
				+#define b3TransformData b3TransformFloatData
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear.
			
 
				+ *It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes. */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3Transform {
			
 
				+	
			
 
				+  ///Storage for the rotation
			
 
				+	b3Matrix3x3 m_basis;
			
 
				+  ///Storage for the translation
			
 
				+	b3Vector3   m_origin;
			
 
				+
			
 
				+public:
			
 
				+	
			
 
				+  /**@brief No initialization constructor */
			
 
				+	b3Transform() {}
			
 
				+  /**@brief Constructor from b3Quaternion (optional b3Vector3 )
			
 
				+   * @param q Rotation from quaternion 
			
 
				+   * @param c Translation from Vector (default 0,0,0) */
			
 
				+	explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q, 
			
 
				+		const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0))) 
			
 
				+		: m_basis(q),
			
 
				+		m_origin(c)
			
 
				+	{}
			
 
				+
			
 
				+  /**@brief Constructor from b3Matrix3x3 (optional b3Vector3)
			
 
				+   * @param b Rotation from Matrix 
			
 
				+   * @param c Translation from Vector default (0,0,0)*/
			
 
				+	explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b, 
			
 
				+		const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
			
 
				+		: m_basis(b),
			
 
				+		m_origin(c)
			
 
				+	{}
			
 
				+  /**@brief Copy constructor */
			
 
				+	B3_FORCE_INLINE b3Transform (const b3Transform& other)
			
 
				+		: m_basis(other.m_basis),
			
 
				+		m_origin(other.m_origin)
			
 
				+	{
			
 
				+	}
			
 
				+  /**@brief Assignment Operator */
			
 
				+	B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other)
			
 
				+	{
			
 
				+		m_basis = other.m_basis;
			
 
				+		m_origin = other.m_origin;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+  /**@brief Set the current transform as the value of the product of two transforms
			
 
				+   * @param t1 Transform 1
			
 
				+   * @param t2 Transform 2
			
 
				+   * This = Transform1 * Transform2 */
			
 
				+		B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2) {
			
 
				+			m_basis = t1.m_basis * t2.m_basis;
			
 
				+			m_origin = t1(t2.m_origin);
			
 
				+		}
			
 
				+
			
 
				+/*		void multInverseLeft(const b3Transform& t1, const b3Transform& t2) {
			
 
				+			b3Vector3 v = t2.m_origin - t1.m_origin;
			
 
				+			m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis);
			
 
				+			m_origin = v * t1.m_basis;
			
 
				+		}
			
 
				+		*/
			
 
				+
			
 
				+/**@brief Return the transform of the vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const
			
 
				+	{
			
 
				+        return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the transform of the vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const
			
 
				+	{
			
 
				+		return (*this)(x);
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the transform of the b3Quaternion */
			
 
				+	B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const
			
 
				+	{
			
 
				+		return getRotation() * q;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the basis matrix for the rotation */
			
 
				+	B3_FORCE_INLINE b3Matrix3x3&       getBasis()          { return m_basis; }
			
 
				+  /**@brief Return the basis matrix for the rotation */
			
 
				+	B3_FORCE_INLINE const b3Matrix3x3& getBasis()    const { return m_basis; }
			
 
				+
			
 
				+  /**@brief Return the origin vector translation */
			
 
				+	B3_FORCE_INLINE b3Vector3&         getOrigin()         { return m_origin; }
			
 
				+  /**@brief Return the origin vector translation */
			
 
				+	B3_FORCE_INLINE const b3Vector3&   getOrigin()   const { return m_origin; }
			
 
				+
			
 
				+  /**@brief Return a quaternion representing the rotation */
			
 
				+	b3Quaternion getRotation() const { 
			
 
				+		b3Quaternion q;
			
 
				+		m_basis.getRotation(q);
			
 
				+		return q;
			
 
				+	}
			
 
				+	
			
 
				+	
			
 
				+  /**@brief Set from an array 
			
 
				+   * @param m A pointer to a 15 element array (12 rotation (row major padded on the right by 1), and 3 translation) */
			
 
				+	void setFromOpenGLMatrix(const b3Scalar *m)
			
 
				+	{
			
 
				+		m_basis.setFromOpenGLSubMatrix(m);
			
 
				+		m_origin.setValue(m[12],m[13],m[14]);
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Fill an array representation
			
 
				+   * @param m A pointer to a 16 element array (12 rotation (row major padded on the right by 1), 3 translation, and a trailing 1 stored in m[15]) */
			
 
				+	void getOpenGLMatrix(b3Scalar *m) const 
			
 
				+	{
			
 
				+		m_basis.getOpenGLSubMatrix(m);
			
 
				+		m[12] = m_origin.getX();
			
 
				+		m[13] = m_origin.getY();
			
 
				+		m[14] = m_origin.getZ();
			
 
				+		m[15] = b3Scalar(1.0);
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Set the translational element
			
 
				+   * @param origin The vector to set the translation to */
			
 
				+	B3_FORCE_INLINE void setOrigin(const b3Vector3& origin) 
			
 
				+	{ 
			
 
				+		m_origin = origin;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const;
			
 
				+
			
 
				+
			
 
				+  /**@brief Set the rotational element by b3Matrix3x3 */
			
 
				+	B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis)
			
 
				+	{ 
			
 
				+		m_basis = basis;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Set the rotational element by b3Quaternion */
			
 
				+	B3_FORCE_INLINE void setRotation(const b3Quaternion& q)
			
 
				+	{
			
 
				+		m_basis.setRotation(q);
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+  /**@brief Set this transformation to the identity */
			
 
				+	void setIdentity()
			
 
				+	{
			
 
				+		m_basis.setIdentity();
			
 
				+		m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Multiply this Transform by another(this = this * another) 
			
 
				+   * @param t The other transform */
			
 
				+	b3Transform& operator*=(const b3Transform& t) 
			
 
				+	{
			
 
				+		m_origin += m_basis * t.m_origin;
			
 
				+		m_basis *= t.m_basis;
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the inverse of this transform */
			
 
				+	b3Transform inverse() const
			
 
				+	{ 
			
 
				+		b3Matrix3x3 inv = m_basis.transpose();
			
 
				+		return b3Transform(inv, inv * -m_origin);
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the inverse of this transform times the other transform
			
 
				+   * @param t The other transform 
			
 
				+   * return this.inverse() * the other */
			
 
				+	b3Transform inverseTimes(const b3Transform& t) const;  
			
 
				+
			
 
				+  /**@brief Return the product of this transform and the other */
			
 
				+	b3Transform operator*(const b3Transform& t) const;
			
 
				+
			
 
				+  /**@brief Return an identity transform */
			
 
				+	static const b3Transform&	getIdentity()
			
 
				+	{
			
 
				+		static const b3Transform identityTransform(b3Matrix3x3::getIdentity());
			
 
				+		return identityTransform;
			
 
				+	}
			
 
				+
			
 
				+	void	serialize(struct	b3TransformData& dataOut) const;
			
 
				+
			
 
				+	void	serializeFloat(struct	b3TransformFloatData& dataOut) const;
			
 
				+
			
 
				+	void	deSerialize(const struct	b3TransformData& dataIn);
			
 
				+
			
 
				+	void	deSerializeDouble(const struct	b3TransformDoubleData& dataIn);
			
 
				+
			
 
				+	void	deSerializeFloat(const struct	b3TransformFloatData& dataIn);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+/// Map inVec through the inverse of this transform: subtract the origin, then multiply by the transposed basis.
			
 
				+B3_FORCE_INLINE b3Vector3
			
 
				+b3Transform::invXform(const b3Vector3& inVec) const
			
 
				+{
			
 
				+	const b3Vector3 relative = inVec - m_origin;
			
 
				+	const b3Matrix3x3 basisTransposed = m_basis.transpose();
			
 
				+	return basisTransposed * relative;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Transform 
			
 
				+b3Transform::inverseTimes(const b3Transform& t) const  
			
 
				+{
			
 
				+	b3Vector3 v = t.getOrigin() - m_origin;
			
 
				+		return b3Transform(m_basis.transposeTimes(t.m_basis),
			
 
				+			v * m_basis);
			
 
				+}
			
 
				+
			
 
				+/// Compose this transform with t: the bases are multiplied and t's origin is mapped through this transform.
			
 
				+B3_FORCE_INLINE b3Transform 
			
 
				+b3Transform::operator*(const b3Transform& t) const
			
 
				+{
			
 
				+	const b3Matrix3x3 composedBasis = m_basis * t.m_basis;
			
 
				+	const b3Vector3 composedOrigin = (*this)(t.m_origin);
			
 
				+	return b3Transform(composedBasis, composedOrigin);
			
 
				+}
			
 
				+
			
 
				+/**@brief Compare two transforms; true only when both the basis matrices and the origins compare equal */
			
 
				+B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2)
			
 
				+{
			
 
				+   // The basis is checked first; the origins are only compared when the bases match.
			
 
				+   if ( !(t1.getBasis() == t2.getBasis()) )
			
 
				+   {
			
 
				+      return false;
			
 
				+   }
			
 
				+   return t1.getOrigin() == t2.getOrigin();
			
 
				+}
			
 
				+
			
 
				+
			
 
				+///Serialization record for b3Transform in its float variant:
			
 
				+///filled by b3Transform::serializeFloat and consumed by b3Transform::deSerializeFloat.
			
 
				+struct	b3TransformFloatData
			
 
				+{
			
 
				+	b3Matrix3x3FloatData	m_basis;	///< serialized rotation basis
			
 
				+	b3Vector3FloatData	m_origin;	///< serialized translation
			
 
				+};
			
 
				+
			
 
				+///Serialization record for b3Transform in its double variant; consumed by b3Transform::deSerializeDouble.
			
 
				+struct	b3TransformDoubleData
			
 
				+{
			
 
				+	b3Matrix3x3DoubleData	m_basis;	///< serialized rotation basis
			
 
				+	b3Vector3DoubleData	m_origin;	///< serialized translation
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Transform::serialize(b3TransformData& dataOut) const
			
 
				+{
			
 
				+	m_basis.serialize(dataOut.m_basis);
			
 
				+	m_origin.serialize(dataOut.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Transform::serializeFloat(b3TransformFloatData& dataOut) const
			
 
				+{
			
 
				+	m_basis.serializeFloat(dataOut.m_basis);
			
 
				+	m_origin.serializeFloat(dataOut.m_origin);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Transform::deSerialize(const b3TransformData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerialize(dataIn.m_basis);
			
 
				+	m_origin.deSerialize(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerializeFloat(dataIn.m_basis);
			
 
				+	m_origin.deSerializeFloat(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn)
			
 
				+{
			
 
				+	m_basis.deSerializeDouble(dataIn.m_basis);
			
 
				+	m_origin.deSerializeDouble(dataIn.m_origin);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif //B3_TRANSFORM_H
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/include/Bullet3Common/b3TransformUtil.h
+++ b/include/Bullet3Common/b3TransformUtil.h
@@ -0,0 +1,228 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_TRANSFORM_UTIL_H
			
 
				+#define B3_TRANSFORM_UTIL_H
			
 
				+
			
 
				+#include "b3Transform.h"
			
 
				+// Upper bound on the rotation angle applied per integration step (see integrateTransform).
			
 
				+// Parenthesized so the macro expands as a single value inside larger expressions.
			
 
				+#define B3_ANGULAR_MOTION_THRESHOLD (b3Scalar(0.5)*B3_HALF_PI)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Builds a vector whose components are +/- the components of halfExtents:
			
 
				+/// a component is negated when the matching component of supportDir is below zero, kept otherwise.
			
 
				+B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents,const b3Vector3& supportDir)
			
 
				+{
			
 
				+	const b3Scalar zero = b3Scalar(0.0);
			
 
				+	const b3Scalar cornerX = (supportDir.getX() < zero) ? -halfExtents.getX() : halfExtents.getX();
			
 
				+	const b3Scalar cornerY = (supportDir.getY() < zero) ? -halfExtents.getY() : halfExtents.getY();
			
 
				+	const b3Scalar cornerZ = (supportDir.getZ() < zero) ? -halfExtents.getZ() : halfExtents.getZ();
			
 
				+	return b3MakeVector3(cornerX, cornerY, cornerZ);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Utils related to temporal transforms
			
 
				+class b3TransformUtil
			
 
				+{
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	static void integrateTransform(const b3Transform& curTrans,const b3Vector3& linvel,const b3Vector3& angvel,b3Scalar timeStep,b3Transform& predictedTransform)
			
 
				+	{
			
 
				+		predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep);
			
 
				+//	#define QUATERNION_DERIVATIVE
			
 
				+	#ifdef QUATERNION_DERIVATIVE
			
 
				+		b3Quaternion predictedOrn = curTrans.getRotation();
			
 
				+		predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5));
			
 
				+		predictedOrn.normalize();
			
 
				+	#else
			
 
				+		//Exponential map
			
 
				+		//google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia
			
 
				+
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar	fAngle = angvel.length(); 
			
 
				+		//limit the angular motion
			
 
				+		if (fAngle*timeStep > B3_ANGULAR_MOTION_THRESHOLD)
			
 
				+		{
			
 
				+			fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep;
			
 
				+		}
			
 
				+
			
 
				+		if ( fAngle < b3Scalar(0.001) )
			
 
				+		{
			
 
				+			// use the Taylor expansion of the sinc function
			
 
				+			axis   = angvel*( b3Scalar(0.5)*timeStep-(timeStep*timeStep*timeStep)*(b3Scalar(0.020833333333))*fAngle*fAngle );
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			// sinc-style factor: sin(0.5*fAngle*timeStep)/fAngle
			
 
				+			axis   = angvel*( b3Sin(b3Scalar(0.5)*fAngle*timeStep)/fAngle );
			
 
				+		}
			
 
				+		b3Quaternion dorn (axis.getX(),axis.getY(),axis.getZ(),b3Cos( fAngle*timeStep*b3Scalar(0.5) ));
			
 
				+		b3Quaternion orn0 = curTrans.getRotation();
			
 
				+
			
 
				+		b3Quaternion predictedOrn = dorn * orn0;
			
 
				+		predictedOrn.normalize();
			
 
				+	#endif
			
 
				+		predictedTransform.setRotation(predictedOrn);
			
 
				+	}
			
 
				+
			
 
				+	static void	calculateVelocityQuaternion(const b3Vector3& pos0,const b3Vector3& pos1,const b3Quaternion& orn0,const b3Quaternion& orn1,b3Scalar timeStep,b3Vector3& linVel,b3Vector3& angVel)
			
 
				+	{
			
 
				+		linVel = (pos1 - pos0) / timeStep;
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar  angle;
			
 
				+		if (orn0 != orn1)
			
 
				+		{
			
 
				+			calculateDiffAxisAngleQuaternion(orn0,orn1,axis,angle);
			
 
				+			angVel = axis * angle / timeStep;
			
 
				+		} else
			
 
				+		{
			
 
				+			angVel.setValue(0,0,0);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0,const b3Quaternion& orn1a,b3Vector3& axis,b3Scalar& angle)
			
 
				+	{
			
 
				+		b3Quaternion orn1 = orn0.nearest(orn1a);
			
 
				+		b3Quaternion dorn = orn1 * orn0.inverse();
			
 
				+		angle = dorn.getAngle();
			
 
				+		axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
			
 
				+		axis[3] = b3Scalar(0.);
			
 
				+		//check for axis length
			
 
				+		b3Scalar len = axis.length2();
			
 
				+		if (len < B3_EPSILON*B3_EPSILON)
			
 
				+			axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
			
 
				+		else
			
 
				+			axis /= b3Sqrt(len);
			
 
				+	}
			
 
				+
			
 
				+	static void	calculateVelocity(const b3Transform& transform0,const b3Transform& transform1,b3Scalar timeStep,b3Vector3& linVel,b3Vector3& angVel)
			
 
				+	{
			
 
				+		linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep;
			
 
				+		b3Vector3 axis;
			
 
				+		b3Scalar  angle;
			
 
				+		calculateDiffAxisAngle(transform0,transform1,axis,angle);
			
 
				+		angVel = axis * angle / timeStep;
			
 
				+	}
			
 
				+
			
 
				+	static void calculateDiffAxisAngle(const b3Transform& transform0,const b3Transform& transform1,b3Vector3& axis,b3Scalar& angle)
			
 
				+	{
			
 
				+		b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse();
			
 
				+		b3Quaternion dorn;
			
 
				+		dmat.getRotation(dorn);
			
 
				+
			
 
				+		///floating point inaccuracy can lead to w component > 1..., which breaks the angle extraction below, so renormalize first
			
 
				+		dorn.normalize();
			
 
				+		
			
 
				+		angle = dorn.getAngle();
			
 
				+		axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
			
 
				+		axis[3] = b3Scalar(0.);
			
 
				+		//check for axis length
			
 
				+		b3Scalar len = axis.length2();
			
 
				+		if (len < B3_EPSILON*B3_EPSILON)
			
 
				+			axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
			
 
				+		else
			
 
				+			axis /= b3Sqrt(len);
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection 
			
 
				+///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance
			
 
				+class	b3ConvexSeparatingDistanceUtil
			
 
				+{
			
 
				+	b3Quaternion	m_ornA;
			
 
				+	b3Quaternion	m_ornB;
			
 
				+	b3Vector3	m_posA;
			
 
				+	b3Vector3	m_posB;
			
 
				+	
			
 
				+	b3Vector3	m_separatingNormal;
			
 
				+
			
 
				+	b3Scalar	m_boundingRadiusA;
			
 
				+	b3Scalar	m_boundingRadiusB;
			
 
				+	b3Scalar	m_separatingDistance;
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	b3ConvexSeparatingDistanceUtil(b3Scalar	boundingRadiusA,b3Scalar	boundingRadiusB)
			
 
				+		:m_boundingRadiusA(boundingRadiusA),
			
 
				+		m_boundingRadiusB(boundingRadiusB),
			
 
				+		m_separatingDistance(0.f)
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	///Returns the cached separating distance, as set by initSeparatingDistance and reduced by updateSeparatingDistance.
			
 
				+	///Read-only accessor, so it is const-qualified and usable on const instances.
			
 
				+	b3Scalar	getConservativeSeparatingDistance() const
			
 
				+	{
			
 
				+		return m_separatingDistance;
			
 
				+	}
			
 
				+
			
 
				+	void	updateSeparatingDistance(const b3Transform& transA,const b3Transform& transB)
			
 
				+	{
			
 
				+		const b3Vector3& toPosA = transA.getOrigin();
			
 
				+		const b3Vector3& toPosB = transB.getOrigin();
			
 
				+		b3Quaternion toOrnA = transA.getRotation();
			
 
				+		b3Quaternion toOrnB = transB.getRotation();
			
 
				+
			
 
				+		if (m_separatingDistance>0.f)
			
 
				+		{
			
 
				+			
			
 
				+
			
 
				+			b3Vector3 linVelA,angVelA,linVelB,angVelB;
			
 
				+			b3TransformUtil::calculateVelocityQuaternion(m_posA,toPosA,m_ornA,toOrnA,b3Scalar(1.),linVelA,angVelA);
			
 
				+			b3TransformUtil::calculateVelocityQuaternion(m_posB,toPosB,m_ornB,toOrnB,b3Scalar(1.),linVelB,angVelB);
			
 
				+			b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
			
 
				+			b3Vector3 relLinVel = (linVelB-linVelA);
			
 
				+			b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal);
			
 
				+			if (relLinVelocLength<0.f)
			
 
				+			{
			
 
				+				relLinVelocLength = 0.f;
			
 
				+			}
			
 
				+	
			
 
				+			b3Scalar	projectedMotion = maxAngularProjectedVelocity +relLinVelocLength;
			
 
				+			m_separatingDistance -= projectedMotion;
			
 
				+		}
			
 
				+	
			
 
				+		m_posA = toPosA;
			
 
				+		m_posB = toPosB;
			
 
				+		m_ornA = toOrnA;
			
 
				+		m_ornB = toOrnB;
			
 
				+	}
			
 
				+
			
 
				+	void	initSeparatingDistance(const b3Vector3& separatingVector,b3Scalar separatingDistance,const b3Transform& transA,const b3Transform& transB)
			
 
				+	{
			
 
				+		m_separatingDistance = separatingDistance;
			
 
				+
			
 
				+		if (m_separatingDistance>0.f)
			
 
				+		{
			
 
				+			m_separatingNormal = separatingVector;
			
 
				+			
			
 
				+			const b3Vector3& toPosA = transA.getOrigin();
			
 
				+			const b3Vector3& toPosB = transB.getOrigin();
			
 
				+			b3Quaternion toOrnA = transA.getRotation();
			
 
				+			b3Quaternion toOrnB = transB.getRotation();
			
 
				+			m_posA = toPosA;
			
 
				+			m_posB = toPosB;
			
 
				+			m_ornA = toOrnA;
			
 
				+			m_ornB = toOrnB;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_TRANSFORM_UTIL_H
			
 
				+
			
--- a/include/Bullet3Common/b3Vector3.h
+++ b/include/Bullet3Common/b3Vector3.h
@@ -0,0 +1,1345 @@
 
				+/*
			
 
				+Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_VECTOR3_H
			
 
				+#define B3_VECTOR3_H
			
 
				+
			
 
				+//#include <stdint.h>
			
 
				+#include "b3Scalar.h"
			
 
				+#include "b3MinMax.h"
			
 
				+#include "b3AlignedAllocator.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3Vector3Data b3Vector3DoubleData
			
 
				+#define b3Vector3DataName "b3Vector3DoubleData"
			
 
				+#else
			
 
				+#define b3Vector3Data b3Vector3FloatData
			
 
				+#define b3Vector3DataName "b3Vector3FloatData"
			
 
				+#endif //B3_USE_DOUBLE_PRECISION
			
 
				+
			
 
				+#if defined B3_USE_SSE
			
 
				+
			
 
				+//typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
			
 
				+
			
 
				+#ifdef _MSC_VER
			
 
				+#pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+#define B3_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
			
 
				+//#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
			
 
				+#define b3_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
			
 
				+#define b3_splat3_ps( _a, _i ) b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i, 3) )
			
 
				+#define b3_splat_ps( _a, _i )  b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i,_i) )
			
 
				+
			
 
				+#define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
			
 
				+#define b3vAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
			
 
				+#define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
			
 
				+#define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask)
			
 
				+#define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask)
			
 
				+#define b3vxyzMaskf b3vFFF0fMask
			
 
				+#define b3vAbsfMask b3CastiTo128f(b3vAbsMask)
			
 
				+
			
 
				+
			
 
				+
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
			
 
				+const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifdef B3_USE_NEON
			
 
				+
			
 
				+const float32x4_t B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3vAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
			
 
				+const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+class b3Vector3;
			
 
				+class b3Vector4;
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+//#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
			
 
				+inline b3Vector3 b3MakeVector3( b3SimdFloat4 v);
			
 
				+inline b3Vector4 b3MakeVector4( b3SimdFloat4 vec);
			
 
				+#endif
			
 
				+
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z);
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w);
			
 
				+inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w);
			
 
				+
			
 
				+
			
 
				+/**@brief b3Vector3 can be used to represent 3D points and vectors.
			
 
				+ * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
			
 
				+ * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
			
 
				+ */
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3Vector3
			
 
				+{
			
 
				+public:
			
 
				+#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
			
 
				+        union {
			
 
				+            b3SimdFloat4      mVec128;
			
 
				+            float	m_floats[4];
			
 
				+			struct {float x,y,z,w;};
			
 
				+			
			
 
				+        };
			
 
				+#else
			
 
				+	union
			
 
				+	{
			
 
				+        	float	m_floats[4];
			
 
				+			struct {float	x,y,z,w;};
			
 
				+	};
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM
			
 
				+
			
 
				+	/*B3_FORCE_INLINE		b3Vector3()
			
 
				+	{
			
 
				+	}
			
 
				+	*/
			
 
				+
			
 
				+    B3_FORCE_INLINE	b3SimdFloat4	get128() const
			
 
				+    {
			
 
				+        return mVec128;
			
 
				+    }
			
 
				+    B3_FORCE_INLINE	void	set128(b3SimdFloat4 v128)
			
 
				+    {
			
 
				+        mVec128 = v128;
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				+	public:
			
 
				+
			
 
				+  
			
 
				+    
			
 
				+/**@brief Add a vector to this one 
			
 
				+ * @param v The vector to add to this one */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		mVec128 = _mm_add_ps(mVec128, v.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vaddq_f32(mVec128, v.mVec128);
			
 
				+#else
			
 
				+		m_floats[0] += v.m_floats[0]; 
			
 
				+		m_floats[1] += v.m_floats[1];
			
 
				+		m_floats[2] += v.m_floats[2];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+  /**@brief Subtract a vector from this one
			
 
				+   * @param v The vector to subtract */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v) 
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vsubq_f32(mVec128, v.mVec128);
			
 
				+#else
			
 
				+		m_floats[0] -= v.m_floats[0]; 
			
 
				+		m_floats[1] -= v.m_floats[1];
			
 
				+		m_floats[2] -= v.m_floats[2];
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+	
			
 
				+  /**@brief Scale the vector
			
 
				+   * @param s Scale factor */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s)
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
			
 
				+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
			
 
				+		vs = b3_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vs);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		mVec128 = vmulq_n_f32(mVec128, s);
			
 
				+#else
			
 
				+		m_floats[0] *= s; 
			
 
				+		m_floats[1] *= s;
			
 
				+		m_floats[2] *= s;
			
 
				+#endif
			
 
				+		return *this;
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Inversely scale the vector 
			
 
				+   * @param s Scale factor to divide by */
			
 
				+	B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s) 
			
 
				+	{
			
 
				+		b3FullAssert(s != b3Scalar(0.0));
			
 
				+
			
 
				+#if 0 //defined(B3_USE_SSE_IN_API)
			
 
				+// this code is not faster !
			
 
				+		__m128 vs = _mm_load_ss(&s);
			
 
				+		vs = _mm_div_ss(b3v1110, vs);
			
 
				+		vs = b3_pshufd_ps(vs, 0x00);	//	(S S S S)
			
 
				+
			
 
				+		mVec128 = _mm_mul_ps(mVec128, vs);
			
 
				+		
			
 
				+		return *this;
			
 
				+#else
			
 
				+		return *this *= b3Scalar(1.0) / s;
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the dot product
			
 
				+   * @param v The other vector in the dot product */
			
 
				+	B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)		
			
 
				+		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
			
 
				+		__m128 z = _mm_movehl_ps(vd, vd);
			
 
				+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
			
 
				+		vd = _mm_add_ss(vd, y);
			
 
				+		vd = _mm_add_ss(vd, z);
			
 
				+		return _mm_cvtss_f32(vd);
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
			
 
				+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));  
			
 
				+		x = vadd_f32(x, vget_high_f32(vd));
			
 
				+		return vget_lane_f32(x, 0);
			
 
				+#else	
			
 
				+		return	m_floats[0] * v.m_floats[0] + 
			
 
				+				m_floats[1] * v.m_floats[1] + 
			
 
				+				m_floats[2] * v.m_floats[2];
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the length of the vector squared */
			
 
				+	B3_FORCE_INLINE b3Scalar length2() const
			
 
				+	{
			
 
				+		return dot(*this);
			
 
				+	}
			
 
				+
			
 
				+  /**@brief Return the length of the vector, i.e. b3Sqrt of length2() */
				+	B3_FORCE_INLINE b3Scalar length() const
				+	{
				+		const b3Scalar squaredLength = length2();
				+		return b3Sqrt(squaredLength);
				+	}
			
 
				+
			
 
				+  /**@brief Return the distance squared between the ends of this and another vector
				+   * This is semantically treating the vector like a point */
			
 
				+	B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const;
			
 
				+
			
 
				+  /**@brief Return the distance between the ends of this and another vector
				+   * This is semantically treating the vector like a point */
			
 
				+	B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const;
			
 
				+
			
 
				+  /**@brief Normalize this vector, falling back to (1,0,0) when it cannot be scaled
				+   * The vector is first divided by its largest absolute component and then by
				+   * its length. If that largest component is not greater than zero the vector
				+   * is set to (1,0,0) instead.
				+   * @return Reference to this vector */
				+	B3_FORCE_INLINE b3Vector3& safeNormalize() 
				+	{
				+		b3Vector3 magnitudes = this->absolute();
				+		int dominantAxis = magnitudes.maxAxis();
				+		if (!(magnitudes[dominantAxis]>0))
				+		{
				+			// No usable component: use the x axis.
				+			setValue(1,0,0);
				+			return *this;
				+		}
				+		*this /= magnitudes[dominantAxis];
				+		return *this /= length();
				+	}
			
 
				+
			
 
				+  /**@brief Normalize this vector in place
				+   * x^2 + y^2 + z^2 = 1
				+   * The SSE path scales by a reciprocal square root estimate refined with one
				+   * Newton-Raphson step and performs no zero-length check. The other builds
				+   * divide by length(), which goes through operator/= and its non-zero assert.
				+   * @return Reference to this vector */
				+	B3_FORCE_INLINE b3Vector3& normalize() 
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)		
				+        // dot product first
				+		__m128 vd = _mm_mul_ps(mVec128, mVec128);
				+		__m128 z = _mm_movehl_ps(vd, vd);
				+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
				+		vd = _mm_add_ss(vd, y);
				+		vd = _mm_add_ss(vd, z); // lane 0 now holds x*x + y*y + z*z
				+		
				+        #if 0
				+        vd = _mm_sqrt_ss(vd);
				+		vd = _mm_div_ss(b3v1110, vd);
				+		vd = b3_splat_ps(vd, 0x80);
				+		mVec128 = _mm_mul_ps(mVec128, vd);
				+        #else
				+        
				+        // NR step 1/sqrt(x) - vd is x, y is output 
				+        y = _mm_rsqrt_ss(vd); // estimate 
				+        
				+        //  one step NR 
				+        z = b3v1_5;
				+        vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5	
				+        //x2 = vd;
				+        vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
				+        vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
				+        z = _mm_sub_ss(z, vd);  // 1.5 - vd * 0.5 * y0 * y0 
				+
				+        y = _mm_mul_ss(y, z);   // y0 * (1.5 - vd * 0.5 * y0 * y0)
				+
				+		y = b3_splat_ps(y, 0x80); // presumably broadcasts lane 0 to x, y, z -- confirm against b3_splat_ps
				+		mVec128 = _mm_mul_ps(mVec128, y);
				+
				+        #endif
				+
				+		
				+		return *this;
				+#else	
				+		return *this /= length();
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return a normalized version of this vector */
			
 
				+	B3_FORCE_INLINE b3Vector3 normalized() const;
			
 
				+
			
 
				+  /**@brief Return a rotated version of this vector
			
 
				+   * @param wAxis The axis to rotate about 
			
 
				+   * @param angle The angle to rotate by */
			
 
				+	B3_FORCE_INLINE b3Vector3 rotate( const b3Vector3& wAxis, const b3Scalar angle ) const;
			
 
				+
			
 
				+  /**@brief Return the angle between this and another vector
				+   * Computed as b3Acos of the dot product divided by the product of both lengths.
				+   * @param v The other vector; the product of both lengths is asserted non-zero */
				+	B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const 
				+	{
				+		const b3Scalar lengthProduct = b3Sqrt(length2() * v.length2());
				+		b3FullAssert(lengthProduct != b3Scalar(0.0));
				+		const b3Scalar cosine = dot(v) / lengthProduct;
				+		return b3Acos(cosine);
				+	}
			
 
				+	
			
 
				+  /**@brief Return a vector with the absolute values of each element
				+   * @return New vector holding |x|, |y|, |z|; this vector is not modified */
				+	B3_FORCE_INLINE b3Vector3 absolute() const 
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
				+		return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask));
				+#elif defined(B3_USE_NEON)
				+		return b3Vector3(vabsq_f32(mVec128));
				+#else	
				+		return b3MakeVector3(
				+			b3Fabs(m_floats[0]), 
				+			b3Fabs(m_floats[1]), 
				+			b3Fabs(m_floats[2]));
				+#endif
				+	}
			
 
				+	
			
 
				+  /**@brief Return the cross product between this and another vector 
				+   * @param v The other vector
				+   * @return this x v, i.e. (y*v.z - z*v.y, z*v.x - x*v.z, x*v.y - y*v.x) */
				+	B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		__m128	T, V;
				+		
				+		T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
				+		V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
				+		
				+		V = _mm_mul_ps(V, mVec128);
				+		T = _mm_mul_ps(T, v.mVec128);
				+		V = _mm_sub_ps(V, T);
				+		
				+		V = b3_pshufd_ps(V, B3_SHUFFLE(1, 2, 0, 3)); // rotate the lanes once more to put the result in x, y, z order
				+		return b3MakeVector3(V);
				+#elif defined(B3_USE_NEON)
				+		float32x4_t T, V;
				+		// form (Y, Z, X, _) of mVec128 and v.mVec128
				+		float32x2_t Tlow = vget_low_f32(mVec128);
				+		float32x2_t Vlow = vget_low_f32(v.mVec128);
				+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
				+		
				+		V = vmulq_f32(V, mVec128);
				+		T = vmulq_f32(T, v.mVec128);
				+		V = vsubq_f32(V, T);
				+		Vlow = vget_low_f32(V);
				+		// form (Y, Z, X, _);
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
				+		V = (float32x4_t)vandq_s32((int32x4_t)V, b3vFFF0Mask); // presumably clears the w lane -- confirm against b3vFFF0Mask
				+		
				+		return b3Vector3(V);
				+#else
				+		return b3MakeVector3(
				+			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
				+			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
				+			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
				+#endif
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const // scalar triple product: this . (v1 x v2)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		// cross:
				+		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
				+		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
				+		
				+		V = _mm_mul_ps(V, v1.mVec128);
				+		T = _mm_mul_ps(T, v2.mVec128);
				+		V = _mm_sub_ps(V, T);
				+		
				+		V = _mm_shuffle_ps(V, V, B3_SHUFFLE(1, 2, 0, 3));
				+
				+		// dot: 
				+		V = _mm_mul_ps(V, mVec128);
				+		__m128 z = _mm_movehl_ps(V, V);
				+		__m128 y = _mm_shuffle_ps(V, V, 0x55);
				+		V = _mm_add_ss(V, y);
				+		V = _mm_add_ss(V, z);
				+		return _mm_cvtss_f32(V);
				+
				+#elif defined(B3_USE_NEON)
				+		// cross:
				+		float32x4_t T, V;
				+		// form (Y, Z, X, _) of v1.mVec128 and v2.mVec128
				+		float32x2_t Tlow = vget_low_f32(v1.mVec128);
				+		float32x2_t Vlow = vget_low_f32(v2.mVec128);
				+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
				+		
				+		V = vmulq_f32(V, v1.mVec128);
				+		T = vmulq_f32(T, v2.mVec128);
				+		V = vsubq_f32(V, T);
				+		Vlow = vget_low_f32(V);
				+		// form (Y, Z, X, _);
				+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
				+
				+		// dot: 
				+		V = vmulq_f32(mVec128, V);
				+		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));  
				+		x = vadd_f32(x, vget_high_f32(V));
				+		return vget_lane_f32(x, 0);
				+#else
				+		return 
				+			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + 
				+			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + 
				+			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the index of the axis holding the smallest value
				+   * Note return values are 0,1,2 for x, y, or z; the w slot is not considered */
				+	B3_FORCE_INLINE int minAxis() const
				+	{
				+		if (m_floats[0] < m_floats[1])
				+		{
				+			// x beats y, so the answer is x or z
				+			return (m_floats[0] < m_floats[2]) ? 0 : 2;
				+		}
				+		// otherwise the answer is y or z
				+		return (m_floats[1] < m_floats[2]) ? 1 : 2;
				+	}
			
 
				+
			
 
				+  /**@brief Return the index of the axis holding the largest value
				+   * Note return values are 0,1,2 for x, y, or z; the w slot is not considered */
				+	B3_FORCE_INLINE int maxAxis() const 
				+	{
				+		if (m_floats[0] < m_floats[1])
				+		{
				+			// y beats x, so the answer is y or z
				+			return (m_floats[1] < m_floats[2]) ? 2 : 1;
				+		}
				+		// otherwise the answer is x or z
				+		return (m_floats[0] < m_floats[2]) ? 2 : 0;
				+	}
			
 
				+
			
 
				+  /**@brief Return the index (0,1,2) of the component with the smallest absolute value */
				+	B3_FORCE_INLINE int furthestAxis() const
				+	{
				+		const b3Vector3 magnitudes = absolute();
				+		return magnitudes.minAxis();
				+	}
			
 
				+
			
 
				+  /**@brief Return the index (0,1,2) of the component with the largest absolute value */
				+	B3_FORCE_INLINE int closestAxis() const 
				+	{
				+		const b3Vector3 magnitudes = absolute();
				+		return magnitudes.maxAxis();
				+	}
			
 
				+
			
 
				+	
			
 
				+  /**@brief Set this vector to the linear blend of two vectors
				+   * @param v0 Result when rt = 0
				+   * @param v1 Result when rt = 1
				+   * @param rt Blend weight applied to v1; v0 is weighted by (1 - rt) */
				+	B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		const b3Scalar oneMinusRt = b3Scalar(1.0) - rt;
				+		__m128 weight0 = _mm_load_ss(&oneMinusRt);	//	(S 0 0 0)
				+		__m128 weight1 = _mm_load_ss(&rt);	//	(rt 0 0 0)
				+		weight0 = b3_pshufd_ps(weight0, 0x80);	//	(S S S 0.0)
				+		weight1 = b3_pshufd_ps(weight1, 0x80);	//	(rt rt rt 0.0)
				+		const __m128 part0 = _mm_mul_ps(v0.mVec128, weight0);
				+		const __m128 part1 = _mm_mul_ps(v1.mVec128, weight1);
				+		mVec128 = _mm_add_ps(part0, part1);
				+#elif defined(B3_USE_NEON)
				+		// v0 + (v1 - v0) * rt
				+		const float32x4_t delta = vsubq_f32(v1.mVec128, v0.mVec128);
				+		const float32x4_t scaled = vmulq_n_f32(delta, rt);
				+		mVec128 = vaddq_f32(scaled, v0.mVec128);
				+#else
				+		const b3Scalar oneMinusRt = b3Scalar(1.0) - rt;
				+		// x, y and z only; the unused w component is left untouched
				+		for (int axis = 0; axis < 3; ++axis)
				+		{
				+			m_floats[axis] = oneMinusRt * v0.m_floats[axis] + rt * v1.m_floats[axis];
				+		}
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return the linear interpolation between this and another vector 
				+   * @param v The other vector 
				+   * @param t The ratio of this to v (t = 0 => return this, t=1 => return other)
				+   * @return this + (v - this) * t; this vector is not modified */
				+	B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const 
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		__m128	vt = _mm_load_ss(&t);	//	(t 0 0 0)
				+		vt = b3_pshufd_ps(vt, 0x80);	//	(t t t 0.0)
				+		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
				+		vl = _mm_mul_ps(vl, vt);
				+		vl = _mm_add_ps(vl, mVec128);
				+		
				+		return b3MakeVector3(vl);
				+#elif defined(B3_USE_NEON)
				+		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
				+		vl = vmulq_n_f32(vl, t);
				+		vl = vaddq_f32(vl, mVec128);
				+		
				+		return b3Vector3(vl);
				+#else	
				+		return 
				+			b3MakeVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
				+						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
				+						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Elementwise multiply this vector by the other 
				+   * The SIMD paths multiply all four lanes; the scalar path leaves w untouched.
				+   * @param v The other vector
				+   * @return Reference to this vector */
				+	B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vmulq_f32(mVec128, v.mVec128);
				+#else	
				+		m_floats[0] *= v.m_floats[0]; 
				+		m_floats[1] *= v.m_floats[1];
				+		m_floats[2] *= v.m_floats[2];
				+#endif
				+		return *this;
				+	}
			
 
				+
			
 
				+	 /**@brief Return the x value (m_floats[0]) by const reference */
				+		B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
				+  /**@brief Return the y value (m_floats[1]) by const reference */
				+		B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
				+  /**@brief Return the z value (m_floats[2]) by const reference */
				+		B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
				+/**@brief Return the w value (m_floats[3]) by const reference */
				+		B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
			
 
				+
			
 
				+  /**@brief Set the x value (m_floats[0]) */
				+		B3_FORCE_INLINE void	setX(b3Scalar _x)
				+		{
				+			m_floats[0] = _x;
				+		}
				+  /**@brief Set the y value (m_floats[1]) */
				+		B3_FORCE_INLINE void	setY(b3Scalar _y)
				+		{
				+			m_floats[1] = _y;
				+		}
				+  /**@brief Set the z value (m_floats[2]) */
				+		B3_FORCE_INLINE void	setZ(b3Scalar _z)
				+		{
				+			m_floats[2] = _z;
				+		}
				+  /**@brief Set the w value (m_floats[3]) */
				+		B3_FORCE_INLINE void	setW(b3Scalar _w)
				+		{
				+			m_floats[3] = _w;
				+		}
			
 
				+
			
 
				+	//B3_FORCE_INLINE b3Scalar&       operator[](int i)       { return (&m_floats[0])[i];	}      
				+	//B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
				+	///operator b3Scalar*() replaces operator[], using implicit conversion, so v[i] indexes the returned pointer. operator != and operator == were added to avoid pointer comparisons.
				+	B3_FORCE_INLINE	operator       b3Scalar *()       { return &m_floats[0]; } // mutable pointer to the first component
				+	B3_FORCE_INLINE	operator const b3Scalar *() const { return &m_floats[0]; } // read-only pointer to the first component
			
 
				+
			
 
				+  /**@brief Exact component-wise equality test
				+   * All four stored floats, including the w slot, must compare equal.
				+   * @param other The vector to compare against */
				+	B3_FORCE_INLINE	bool	operator==(const b3Vector3& other) const
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+        // One mask bit per lane; 0xf means every lane compared equal.
				+        const __m128 laneEqual = _mm_cmpeq_ps(mVec128, other.mVec128);
				+        return _mm_movemask_ps(laneEqual) == 0xf;
				+#else 
				+		for (int slot = 3; slot >= 0; --slot)
				+		{
				+			if (m_floats[slot] != other.m_floats[slot])
				+			{
				+				return false;
				+			}
				+		}
				+		return true;
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Negation of operator==
				+   * @param other The vector to compare against */
				+	B3_FORCE_INLINE	bool	operator!=(const b3Vector3& other) const
				+	{
				+		const bool same = (*this == other);
				+		return !same;
				+	}
			
 
				+
			
 
				+  /**@brief Set each element to the max of the current values and the values of another b3Vector3
				+   * All four elements, including w, take part in every build variant.
				+   * @param other The other b3Vector3 to compare with 
				+   */
				+	B3_FORCE_INLINE void	setMax(const b3Vector3& other)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		mVec128 = _mm_max_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vmaxq_f32(mVec128, other.mVec128);
				+#else
				+		b3SetMax(m_floats[0], other.m_floats[0]);
				+		b3SetMax(m_floats[1], other.m_floats[1]);
				+		b3SetMax(m_floats[2], other.m_floats[2]);
				+		b3SetMax(m_floats[3], other.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Set each element to the min of the current values and the values of another b3Vector3
				+   * All four elements, including w, take part in every build variant.
				+   * @param other The other b3Vector3 to compare with 
				+   */
				+	B3_FORCE_INLINE void	setMin(const b3Vector3& other)
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		mVec128 = _mm_min_ps(mVec128, other.mVec128);
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vminq_f32(mVec128, other.mVec128);
				+#else
				+		b3SetMin(m_floats[0], other.m_floats[0]);
				+		b3SetMin(m_floats[1], other.m_floats[1]);
				+		b3SetMin(m_floats[2], other.m_floats[2]);
				+		b3SetMin(m_floats[3], other.m_floats[3]);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Set x, y and z and reset w to zero
				+   * The arguments are copied before any component is written, so references
				+   * to this vector's own components (e.g. v.setValue(v.getY(), v.getX(), v.getZ()))
				+   * are read with their values from before the call.
				+   * @param _x New x value
				+   * @param _y New y value
				+   * @param _z New z value */
				+	B3_FORCE_INLINE void 	setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
				+	{
				+		// Snapshot first: the parameters are references and may alias m_floats.
				+		const b3Scalar newX = _x;
				+		const b3Scalar newY = _y;
				+		const b3Scalar newZ = _z;
				+		m_floats[0] = newX;
				+		m_floats[1] = newY;
				+		m_floats[2] = newZ;
				+		m_floats[3] = b3Scalar(0.f);
				+	}
			
 
				+
			
 
				+	void	getSkewSymmetricMatrix(b3Vector3* v0,b3Vector3* v1,b3Vector3* v2) const // writes the three rows of this vector's skew-symmetric matrix into *v0, *v1, *v2
				+	{ // the output pointers are dereferenced without a null check
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+ 
				+		__m128 V  = _mm_and_ps(mVec128, b3vFFF0fMask);
				+		__m128 V0 = _mm_xor_ps(b3vMzeroMask, V); // presumably the negated components -- confirm against b3vMzeroMask
				+		__m128 V2 = _mm_movelh_ps(V0, V);
				+		
				+		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
				+		
				+        V0 = _mm_shuffle_ps(V0, V, 0xDB);
				+		V2 = _mm_shuffle_ps(V2, V, 0xF9);
				+		
				+		v0->mVec128 = V0;
				+		v1->mVec128 = V1;
				+		v2->mVec128 = V2;
				+#else
				+		v0->setValue(0.		,-getZ()		,getY()); // row 0: ( 0, -z,  y)
				+		v1->setValue(getZ()	,0.			,-getX()); // row 1: ( z,  0, -x)
				+		v2->setValue(-getY()	,getX()	,0.); // row 2: (-y,  x,  0)
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Set every component of this vector to zero
				+   * The SIMD paths clear all four lanes; the scalar path goes through setValue(). */
				+	void setZero()
				+	{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+		// All-zero bits, identical to XOR-ing the register with itself.
				+		mVec128 = _mm_setzero_ps();
				+#elif defined(B3_USE_NEON)
				+		mVec128 = vreinterpretq_f32_s32(vdupq_n_s32(0));
				+#else	
				+		const b3Scalar zero = b3Scalar(0.);
				+		setValue(zero, zero, zero);
				+#endif
				+	}
			
 
				+
			
 
				+  /**@brief Return true when x, y and z are all exactly zero (w is not inspected) */
				+	B3_FORCE_INLINE bool isZero() const 
				+	{
				+		for (int axis = 0; axis < 3; ++axis)
				+		{
				+			if (m_floats[axis] != b3Scalar(0))
				+			{
				+				return false;
				+			}
				+		}
				+		return true;
				+	}
			
 
				+
			
 
				+  /**@brief Return true when the squared length is below B3_EPSILON */
				+	B3_FORCE_INLINE bool fuzzyZero() const 
				+	{
				+		const b3Scalar squaredLength = length2();
				+		return squaredLength < B3_EPSILON;
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	serialize(struct	b3Vector3Data& dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	deSerialize(const struct	b3Vector3Data& dataIn);
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	serializeFloat(struct	b3Vector3FloatData& dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	deSerializeFloat(const struct	b3Vector3FloatData& dataIn);
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	serializeDouble(struct	b3Vector3DoubleData& dataOut) const;
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	deSerializeDouble(const struct	b3Vector3DoubleData& dataIn);
			
 
				+    
			
 
				+        /**@brief returns index of maximum dot product between this and vectors in array[]
			
 
				+         * @param array The other vectors 
			
 
				+         * @param array_count The number of other vectors 
			
 
				+         * @param dotOut The maximum dot product */
			
 
				+        B3_FORCE_INLINE   long    maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const; 
			
 
				+
			
 
				+        /**@brief returns index of minimum dot product between this and vectors in array[]
			
 
				+         * @param array The other vectors 
			
 
				+         * @param array_count The number of other vectors 
			
 
				+         * @param dotOut The minimum dot product */    
			
 
				+        B3_FORCE_INLINE   long    minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const; 
			
 
				+
			
 
				+    /* create a vector as  b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 ))
				+     * The scalar build does exactly that; the SIMD builds are intended to produce the same three
				+     * dot products with the lanes combined in parallel. */
				+    B3_FORCE_INLINE b3Vector3  dot3( const b3Vector3 &v0, const b3Vector3 &v1, const b3Vector3 &v2 ) const
				+    {
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+
				+        __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
				+        __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
				+        __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
				+        __m128 b0 = _mm_unpacklo_ps( a0, a1 );
				+        __m128 b1 = _mm_unpackhi_ps( a0, a1 );
				+        __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
				+        __m128 r = _mm_movelh_ps( b0, b2 );
				+        r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
				+        a2 = _mm_and_ps( a2, b3vxyzMaskf);
				+        r = _mm_add_ps( r, b3CastdTo128f (_mm_move_sd( b3CastfTo128d(a2), b3CastfTo128d(b1) )));
				+        return b3MakeVector3(r);
				+        
				+#elif defined(B3_USE_NEON)
				+        static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
				+        float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
				+        float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
				+        float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
				+        float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
				+        a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask ); // zero the w lane of the v2 products
				+        float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
				+        float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
				+        return b3Vector3( vcombine_f32(b0, b1) );
				+#else	
				+		return b3MakeVector3( dot(v0), dot(v1), dot(v2));
				+#endif
				+    }
			
 
				+};
			
 
				+
			
 
				+/**@brief Return the sum of two vectors (Point semantics)
				+ * @param v1 Left operand
				+ * @param v2 Right operand
				+ * @return New vector holding the component-wise sum */
				+B3_FORCE_INLINE b3Vector3 
				+operator+(const b3Vector3& v1, const b3Vector3& v2) 
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
				+#elif defined(B3_USE_NEON)
				+	return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128));
				+#else
				+	return b3MakeVector3(
				+			v1.m_floats[0] + v2.m_floats[0], 
				+			v1.m_floats[1] + v2.m_floats[1], 
				+			v1.m_floats[2] + v2.m_floats[2]);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the elementwise product of two vectors
				+ * @param v1 Left operand
				+ * @param v2 Right operand
				+ * @return New vector holding the component-wise product */
				+B3_FORCE_INLINE b3Vector3 
				+operator*(const b3Vector3& v1, const b3Vector3& v2) 
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
				+#elif defined(B3_USE_NEON)
				+	return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128));
				+#else
				+	return b3MakeVector3(
				+			v1.m_floats[0] * v2.m_floats[0], 
				+			v1.m_floats[1] * v2.m_floats[1], 
				+			v1.m_floats[2] * v2.m_floats[2]);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the difference between two vectors
				+ * @param v1 Vector to subtract from
				+ * @param v2 Vector to subtract
				+ * @return New vector holding v1 - v2, component-wise */
				+B3_FORCE_INLINE b3Vector3 
				+operator-(const b3Vector3& v1, const b3Vector3& v2)
				+{
				+#if (defined(B3_USE_SSE_IN_API)  && defined(B3_USE_SSE))
				+
				+	//	without _mm_and_ps this code causes slowdown in Concave moving
				+	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
				+	return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask));
				+#elif defined(B3_USE_NEON)
				+	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
				+#else
				+	return b3MakeVector3(
				+			v1.m_floats[0] - v2.m_floats[0], 
				+			v1.m_floats[1] - v2.m_floats[1], 
				+			v1.m_floats[2] - v2.m_floats[2]);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the negative of the vector
				+ * @param v Vector to negate
				+ * @return New vector holding (-x, -y, -z) */
				+B3_FORCE_INLINE b3Vector3 
				+operator-(const b3Vector3& v)
				+{
				+#if (defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE))
				+	__m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask); // presumably flips the sign bits -- confirm against b3vMzeroMask
				+	return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); 
				+#elif defined(B3_USE_NEON)
				+	return b3MakeVector3((b3SimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)b3vMzeroMask));
				+#else	
				+	return b3MakeVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the vector scaled by s
				+ * @param v Vector to scale
				+ * @param s Scale factor applied to x, y and z
				+ * @return New scaled vector; v is not modified */
				+B3_FORCE_INLINE b3Vector3 
				+operator*(const b3Vector3& v, const b3Scalar& s)
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
				+	vs = b3_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
				+	return b3MakeVector3(_mm_mul_ps(v.mVec128, vs));
				+#elif defined(B3_USE_NEON)
				+	float32x4_t r = vmulq_n_f32(v.mVec128, s);
				+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask));
				+#else
				+	return b3MakeVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the vector scaled by s (scalar-first spelling)
				+ * @param s Scale factor
				+ * @param v Vector to scale
				+ * @return Same result as v * s */
				+B3_FORCE_INLINE b3Vector3 
				+operator*(const b3Scalar& s, const b3Vector3& v)
				+{ 
				+	// Forward to the (vector, scalar) overload.
				+	return operator*(v, s);
				+}
			
 
				+
			
 
				+/**@brief Return the vector inversely scaled by s
				+ * @param v Vector to scale
				+ * @param s Divisor; asserted non-zero via b3FullAssert
				+ * @return v multiplied by 1/s */
				+B3_FORCE_INLINE b3Vector3
				+operator/(const b3Vector3& v, const b3Scalar& s)
				+{
				+	b3FullAssert(s != b3Scalar(0.0));
				+	// Scale by the reciprocal. A dedicated SSE divide path used to sit here
				+	// behind "#if 0" with the remark that it was not faster.
				+	const b3Scalar reciprocal = b3Scalar(1.0) / s;
				+	return v * reciprocal;
				+}
			
 
				+
			
 
				+/**@brief Return the elementwise quotient of two vectors
				+ * The NEON path uses a reciprocal estimate with refinement steps, so its
				+ * result is an approximation of x/y. No check is made for zero components in v2.
				+ * @param v1 Numerator vector
				+ * @param v2 Denominator vector
				+ * @return New vector holding v1 / v2, component-wise */
				+B3_FORCE_INLINE b3Vector3
				+operator/(const b3Vector3& v1, const b3Vector3& v2)
				+{
				+#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE))
				+	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
				+	vec = _mm_and_ps(vec, b3vFFF0fMask);
				+	return b3MakeVector3(vec); 
				+#elif defined(B3_USE_NEON)
				+	float32x4_t x, y, v, m;
				+
				+	x = v1.mVec128;
				+	y = v2.mVec128;
				+	
				+	v = vrecpeq_f32(y);			// v ~ 1/y
				+	m = vrecpsq_f32(y, v);		// m = (2-v*y)
				+	v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
				+	m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
				+	v = vmulq_f32(v, x);		// x*vv
				+	v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
				+
				+	return b3Vector3(v);
				+#else
				+	return b3MakeVector3(
				+			v1.m_floats[0] / v2.m_floats[0], 
				+			v1.m_floats[1] / v2.m_floats[1],
				+			v1.m_floats[2] / v2.m_floats[2]);
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Return the dot product between two vectors
				+ * Free-function form of v1.dot(v2). */
				+B3_FORCE_INLINE b3Scalar 
				+b3Dot(const b3Vector3& v1, const b3Vector3& v2) 
				+{ 
				+	return v1.dot(v2); 
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Return the distance squared between two vectors
				+ * Free-function form of v1.distance2(v2). */
				+B3_FORCE_INLINE b3Scalar
				+b3Distance2(const b3Vector3& v1, const b3Vector3& v2) 
				+{ 
				+	return v1.distance2(v2); 
				+}
			
 
				+
			
 
				+
			
 
				+/**@brief Return the distance between two vectors
				+ * Free-function form of v1.distance(v2). */
				+B3_FORCE_INLINE b3Scalar
				+b3Distance(const b3Vector3& v1, const b3Vector3& v2) 
				+{ 
				+	return v1.distance(v2); 
				+}
			
 
				+
			
 
				+/**@brief Return the angle between two vectors
				+ * Free-function form of v1.angle(v2). */
				+B3_FORCE_INLINE b3Scalar
				+b3Angle(const b3Vector3& v1, const b3Vector3& v2) 
				+{ 
				+	return v1.angle(v2); 
				+}
			
 
				+
			
 
				+/**@brief Return the cross product of two vectors
				+ * Free-function form of v1.cross(v2). */
				+B3_FORCE_INLINE b3Vector3 
				+b3Cross(const b3Vector3& v1, const b3Vector3& v2) 
				+{ 
				+	return v1.cross(v2); 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3Scalar
				+b3Triple(const b3Vector3& v1, const b3Vector3& v2, const b3Vector3& v3) // free-function form of v1.triple(v2, v3)
				+{
				+	return v1.triple(v2, v3);
				+}
			
 
				+
			
 
				+/**@brief Return the linear interpolation between two vectors
				+ * Free-function form of v1.lerp(v2, t).
				+ * @param v1 One vector 
				+ * @param v2 The other vector 
				+ * @param t The ratio of v1 to v2 (t = 0 => return v1, t=1 => return v2) */
				+B3_FORCE_INLINE b3Vector3 
				+b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t)
				+{
				+	return v1.lerp(v2, t);
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/**@brief Squared distance between the end points of this vector and v
				+ * @param v The other point */
				+B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const
				+{
				+	const b3Vector3 offset = v - *this;
				+	return offset.length2();
				+}
			
 
				+
			
 
				+/**@brief Distance between the end points of this vector and v
				+ * @param v The other point */
				+B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const
				+{
				+	const b3Vector3 offset = v - *this;
				+	return offset.length();
				+}
			
 
				+
			
 
				+/**@brief Return a normalized copy of this vector; this vector is not modified
				+ * The SSE build normalizes a copy via normalize(); other builds divide by length(). */
				+B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const
				+{
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+	b3Vector3 unit = *this;
				+	unit.normalize();
				+	return unit;
				+#else
				+	const b3Scalar len = length();
				+	return *this / len;
				+#endif
				+} 
			
 
				+
			
 
				+B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Scalar _angle ) const // returns o + x*cos(angle) + y*sin(angle), with o the projection of this onto wAxis, x = this - o, y = wAxis x this
				+{
				+	// wAxis must be a unit length vector
				+
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
				+
				+    __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
				+	b3Scalar ssin = b3Sin( _angle );
				+    __m128 C = wAxis.cross( b3MakeVector3(mVec128) ).mVec128;
				+	O = _mm_and_ps(O, b3vFFF0fMask);
				+    b3Scalar scos = b3Cos( _angle );
				+	
				+	__m128 vsin = _mm_load_ss(&ssin);	//	(S 0 0 0)
				+    __m128 vcos = _mm_load_ss(&scos);	//	(S 0 0 0)
				+	
				+	__m128 Y = b3_pshufd_ps(O, 0xC9);	//	(Y Z X 0)
				+	__m128 Z = b3_pshufd_ps(O, 0xD2);	//	(Z X Y 0)
				+	O = _mm_add_ps(O, Y);
				+	vsin = b3_pshufd_ps(vsin, 0x80);	//	(S S S 0)
				+	O = _mm_add_ps(O, Z); // each of x, y, z now holds the dot product wAxis . this
				+    vcos = b3_pshufd_ps(vcos, 0x80);	//	(S S S 0)
				+	
				+    vsin = vsin * C; 
				+	O = O * wAxis.mVec128; 
				+	__m128 X = mVec128 - O; 
				+	
				+    O = O + vsin;
				+	vcos = vcos * X;
				+	O = O + vcos;	
				+	
				+	return b3MakeVector3(O);
				+#else
				+	b3Vector3 o = wAxis * wAxis.dot( *this );
				+	b3Vector3 _x = *this - o;
				+	b3Vector3 _y;
				+
				+	_y = wAxis.cross( *this );
				+
				+	return ( o + _x * b3Cos( _angle ) + _y * b3Sin( _angle ) );
				+#endif
				+}
			
 
				+
			
 
				+/**@brief Find the array element with the largest dot product against this vector
				+ * Small arrays (and builds without SSE/NEON) are scanned with a scalar loop;
				+ * larger arrays are handed to b3_maxdot_large.
				+ * @param array The vectors to test
				+ * @param array_count The number of vectors in array
				+ * @param dotOut Receives the largest dot product found
				+ * @return Index of the winning element. When the scalar loop finds no element
				+ *         (empty array, or no dot product greater than -B3_INFINITY) the assert
				+ *         fires and index 0 is returned with dotOut left at -B3_INFINITY. */
				+B3_FORCE_INLINE   long    b3Vector3::maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const
				+{
				+#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
				+    #if defined _WIN32 || defined (B3_USE_SSE)
				+        const long scalar_cutoff = 10;
				+        long b3_maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
				+    #elif defined B3_USE_NEON
				+        const long scalar_cutoff = 4;
				+        // Declared under the name used at the call site below, matching the
				+        // b3_mindot_large naming that minDot uses for its NEON function pointer.
				+        extern long (*b3_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
				+    #endif
				+    if( array_count < scalar_cutoff )
				+#endif//B3_USE_SSE || B3_USE_NEON
				+    {
				+        b3Scalar maxDot = -B3_INFINITY;
				+        long ptIndex = -1;
				+        // The index is a long so it cannot overflow before reaching array_count.
				+        for( long i = 0; i < array_count; i++ )
				+        {
				+            const b3Scalar dot = array[i].dot(*this);
				+            
				+            if( dot > maxDot )
				+            {
				+                maxDot = dot;
				+                ptIndex = i;
				+            }
				+        }
				+
				+		b3Assert(ptIndex>=0);
				+        if (ptIndex<0)
				+		{
				+			// Keep the returned index valid when asserts are compiled out.
				+			ptIndex = 0;
				+		}
				+        dotOut = maxDot;
				+        return ptIndex;
				+    }
				+#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
				+    return b3_maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
				+#endif
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE   long    b3Vector3::minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const
			
 
				+{
			
 
				+#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
			
 
				+    #if defined B3_USE_SSE
			
 
				+        const long scalar_cutoff = 10;
			
 
				+        long b3_mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
			
 
				+    #elif defined B3_USE_NEON
			
 
				+        const long scalar_cutoff = 4;
			
 
				+        extern long (*b3_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
			
 
				+    #else
			
 
				+        #error unhandled arch!
			
 
				+    #endif
			
 
				+    
			
 
				+    if( array_count < scalar_cutoff )
			
 
				+#endif//B3_USE_SSE || B3_USE_NEON
			
 
				+    {
			
 
				+        b3Scalar  minDot = B3_INFINITY;
			
 
				+        int i = 0;
			
 
				+        int ptIndex = -1;
			
 
				+        
			
 
				+        for( i = 0; i < array_count; i++ )
			
 
				+        {
			
 
				+            b3Scalar dot = array[i].dot(*this);
			
 
				+            
			
 
				+            if( dot < minDot )
			
 
				+            {
			
 
				+                minDot = dot;
			
 
				+                ptIndex = i;
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        dotOut = minDot;
			
 
				+        
			
 
				+        return ptIndex;
			
 
				+    }
			
 
				+#if defined (B3_USE_SSE) || defined (B3_USE_NEON)
			
 
				+    return b3_mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+
			
 
				+class b3Vector4 : public b3Vector3
			
 
				+{
			
 
				+public:
			
 
				+
			
 
				+	
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+	B3_FORCE_INLINE b3Vector4 absolute4() const 
			
 
				+	{
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
			
 
				+		return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask));
			
 
				+#elif defined(B3_USE_NEON)
			
 
				+		return b3Vector4(vabsq_f32(mVec128));
			
 
				+#else	
			
 
				+		return b3MakeVector4(
			
 
				+			b3Fabs(m_floats[0]), 
			
 
				+			b3Fabs(m_floats[1]), 
			
 
				+			b3Fabs(m_floats[2]),
			
 
				+			b3Fabs(m_floats[3]));
			
 
				+#endif
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	b3Scalar	getW() const { return m_floats[3];}
			
 
				+
			
 
				+
			
 
				+		B3_FORCE_INLINE int maxAxis4() const
			
 
				+	{
			
 
				+		int maxIndex = -1;
			
 
				+		b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT);
			
 
				+		if (m_floats[0] > maxVal)
			
 
				+		{
			
 
				+			maxIndex = 0;
			
 
				+			maxVal = m_floats[0];
			
 
				+		}
			
 
				+		if (m_floats[1] > maxVal)
			
 
				+		{
			
 
				+			maxIndex = 1;
			
 
				+			maxVal = m_floats[1];
			
 
				+		}
			
 
				+		if (m_floats[2] > maxVal)
			
 
				+		{
			
 
				+			maxIndex = 2;
			
 
				+			maxVal =m_floats[2];
			
 
				+		}
			
 
				+		if (m_floats[3] > maxVal)
			
 
				+		{
			
 
				+			maxIndex = 3;
			
 
				+			maxVal = m_floats[3];
			
 
				+		}
			
 
				+
			
 
				+		return maxIndex;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	B3_FORCE_INLINE int minAxis4() const
			
 
				+	{
			
 
				+		int minIndex = -1;
			
 
				+		b3Scalar minVal = b3Scalar(B3_LARGE_FLOAT);
			
 
				+		if (m_floats[0] < minVal)
			
 
				+		{
			
 
				+			minIndex = 0;
			
 
				+			minVal = m_floats[0];
			
 
				+		}
			
 
				+		if (m_floats[1] < minVal)
			
 
				+		{
			
 
				+			minIndex = 1;
			
 
				+			minVal = m_floats[1];
			
 
				+		}
			
 
				+		if (m_floats[2] < minVal)
			
 
				+		{
			
 
				+			minIndex = 2;
			
 
				+			minVal =m_floats[2];
			
 
				+		}
			
 
				+		if (m_floats[3] < minVal)
			
 
				+		{
			
 
				+			minIndex = 3;
			
 
				+			minVal = m_floats[3];
			
 
				+		}
			
 
				+		
			
 
				+		return minIndex;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	B3_FORCE_INLINE int closestAxis4() const 
			
 
				+	{
			
 
				+		return absolute4().maxAxis4();
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+ 
			
 
				+
			
 
				+  /**@brief Set x,y,z and zero w 
			
 
				+   * @param x Value of x
			
 
				+   * @param y Value of y
			
 
				+   * @param z Value of z
			
 
				+   */
			
 
				+		
			
 
				+
			
 
				+/*		void getValue(b3Scalar *m) const 
			
 
				+		{
			
 
				+			m[0] = m_floats[0];
			
 
				+			m[1] = m_floats[1];
			
 
				+			m[2] =m_floats[2];
			
 
				+		}
			
 
				+*/
			
 
				+/**@brief Set the values 
			
 
				+   * @param x Value of x
			
 
				+   * @param y Value of y
			
 
				+   * @param z Value of z
			
 
				+   * @param w Value of w
			
 
				+   */
			
 
				+		B3_FORCE_INLINE void	setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
			
 
				+		{
			
 
				+			m_floats[0]=_x;
			
 
				+			m_floats[1]=_y;
			
 
				+			m_floats[2]=_z;
			
 
				+			m_floats[3]=_w;
			
 
				+		}
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
			
 
				+B3_FORCE_INLINE void	b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal)
			
 
				+{
			
 
				+	#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+	unsigned char* dest = (unsigned char*) &destVal;
			
 
				+	unsigned char* src  = (unsigned char*) &sourceVal;
			
 
				+	dest[0] = src[7];
			
 
				+    dest[1] = src[6];
			
 
				+    dest[2] = src[5];
			
 
				+    dest[3] = src[4];
			
 
				+    dest[4] = src[3];
			
 
				+    dest[5] = src[2];
			
 
				+    dest[6] = src[1];
			
 
				+    dest[7] = src[0];
			
 
				+#else
			
 
				+	unsigned char* dest = (unsigned char*) &destVal;
			
 
				+	unsigned char* src  = (unsigned char*) &sourceVal;
			
 
				+	dest[0] = src[3];
			
 
				+    dest[1] = src[2];
			
 
				+    dest[2] = src[1];
			
 
				+    dest[3] = src[0];
			
 
				+#endif //B3_USE_DOUBLE_PRECISION
			
 
				+}
			
 
				+///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
			
 
				+B3_FORCE_INLINE void	b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec)
			
 
				+{
			
 
				+	for (int i=0;i<4;i++)
			
 
				+	{
			
 
				+		b3SwapScalarEndian(sourceVec[i],destVec[i]);
			
 
				+	}
			
 
				+
			
 
				+}
			
 
				+
			
 
				+///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
			
 
				+B3_FORCE_INLINE void	b3UnSwapVector3Endian(b3Vector3& vector)
			
 
				+{
			
 
				+
			
 
				+	b3Vector3	swappedVec;
			
 
				+	for (int i=0;i<4;i++)
			
 
				+	{
			
 
				+		b3SwapScalarEndian(vector[i],swappedVec[i]);
			
 
				+	}
			
 
				+	vector = swappedVec;
			
 
				+}
			
 
				+
			
 
				+template <class T>
			
 
				+B3_FORCE_INLINE void b3PlaneSpace1 (const T& n, T& p, T& q)
			
 
				+{
			
 
				+  if (b3Fabs(n[2]) > B3_SQRT12) {
			
 
				+    // choose p in y-z plane
			
 
				+    b3Scalar a = n[1]*n[1] + n[2]*n[2];
			
 
				+    b3Scalar k = b3RecipSqrt (a);
			
 
				+    p[0] = 0;
			
 
				+	p[1] = -n[2]*k;
			
 
				+	p[2] = n[1]*k;
			
 
				+    // set q = n x p
			
 
				+    q[0] = a*k;
			
 
				+	q[1] = -n[0]*p[2];
			
 
				+	q[2] = n[0]*p[1];
			
 
				+  }
			
 
				+  else {
			
 
				+    // choose p in x-y plane
			
 
				+    b3Scalar a = n[0]*n[0] + n[1]*n[1];
			
 
				+    b3Scalar k = b3RecipSqrt (a);
			
 
				+    p[0] = -n[1]*k;
			
 
				+	p[1] = n[0]*k;
			
 
				+	p[2] = 0;
			
 
				+    // set q = n x p
			
 
				+    q[0] = -n[2]*p[1];
			
 
				+	q[1] = n[2]*p[0];
			
 
				+	q[2] = a*k;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+
			
 
				/// Single-precision storage for the four components of a vector, used by the float (de)serializers.
				struct b3Vector3FloatData
				{
					float m_floats[4];
				};
			
 
				+
			
 
				/// Double-precision storage for the four components of a vector, used by the double (de)serializers.
				struct b3Vector3DoubleData
				{
					double m_floats[4];
				};
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Vector3::serializeFloat(struct	b3Vector3FloatData& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		dataOut.m_floats[i] = float(m_floats[i]);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void	b3Vector3::deSerializeFloat(const struct	b3Vector3FloatData& dataIn)
			
 
				+{
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		m_floats[i] = b3Scalar(dataIn.m_floats[i]);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Vector3::serializeDouble(struct	b3Vector3DoubleData& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		dataOut.m_floats[i] = double(m_floats[i]);
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void	b3Vector3::deSerializeDouble(const struct	b3Vector3DoubleData& dataIn)
			
 
				+{
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		m_floats[i] = b3Scalar(dataIn.m_floats[i]);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+B3_FORCE_INLINE	void	b3Vector3::serialize(struct	b3Vector3Data& dataOut) const
			
 
				+{
			
 
				+	///could also do a memcpy, check if it is worth it
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		dataOut.m_floats[i] = m_floats[i];
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE void	b3Vector3::deSerialize(const struct	b3Vector3Data& dataIn)
			
 
				+{
			
 
				+	for (int i=0;i<4;i++)
			
 
				+		m_floats[i] = dataIn.m_floats[i];
			
 
				+}
			
 
				+
			
 
				+
			
 
				+		
			
 
				+
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z)
			
 
				+{
			
 
				+	b3Vector3	tmp;
			
 
				+	tmp.setValue(x,y,z);
			
 
				+	return tmp;
			
 
				+}
			
 
				+
			
 
				+inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w)
			
 
				+{	
			
 
				+	b3Vector3	tmp;
			
 
				+	tmp.setValue(x,y,z);
			
 
				+	tmp.w = w;
			
 
				+	return tmp;
			
 
				+}
			
 
				+
			
 
				+inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w)
			
 
				+{
			
 
				+	b3Vector4	tmp;
			
 
				+	tmp.setValue(x,y,z,w);
			
 
				+	return tmp;
			
 
				+}
			
 
				+
			
 
				+#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) 
			
 
				+
			
 
				+inline b3Vector3 b3MakeVector3( b3SimdFloat4 v)
			
 
				+{
			
 
				+        b3Vector3 tmp;
			
 
				+        tmp.set128(v);
			
 
				+        return tmp;
			
 
				+}
			
 
				+
			
 
				+inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec)
			
 
				+{
			
 
				+	b3Vector4	tmp;
			
 
				+	tmp.set128(vec);
			
 
				+	return tmp;
			
 
				+}
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+#endif //B3_VECTOR3_H
			
--- a/include/Bullet3Common/shared/b3Float4.h
+++ b/include/Bullet3Common/shared/b3Float4.h
@@ -0,0 +1,97 @@
 
				+#ifndef B3_FLOAT4_H
			
 
				+#define B3_FLOAT4_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3PlatformDefinitions.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+	#include "Bullet3Common/b3Vector3.h"
			
 
				+	#define b3Float4 b3Vector3
			
 
				+	#define b3Float4ConstArg const b3Vector3&
			
 
				+	#define b3Dot3F4 b3Dot
			
 
				+	#define b3Cross3 b3Cross
			
 
				+	#define	b3MakeFloat4  b3MakeVector3
			
 
				+	inline b3Vector3 b3Normalized(const b3Vector3& vec)
			
 
				+	{
			
 
				+		return vec.normalized();
			
 
				+	}
			
 
				+
			
 
				+	inline b3Float4 b3FastNormalized3(b3Float4ConstArg v)
			
 
				+	{
			
 
				+		return v.normalized();
			
 
				+	}
			
 
				+
			
 
				+	inline b3Float4 b3MaxFloat4 (const b3Float4& a, const b3Float4& b)
			
 
				+	{
			
 
				+		b3Float4 tmp = a;
			
 
				+		tmp.setMax(b);
			
 
				+		return tmp;
			
 
				+	}
			
 
				+	inline b3Float4 b3MinFloat4 (const b3Float4& a, const b3Float4& b)
			
 
				+	{
			
 
				+		b3Float4 tmp = a;
			
 
				+		tmp.setMin(b);
			
 
				+		return tmp;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				+#else
			
 
				+	typedef float4	b3Float4;
			
 
				+	#define b3Float4ConstArg const b3Float4
			
 
				+	#define b3MakeFloat4 (float4)
			
 
				+	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)
			
 
				+	{
			
 
				+		float4 a1 = b3MakeFloat4(v0.xyz,0.f);
			
 
				+		float4 b1 = b3MakeFloat4(v1.xyz,0.f);
			
 
				+		return dot(a1, b1);
			
 
				+	}
			
 
				+	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)
			
 
				+	{
			
 
				+		float4 a1 = b3MakeFloat4(v0.xyz,0.f);
			
 
				+		float4 b1 = b3MakeFloat4(v1.xyz,0.f);
			
 
				+		return cross(a1, b1);
			
 
				+	}
			
 
				+	#define b3MinFloat4 min
			
 
				+	#define b3MaxFloat4 max
			
 
				+
			
 
				+	#define b3Normalized(a) normalize(a)
			
 
				+
			
 
				+#endif 
			
 
				+
			
 
				+
			
 
				+		
			
 
				+inline bool b3IsAlmostZero(b3Float4ConstArg v)
			
 
				+{
			
 
				+	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	
			
 
				+		return false;
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )
			
 
				+{
			
 
				+    float maxDot = -B3_INFINITY;
			
 
				+    int i = 0;
			
 
				+    int ptIndex = -1;
			
 
				+    for( i = 0; i < vecLen; i++ )
			
 
				+    {
			
 
				+        float dot = b3Dot3F4(vecArray[i],vec);
			
 
				+            
			
 
				+        if( dot > maxDot )
			
 
				+        {
			
 
				+            maxDot = dot;
			
 
				+            ptIndex = i;
			
 
				+        }
			
 
				+    }
			
 
				+	b3Assert(ptIndex>=0);
			
 
				+    if (ptIndex<0)
			
 
				+	{
			
 
				+		ptIndex = 0;
			
 
				+	}
			
 
				+    *dotOut = maxDot;
			
 
				+    return ptIndex;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_FLOAT4_H
			
--- a/include/Bullet3Common/shared/b3Int2.h
+++ b/include/Bullet3Common/shared/b3Int2.h
@@ -0,0 +1,64 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_INT2_H
			
 
				+#define B3_INT2_H
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				/// Pair of unsigned ints, addressable either by name (x, y) or by index (s[0], s[1]).
				struct b3UnsignedInt2
				{
					union
					{
						struct
						{
							unsigned int x;
							unsigned int y;
						};
						struct
						{
							unsigned int s[2];
						};
					};
				};
			
 
				+
			
 
				/// Pair of ints, addressable either by name (x, y) or by index (s[0], s[1]).
				struct b3Int2
				{
					union
					{
						struct
						{
							int x;
							int y;
						};
						struct
						{
							int s[2];
						};
					};
				};
			
 
				+
			
 
				+inline b3Int2 b3MakeInt2(int x, int y)
			
 
				+{
			
 
				+	b3Int2 v;
			
 
				+	v.s[0] = x; v.s[1] = y;
			
 
				+	return v;
			
 
				+}
			
 
				+#else
			
 
				+
			
 
				+#define b3UnsignedInt2 uint2
			
 
				+#define b3Int2 int2
			
 
				+#define b3MakeInt2 (int2)
			
 
				+
			
 
				+#endif //__cplusplus
			
 
				+#endif
			
--- a/include/Bullet3Common/shared/b3Int4.h
+++ b/include/Bullet3Common/shared/b3Int4.h
@@ -0,0 +1,68 @@
 
				+#ifndef B3_INT4_H
			
 
				+#define B3_INT4_H
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3UnsignedInt4
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	union
			
 
				+	{
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int x,y,z,w;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			unsigned int s[4];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3Int4
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	union
			
 
				+	{
			
 
				+		struct
			
 
				+		{
			
 
				+			int x,y,z,w;
			
 
				+		};
			
 
				+		struct
			
 
				+		{
			
 
				+			int s[4];
			
 
				+		};
			
 
				+	};
			
 
				+};
			
 
				+
			
 
				+B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
			
 
				+{
			
 
				+	b3Int4 v;
			
 
				+	v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
			
 
				+	return v;
			
 
				+}
			
 
				+
			
 
				+B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
			
 
				+{
			
 
				+	b3UnsignedInt4 v;
			
 
				+	v.s[0] = x; v.s[1] = y; v.s[2] = z; v.s[3] = w;
			
 
				+	return v;
			
 
				+}
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+
			
 
				+#define b3UnsignedInt4 uint4
			
 
				+#define b3Int4 int4
			
 
				+#define b3MakeInt4 (int4)
			
 
				+#define b3MakeUnsignedInt4 (uint4)
			
 
				+
			
 
				+
			
 
				+#endif //__cplusplus
			
 
				+
			
 
				+#endif //B3_INT4_H
			
--- a/include/Bullet3Common/shared/b3Mat3x3.h
+++ b/include/Bullet3Common/shared/b3Mat3x3.h
@@ -0,0 +1,179 @@
 
				+
			
 
				+#ifndef B3_MAT3x3_H
			
 
				+#define B3_MAT3x3_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Quat.h"
			
 
				+
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+#define b3Mat3x3 b3Matrix3x3
			
 
				+#define b3Mat3x3ConstArg const b3Matrix3x3&
			
 
				+
			
 
				+inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat)
			
 
				+{
			
 
				+	return b3Mat3x3(quat);
			
 
				+}
			
 
				+
			
 
				+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
			
 
				+{
			
 
				+	return mat.absolute();
			
 
				+}
			
 
				+
			
 
				+#define b3GetRow(m,row) m.getRow(row)
			
 
				+
			
 
				+__inline
			
 
				+b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b)
			
 
				+{
			
 
				+	return b*a;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+	b3Float4 m_row[3];
			
 
				+}b3Mat3x3;
			
 
				+
			
 
				+#define b3Mat3x3ConstArg const b3Mat3x3
			
 
				+#define b3GetRow(m,row) (m.m_row[row])
			
 
				+
			
 
				+inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
			
 
				+{
			
 
				+	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
			
 
				+	b3Mat3x3 out;
			
 
				+
			
 
				+	out.m_row[0].x=1-2*quat2.y-2*quat2.z;
			
 
				+	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
			
 
				+	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
			
 
				+	out.m_row[0].w = 0.f;
			
 
				+
			
 
				+	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
			
 
				+	out.m_row[1].y=1-2*quat2.x-2*quat2.z;
			
 
				+	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
			
 
				+	out.m_row[1].w = 0.f;
			
 
				+
			
 
				+	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
			
 
				+	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
			
 
				+	out.m_row[2].z=1-2*quat2.x-2*quat2.y;
			
 
				+	out.m_row[2].w = 0.f;
			
 
				+
			
 
				+	return out;
			
 
				+}
			
 
				+
			
 
				+inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
			
 
				+{
			
 
				+	b3Mat3x3 out;
			
 
				+	out.m_row[0] = fabs(matIn.m_row[0]);
			
 
				+	out.m_row[1] = fabs(matIn.m_row[1]);
			
 
				+	out.m_row[2] = fabs(matIn.m_row[2]);
			
 
				+	return out;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtZero();
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtIdentity();
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtTranspose(b3Mat3x3 m);
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);
			
 
				+
			
 
				+__inline
			
 
				+b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);
			
 
				+
			
 
				+__inline
			
 
				+b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtZero()
			
 
				+{
			
 
				+	b3Mat3x3 m;
			
 
				+	m.m_row[0] = (b3Float4)(0.f);
			
 
				+	m.m_row[1] = (b3Float4)(0.f);
			
 
				+	m.m_row[2] = (b3Float4)(0.f);
			
 
				+	return m;
			
 
				+}
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtIdentity()
			
 
				+{
			
 
				+	b3Mat3x3 m;
			
 
				+	m.m_row[0] = (b3Float4)(1,0,0,0);
			
 
				+	m.m_row[1] = (b3Float4)(0,1,0,0);
			
 
				+	m.m_row[2] = (b3Float4)(0,0,1,0);
			
 
				+	return m;
			
 
				+}
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtTranspose(b3Mat3x3 m)
			
 
				+{
			
 
				+	b3Mat3x3 out;
			
 
				+	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
			
 
				+	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
			
 
				+	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
			
 
				+	return out;
			
 
				+}
			
 
				+
			
 
				+__inline
			
 
				+b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)
			
 
				+{
			
 
				+	b3Mat3x3 transB;
			
 
				+	transB = mtTranspose( b );
			
 
				+	b3Mat3x3 ans;
			
 
				+	//	why this doesn't run when 0ing in the for{}
			
 
				+	a.m_row[0].w = 0.f;
			
 
				+	a.m_row[1].w = 0.f;
			
 
				+	a.m_row[2].w = 0.f;
			
 
				+	for(int i=0; i<3; i++)
			
 
				+	{
			
 
				+//	a.m_row[i].w = 0.f;
			
 
				+		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);
			
 
				+		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);
			
 
				+		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);
			
 
				+		ans.m_row[i].w = 0.f;
			
 
				+	}
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+__inline
			
 
				+b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)
			
 
				+{
			
 
				+	b3Float4 ans;
			
 
				+	ans.x = b3Dot3F4( a.m_row[0], b );
			
 
				+	ans.y = b3Dot3F4( a.m_row[1], b );
			
 
				+	ans.z = b3Dot3F4( a.m_row[2], b );
			
 
				+	ans.w = 0.f;
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+__inline
			
 
				+b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)
			
 
				+{
			
 
				+	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
			
 
				+	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
			
 
				+	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
			
 
				+
			
 
				+	b3Float4 ans;
			
 
				+	ans.x = b3Dot3F4( a, colx );
			
 
				+	ans.y = b3Dot3F4( a, coly );
			
 
				+	ans.z = b3Dot3F4( a, colz );
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_MAT3x3_H
			
--- a/include/Bullet3Common/shared/b3PlatformDefinitions.h
+++ b/include/Bullet3Common/shared/b3PlatformDefinitions.h
@@ -0,0 +1,37 @@
 
				+#ifndef B3_PLATFORM_DEFINITIONS_H
			
 
				+#define B3_PLATFORM_DEFINITIONS_H
			
 
				+
			
 
				/* NOTE(review): looks like a leftover test type; nothing in this header uses it — confirm before removing. */
				struct MyTest
				{
					int bla;
				};
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+//#define b3ConstArray(a) const b3AlignedObjectArray<a>&
			
 
				+#define b3ConstArray(a) const a*
			
 
				+#define b3AtomicInc(a) ((*a)++)
			
 
				+
			
 
				/// Host-side stand-in for the OpenCL atomic_add: adds val to *p and returns the value
				/// *p held before the addition. This is a plain read followed by a plain write;
				/// the code itself performs no synchronization.
				inline int b3AtomicAdd (volatile int *p, int val)
				{
					const int previous = *p;
					*p = previous + val;
					return previous;
				}
			
 
				+
			
 
				+#define __global 
			
 
				+#else
			
 
				+//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
			
 
				+#define B3_LARGE_FLOAT 1e18f
			
 
				+#define B3_INFINITY 1e18f
			
 
				+#define b3Assert(a)
			
 
				+#define b3ConstArray(a) __global const a*
			
 
				+#define b3AtomicInc atomic_inc
			
 
				+#define b3AtomicAdd atomic_add
			
 
				+#define b3Fabs fabs
			
 
				+#define b3Sqrt native_sqrt
			
 
				+#define b3Sin native_sin
			
 
				+#define b3Cos native_cos
			
 
				+#endif
			
 
				+
			
 
				+#endif
			
--- a/include/Bullet3Common/shared/b3Quat.h
+++ b/include/Bullet3Common/shared/b3Quat.h
@@ -0,0 +1,103 @@
 
				+#ifndef B3_QUAT_H
			
 
				+#define B3_QUAT_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3PlatformDefinitions.h"
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+	#include "Bullet3Common/b3Quaternion.h"
			
 
				+	#include "Bullet3Common/b3Transform.h"
			
 
				+
			
 
				+	#define b3Quat b3Quaternion
			
 
				+	#define b3QuatConstArg const b3Quaternion&
			
 
				+	inline b3Quat b3QuatInverse(b3QuatConstArg orn)
			
 
				+	{
			
 
				+		return orn.inverse();
			
 
				+	}
			
 
				+
			
 
				+	inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)
			
 
				+	{
			
 
				+		b3Transform tr;
			
 
				+		tr.setOrigin(translation);
			
 
				+		tr.setRotation(orientation);
			
 
				+		return tr(point);
			
 
				+	}
			
 
				+
			
 
				+#else
			
 
				+	typedef float4	b3Quat;
			
 
				+	#define b3QuatConstArg const b3Quat
			
 
				+	
			
 
				+	
			
 
				+inline float4 b3FastNormalize4(float4 v)
			
 
				+{
			
 
				+	v = (float4)(v.xyz,0.f);
			
 
				+	return fast_normalize(v);
			
 
				+}
			
 
				+	
			
 
				+inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
			
 
				+inline b3Quat b3QuatNormalized(b3QuatConstArg in);
			
 
				+inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
			
 
				+inline b3Quat b3QuatInvert(b3QuatConstArg q);
			
 
				+inline b3Quat b3QuatInverse(b3QuatConstArg q);
			
 
				+
			
 
				+inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
			
 
				+{
			
 
				+	b3Quat ans;
			
 
				+	ans = b3Cross3( a, b );
			
 
				+	ans += a.w*b+b.w*a;
			
 
				+//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
			
 
				+	ans.w = a.w*b.w - b3Dot3F4(a, b);
			
 
				+	return ans;
			
 
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatNormalized(b3QuatConstArg in)
			
 
				+{
			
 
				+	b3Quat q;
			
 
				+	q=in;
			
 
				+	//return b3FastNormalize4(in);
			
 
				+	float len = native_sqrt(dot(q, q));
			
 
				+	if(len > 0.f)
			
 
				+	{
			
 
				+		q *= 1.f / len;
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		q.x = q.y = q.z = 0.f;
			
 
				+		q.w = 1.f;
			
 
				+	}
			
 
				+	return q;
			
 
				+}
			
 
				+inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
			
 
				+{
			
 
				+	b3Quat qInv = b3QuatInvert( q );
			
 
				+	float4 vcpy = vec;
			
 
				+	vcpy.w = 0.f;
			
 
				+	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);
			
 
				+	return out;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+inline b3Quat b3QuatInverse(b3QuatConstArg q)
			
 
				+{
			
 
				+	return (b3Quat)(-q.xyz, q.w);
			
 
				+}
			
 
				+
			
 
				+inline b3Quat b3QuatInvert(b3QuatConstArg q)
			
 
				+{
			
 
				+	return (b3Quat)(-q.xyz, q.w);
			
 
				+}
			
 
				+
			
 
				+inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)
			
 
				+{
			
 
				+	return b3QuatRotate( b3QuatInvert( q ), vec );
			
 
				+}
			
 
				+
			
 
				+inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)
			
 
				+{
			
 
				+	return b3QuatRotate( orientation, point ) + (translation);
			
 
				+}
			
 
				+	
			
 
				+#endif 
			
 
				+
			
 
				+#endif //B3_QUAT_H
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h
@@ -0,0 +1,159 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONTACT_SOLVER_INFO
			
 
				+#define B3_CONTACT_SOLVER_INFO
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+
			
 
				+enum	b3SolverMode // bit flags; b3ContactSolverInfo's constructor ORs two of them into m_solverMode
			
 
				+{
			
 
				+	B3_SOLVER_RANDMIZE_ORDER = 1,
			
 
				+	B3_SOLVER_FRICTION_SEPARATE = 2,
			
 
				+	B3_SOLVER_USE_WARMSTARTING = 4,
			
 
				+	B3_SOLVER_USE_2_FRICTION_DIRECTIONS = 16, // note: bit value 8 is not assigned to any flag
			
 
				+	B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
			
 
				+	B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
			
 
				+	B3_SOLVER_CACHE_FRIENDLY = 128,
			
 
				+	B3_SOLVER_SIMD = 256,
			
 
				+	B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512,
			
 
				+	B3_SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024
			
 
				+};
			
 
				+
			
 
				+struct b3ContactSolverInfoData // plain field holder; b3ContactSolverInfo's constructor assigns the defaults
			
 
				+{
			
 
				+	
			
 
				+
			
 
				+	b3Scalar	m_tau;
			
 
				+	b3Scalar	m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
			
 
				+	b3Scalar	m_friction;
			
 
				+	b3Scalar	m_timeStep;
			
 
				+	b3Scalar	m_restitution;
			
 
				+	int		m_numIterations;
			
 
				+	b3Scalar	m_maxErrorReduction;
			
 
				+	b3Scalar	m_sor;
			
 
				+	b3Scalar	m_erp;//used as Baumgarte factor
			
 
				+	b3Scalar	m_erp2;//used in Split Impulse
			
 
				+	b3Scalar	m_globalCfm;//constraint force mixing
			
 
				+	int			m_splitImpulse;//int used as an on/off flag (the default is assigned as 'true')
			
 
				+	b3Scalar	m_splitImpulsePenetrationThreshold;
			
 
				+	b3Scalar	m_splitImpulseTurnErp;
			
 
				+	b3Scalar	m_linearSlop;
			
 
				+	b3Scalar	m_warmstartingFactor;
			
 
				+
			
 
				+	int			m_solverMode;//OR-combination of b3SolverMode flags
			
 
				+	int	m_restingContactRestitutionThreshold;//marked 'unused as of 2.81' where its default is assigned
			
 
				+	int			m_minimumSolverBatchSize;//try to combine islands until the amount of constraints reaches this limit
			
 
				+	b3Scalar	m_maxGyroscopicForce;//only used to clamp forces for bodies that have their gyroscopic-force flag set
			
 
				+	b3Scalar	m_singleAxisRollingFrictionThreshold;//if the velocity is above this threshold, a single constraint row (axis) is used, otherwise 3 rows
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+struct b3ContactSolverInfo : public b3ContactSolverInfoData
			
 
				+{
			
 
				+
			
 
				+	/// Assigns a default to every field inherited from b3ContactSolverInfoData.
			
 
				+	inline b3ContactSolverInfo()
			
 
				+	{
			
 
				+		// time step and iteration/batch counts
			
 
				+		m_timeStep = b3Scalar(1.f/60.f);
			
 
				+		m_numIterations = 10;
			
 
				+		m_minimumSolverBatchSize = 128; //try to combine islands until the amount of constraints reaches this limit
			
 
				+
			
 
				+		// general scalar coefficients
			
 
				+		m_tau = b3Scalar(0.6);
			
 
				+		m_damping = b3Scalar(1.0);
			
 
				+		m_friction = b3Scalar(0.3);
			
 
				+		m_restitution = b3Scalar(0.);
			
 
				+		m_restingContactRestitutionThreshold = 2;//unused as of 2.81
			
 
				+		m_maxErrorReduction = b3Scalar(20.);
			
 
				+		m_sor = b3Scalar(1.);
			
 
				+		m_linearSlop = b3Scalar(0.0);
			
 
				+
			
 
				+		// ERP / CFM
			
 
				+		m_erp = b3Scalar(0.2);
			
 
				+		m_erp2 = b3Scalar(0.8);
			
 
				+		m_globalCfm = b3Scalar(0.);
			
 
				+
			
 
				+		// split impulse
			
 
				+		m_splitImpulse = true;
			
 
				+		m_splitImpulsePenetrationThreshold = -.04f;
			
 
				+		m_splitImpulseTurnErp = 0.1f;
			
 
				+
			
 
				+		// solver mode and warm starting
			
 
				+		m_warmstartingFactor=b3Scalar(0.85);
			
 
				+		//m_solverMode =  B3_SOLVER_USE_WARMSTARTING |  B3_SOLVER_SIMD | B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION|B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING;// | B3_SOLVER_RANDMIZE_ORDER;
			
 
				+		m_solverMode = B3_SOLVER_SIMD | B3_SOLVER_USE_WARMSTARTING;// | B3_SOLVER_RANDMIZE_ORDER;
			
 
				+
			
 
				+		// clamps and thresholds
			
 
				+		m_maxGyroscopicForce = 100.f; ///only used to clamp forces for bodies that have their B3_ENABLE_GYROPSCOPIC_FORCE flag set (using b3RigidBody::setFlag)
			
 
				+		m_singleAxisRollingFrictionThreshold = 1e30f;///if the velocity is above this threshold, it will use a single constraint row (axis), otherwise 3 rows.
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct b3ContactSolverInfoDoubleData // same field names as b3ContactSolverInfoData, scalars as double, ints grouped last
			
 
				+{
			
 
				+	double		m_tau;
			
 
				+	double		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
			
 
				+	double		m_friction;
			
 
				+	double		m_timeStep;
			
 
				+	double		m_restitution;
			
 
				+	double		m_maxErrorReduction;
			
 
				+	double		m_sor;
			
 
				+	double		m_erp;//used as Baumgarte factor
			
 
				+	double		m_erp2;//used in Split Impulse
			
 
				+	double		m_globalCfm;//constraint force mixing
			
 
				+	double		m_splitImpulsePenetrationThreshold;
			
 
				+	double		m_splitImpulseTurnErp;
			
 
				+	double		m_linearSlop;
			
 
				+	double		m_warmstartingFactor;
			
 
				+	double		m_maxGyroscopicForce;
			
 
				+	double		m_singleAxisRollingFrictionThreshold;
			
 
				+
			
 
				+	int			m_numIterations;
			
 
				+	int			m_solverMode;
			
 
				+	int			m_restingContactRestitutionThreshold;
			
 
				+	int			m_minimumSolverBatchSize;
			
 
				+	int			m_splitImpulse;
			
 
				+	char		m_padding[4];//explicit filler after the five ints; presumably keeps the size a multiple of 8 - confirm against the DNA description
			
 
				+
			
 
				+};
			
 
				+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct b3ContactSolverInfoFloatData // same field names as b3ContactSolverInfoData, scalars as float, ints grouped last
			
 
				+{
			
 
				+	float		m_tau;
			
 
				+	float		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
			
 
				+	float		m_friction;
			
 
				+	float		m_timeStep;
			
 
				+
			
 
				+	float		m_restitution;
			
 
				+	float		m_maxErrorReduction;
			
 
				+	float		m_sor;
			
 
				+	float		m_erp;//used as Baumgarte factor
			
 
				+
			
 
				+	float		m_erp2;//used in Split Impulse
			
 
				+	float		m_globalCfm;//constraint force mixing
			
 
				+	float		m_splitImpulsePenetrationThreshold;
			
 
				+	float		m_splitImpulseTurnErp;
			
 
				+
			
 
				+	float		m_linearSlop;
			
 
				+	float		m_warmstartingFactor;
			
 
				+	float		m_maxGyroscopicForce;
			
 
				+	float		m_singleAxisRollingFrictionThreshold;
			
 
				+
			
 
				+	int			m_numIterations;
			
 
				+	int			m_solverMode;
			
 
				+	int			m_restingContactRestitutionThreshold;
			
 
				+	int			m_minimumSolverBatchSize;
			
 
				+
			
 
				+	int			m_splitImpulse;
			
 
				+	char		m_padding[4];//explicit filler after the five ints; purpose presumably alignment - confirm against the DNA description
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_CONTACT_SOLVER_INFO
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h
@@ -0,0 +1,35 @@
 
				+
			
 
				+#ifndef B3_FIXED_CONSTRAINT_H
			
 
				+#define B3_FIXED_CONSTRAINT_H
			
 
				+
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3FixedConstraint : public b3TypedConstraint
			
 
				+{
			
 
				+	b3Vector3 m_pivotInA; // private (class-default access), like the two members below
			
 
				+	b3Vector3 m_pivotInB;
			
 
				+	b3Quaternion m_relTargetAB;
			
 
				+
			
 
				+public:
			
 
				+	b3FixedConstraint(int  rbA,int rbB, const b3Transform& frameInA,const b3Transform& frameInB); // rbA/rbB are ints, presumably rigid-body indices - confirm in the .cpp
			
 
				+	
			
 
				+	virtual ~b3FixedConstraint();
			
 
				+
			
 
				+	
			
 
				+	virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	virtual void getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	virtual	void	setParam(int num, b3Scalar value, int axis = -1) // no parameter is supported: the body only trips b3Assert(0)
			
 
				+	{
			
 
				+		b3Assert(0);
			
 
				+	}
			
 
				+	virtual	b3Scalar getParam(int num, int axis = -1) const // no parameter is supported: asserts, then returns 0
			
 
				+	{
			
 
				+		b3Assert(0);
			
 
				+		return 0.f;
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_FIXED_CONSTRAINT_H
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h
@@ -0,0 +1,550 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+/// 2009 March: b3Generic6DofConstraint refactored by Roman Ponomarev
			
 
				+/// Added support for generic constraint solver through getInfo1/getInfo2 methods
			
 
				+
			
 
				+/*
			
 
				+2007-09-09
			
 
				+b3Generic6DofConstraint Refactored by Francisco León
			
 
				+email: [email protected]
			
 
				+http://gimpact.sf.net
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GENERIC_6DOF_CONSTRAINT_H
			
 
				+#define B3_GENERIC_6DOF_CONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "b3JacobianEntry.h"
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+//! Rotation Limit structure for generic joints
			
 
				+class b3RotationalLimitMotor
			
 
				+{
			
 
				+public:
			
 
				+    //! limit_parameters
			
 
				+    //!@{
			
 
				+    b3Scalar m_loLimit;//!< joint limit (lower); see isLimited()
			
 
				+    b3Scalar m_hiLimit;//!< joint limit (upper); see isLimited()
			
 
				+    b3Scalar m_targetVelocity;//!< target motor velocity
			
 
				+    b3Scalar m_maxMotorForce;//!< max force on motor
			
 
				+    b3Scalar m_maxLimitForce;//!< max force on limit
			
 
				+    b3Scalar m_damping;//!< Damping.
			
 
				+    b3Scalar m_limitSoftness;//!< Relaxation factor
			
 
				+    b3Scalar m_normalCFM;//!< Constraint force mixing factor
			
 
				+    b3Scalar m_stopERP;//!< Error tolerance factor when joint is at limit
			
 
				+    b3Scalar m_stopCFM;//!< Constraint force mixing factor when joint is at limit
			
 
				+    b3Scalar m_bounce;//!< restitution factor
			
 
				+    bool m_enableMotor;//!< motor switch, read by needApplyTorques()
			
 
				+
			
 
				+    //!@}
			
 
				+
			
 
				+    //! temp_variables
			
 
				+    //!@{
			
 
				+    b3Scalar m_currentLimitError;//!< how much this limit is violated
			
 
				+    b3Scalar m_currentPosition;//!< current value of angle
			
 
				+    int m_currentLimit;//!< 0=free, 1=at lo limit, 2=at hi limit
			
 
				+    b3Scalar m_accumulatedImpulse;
			
 
				+    //!@}
			
 
				+
			
 
				+    //! Gives every member a defined value. The default limits have lo > hi,
			
 
				+    //! so isLimited() reports false, and the motor is disabled.
			
 
				+    b3RotationalLimitMotor()
			
 
				+    {
			
 
				+        m_accumulatedImpulse = 0.f;
			
 
				+        m_targetVelocity = 0;
			
 
				+        m_maxMotorForce = 0.1f;
			
 
				+        m_maxLimitForce = 300.0f;
			
 
				+        m_loLimit = 1.0f;
			
 
				+        m_hiLimit = -1.0f;
			
 
				+        m_normalCFM = 0.f;
			
 
				+        m_stopERP = 0.2f;
			
 
				+        m_stopCFM = 0.f;
			
 
				+        m_bounce = 0.0f;
			
 
				+        m_damping = 1.0f;
			
 
				+        m_limitSoftness = 0.5f;
			
 
				+        m_currentLimit = 0;
			
 
				+        m_currentLimitError = 0;
			
 
				+        m_currentPosition = 0; // previously left uninitialized
			
 
				+        m_enableMotor = false;
			
 
				+    }
			
 
				+
			
 
				+    //! Copies every member of limot. The earlier version skipped m_maxLimitForce,
			
 
				+    //! m_damping, m_currentPosition and m_accumulatedImpulse, which left those
			
 
				+    //! fields indeterminate in the copy.
			
 
				+    b3RotationalLimitMotor(const b3RotationalLimitMotor & limot)
			
 
				+    {
			
 
				+        m_loLimit = limot.m_loLimit;
			
 
				+        m_hiLimit = limot.m_hiLimit;
			
 
				+        m_targetVelocity = limot.m_targetVelocity;
			
 
				+        m_maxMotorForce = limot.m_maxMotorForce;
			
 
				+        m_maxLimitForce = limot.m_maxLimitForce;
			
 
				+        m_damping = limot.m_damping;
			
 
				+        m_limitSoftness = limot.m_limitSoftness;
			
 
				+        m_normalCFM = limot.m_normalCFM;
			
 
				+        m_stopERP = limot.m_stopERP;
			
 
				+        m_stopCFM = limot.m_stopCFM;
			
 
				+        m_bounce = limot.m_bounce;
			
 
				+        m_enableMotor = limot.m_enableMotor;
			
 
				+        m_currentLimitError = limot.m_currentLimitError;
			
 
				+        m_currentPosition = limot.m_currentPosition;
			
 
				+        m_currentLimit = limot.m_currentLimit;
			
 
				+        m_accumulatedImpulse = limot.m_accumulatedImpulse;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+    //! Is limited: false when m_loLimit > m_hiLimit, true otherwise
			
 
				+    bool isLimited()
			
 
				+    {
			
 
				+        if(m_loLimit > m_hiLimit) return false;
			
 
				+        return true;
			
 
				+    }
			
 
				+
			
 
				+    //! Need apply correction: false only when the joint is free (m_currentLimit == 0) and the motor is off
			
 
				+    bool needApplyTorques()
			
 
				+    {
			
 
				+        if(m_currentLimit == 0 && m_enableMotor == false) return false;
			
 
				+        return true;
			
 
				+    }
			
 
				+
			
 
				+    //! calculates  error
			
 
				+    /*!
			
 
				+    calculates m_currentLimit and m_currentLimitError.
			
 
				+    */
			
 
				+    int testLimitValue(b3Scalar test_value);
			
 
				+
			
 
				+    //! apply the correction impulses for two bodies
			
 
				+    b3Scalar solveAngularLimits(b3Scalar timeStep,b3Vector3& axis, b3Scalar jacDiagABInv,b3RigidBodyData * body0, b3RigidBodyData * body1);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+class b3TranslationalLimitMotor
			
 
				+{
			
 
				+public:
			
 
				+    b3Vector3 m_lowerLimit;//!< the constraint lower limits
			
 
				+    b3Vector3 m_upperLimit;//!< the constraint upper limits
			
 
				+    b3Vector3 m_accumulatedImpulse;
			
 
				+    //! Linear_Limit_parameters
			
 
				+    //!@{
			
 
				+    b3Vector3	m_normalCFM;//!< Constraint force mixing factor
			
 
				+    b3Vector3	m_stopERP;//!< Error tolerance factor when joint is at limit
			
 
				+    b3Vector3	m_stopCFM;//!< Constraint force mixing factor when joint is at limit
			
 
				+    b3Vector3	m_targetVelocity;//!< target motor velocity
			
 
				+    b3Vector3	m_maxMotorForce;//!< max force on motor
			
 
				+    b3Vector3	m_currentLimitError;//!< how much this limit is violated
			
 
				+    b3Vector3	m_currentLinearDiff;//!< Current relative offset of constraint frames
			
 
				+    b3Scalar	m_limitSoftness;//!< Softness for linear limit
			
 
				+    b3Scalar	m_damping;//!< Damping for linear limit
			
 
				+    b3Scalar	m_restitution;//!< Bounce parameter for linear limit
			
 
				+    //!@}
			
 
				+    bool		m_enableMotor[3];
			
 
				+    int			m_currentLimit[3];//!< 0=free, 1=at lower limit, 2=at upper limit
			
 
				+
			
 
				+    //! Gives every member a defined value: limits at zero, motors off on all three axes,
			
 
				+    //! and the per-axis limit state "free".
			
 
				+    b3TranslationalLimitMotor()
			
 
				+    {
			
 
				+        m_lowerLimit.setValue(0.f,0.f,0.f);
			
 
				+        m_upperLimit.setValue(0.f,0.f,0.f);
			
 
				+        m_accumulatedImpulse.setValue(0.f,0.f,0.f);
			
 
				+        m_normalCFM.setValue(0.f, 0.f, 0.f);
			
 
				+        m_stopERP.setValue(0.2f, 0.2f, 0.2f);
			
 
				+        m_stopCFM.setValue(0.f, 0.f, 0.f);
			
 
				+        // Previously left uninitialized.
			
 
				+        m_currentLimitError.setValue(0.f, 0.f, 0.f);
			
 
				+        m_currentLinearDiff.setValue(0.f, 0.f, 0.f);
			
 
				+
			
 
				+        m_limitSoftness = 0.7f;
			
 
				+        m_damping = b3Scalar(1.0f);
			
 
				+        m_restitution = b3Scalar(0.5f);
			
 
				+        for(int i=0; i < 3; i++)
			
 
				+        {
			
 
				+            m_enableMotor[i] = false;
			
 
				+            m_currentLimit[i] = 0; // previously uninitialized although needApplyForce() reads it
			
 
				+            m_targetVelocity[i] = b3Scalar(0.f);
			
 
				+            m_maxMotorForce[i] = b3Scalar(0.f);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    //! Copies every member of other. The earlier version skipped m_currentLimitError,
			
 
				+    //! m_currentLinearDiff and m_currentLimit, which left them indeterminate in the copy.
			
 
				+    b3TranslationalLimitMotor(const b3TranslationalLimitMotor & other )
			
 
				+    {
			
 
				+        m_lowerLimit = other.m_lowerLimit;
			
 
				+        m_upperLimit = other.m_upperLimit;
			
 
				+        m_accumulatedImpulse = other.m_accumulatedImpulse;
			
 
				+
			
 
				+        m_limitSoftness = other.m_limitSoftness ;
			
 
				+        m_damping = other.m_damping;
			
 
				+        m_restitution = other.m_restitution;
			
 
				+        m_normalCFM = other.m_normalCFM;
			
 
				+        m_stopERP = other.m_stopERP;
			
 
				+        m_stopCFM = other.m_stopCFM;
			
 
				+        m_currentLimitError = other.m_currentLimitError;
			
 
				+        m_currentLinearDiff = other.m_currentLinearDiff;
			
 
				+
			
 
				+        for(int i=0; i < 3; i++)
			
 
				+        {
			
 
				+            m_enableMotor[i] = other.m_enableMotor[i];
			
 
				+            m_currentLimit[i] = other.m_currentLimit[i];
			
 
				+            m_targetVelocity[i] = other.m_targetVelocity[i];
			
 
				+            m_maxMotorForce[i] = other.m_maxMotorForce[i];
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    //! Test limit
			
 
				+    /*!
			
 
				+    - free means upper < lower (returns false),
			
 
				+    - locked means upper == lower (returns true)
			
 
				+    - limited means upper > lower (returns true)
			
 
				+    - limitIndex: indexes m_lowerLimit / m_upperLimit, i.e. one of the 3 linear axes
			
 
				+    */
			
 
				+    inline bool	isLimited(int limitIndex)
			
 
				+    {
			
 
				+       return (m_upperLimit[limitIndex] >= m_lowerLimit[limitIndex]);
			
 
				+    }
			
 
				+    //! false only when the axis is free (m_currentLimit == 0) and its motor is off
			
 
				+    inline bool needApplyForce(int limitIndex)
			
 
				+    {
			
 
				+        if(m_currentLimit[limitIndex] == 0 && m_enableMotor[limitIndex] == false) return false;
			
 
				+        return true;
			
 
				+    }
			
 
				+    int testLimitValue(int limitIndex, b3Scalar test_value);
			
 
				+
			
 
				+
			
 
				+    b3Scalar solveLinearAxis(
			
 
				+        b3Scalar timeStep,
			
 
				+        b3Scalar jacDiagABInv,
			
 
				+        b3RigidBodyData& body1,const b3Vector3 &pointInA,
			
 
				+        b3RigidBodyData& body2,const b3Vector3 &pointInB,
			
 
				+        int limit_index,
			
 
				+        const b3Vector3 & axis_normal_on_a,
			
 
				+        const b3Vector3 & anchorPos);
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+enum b36DofFlags // three single-bit flags; presumably shifted per axis by B3_6DOF_FLAGS_AXIS_SHIFT ("bits per axis") - confirm in the .cpp
			
 
				+{
			
 
				+	B3_6DOF_FLAGS_CFM_NORM = 1,
			
 
				+	B3_6DOF_FLAGS_CFM_STOP = 2,
			
 
				+	B3_6DOF_FLAGS_ERP_STOP = 4
			
 
				+};
			
 
				+#define B3_6DOF_FLAGS_AXIS_SHIFT 3 // bits per axis
			
 
				+
			
 
				+
			
 
				+/// b3Generic6DofConstraint between two rigidbodies each with a pivotpoint that describes the axis location in local space
			
 
				+/*!
			
 
				+b3Generic6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'.
			
 
				+currently this limit supports rotational motors<br>
			
 
				+<ul>
			
 
				+<li> For Linear limits, use b3Generic6DofConstraint.setLinearUpperLimit, b3Generic6DofConstraint.setLinearLowerLimit. You can set the parameters with the b3TranslationalLimitMotor structure accessible through the b3Generic6DofConstraint.getTranslationalLimitMotor method.
			
 
				+At this moment translational motors are not supported. Maybe in the future. </li>
			
 
				+
			
 
				+<li> For Angular limits, use the b3RotationalLimitMotor structure for configuring the limit.
			
 
				+This is accessible through b3Generic6DofConstraint.getLimitMotor method,
			
 
				+This brings support for limit parameters and motors. </li>
			
 
				+
			
 
				+<li> Angular limits have these possible ranges:
			
 
				+<table border=1 >
			
 
				+<tr>
			
 
				+	<td><b>AXIS</b></td>
			
 
				+	<td><b>MIN ANGLE</b></td>
			
 
				+	<td><b>MAX ANGLE</b></td>
			
 
				+</tr><tr>
			
 
				+	<td>X</td>
			
 
				+	<td>-PI</td>
			
 
				+	<td>PI</td>
			
 
				+</tr><tr>
			
 
				+	<td>Y</td>
			
 
				+	<td>-PI/2</td>
			
 
				+	<td>PI/2</td>
			
 
				+</tr><tr>
			
 
				+	<td>Z</td>
			
 
				+	<td>-PI</td>
			
 
				+	<td>PI</td>
			
 
				+</tr>
			
 
				+</table>
			
 
				+</li>
			
 
				+</ul>
			
 
				+
			
 
				+*/
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3Generic6DofConstraint : public b3TypedConstraint
			
 
				+{
			
 
				+protected:
			
 
				+
			
 
				+	//! relative_frames
			
 
				+    //!@{
			
 
				+	b3Transform	m_frameInA;//!< the constraint space w.r.t body A
			
 
				+    b3Transform	m_frameInB;//!< the constraint space w.r.t body B
			
 
				+    //!@}
			
 
				+
			
 
				+    //! Jacobians
			
 
				+    //!@{
			
 
				+//    b3JacobianEntry	m_jacLinear[3];//!< 3 orthogonal linear constraints
			
 
				+//    b3JacobianEntry	m_jacAng[3];//!< 3 orthogonal angular constraints
			
 
				+    //!@}
			
 
				+
			
 
				+	//! Linear_Limit_parameters
			
 
				+    //!@{
			
 
				+    b3TranslationalLimitMotor m_linearLimits;
			
 
				+    //!@}
			
 
				+
			
 
				+
			
 
				+    //! hinge_parameters
			
 
				+    //!@{
			
 
				+    b3RotationalLimitMotor m_angularLimits[3];
			
 
				+	//!@}
			
 
				+
			
 
				+
			
 
				+protected:
			
 
				+    //! temporary variables
			
 
				+    //!@{
			
 
				+    b3Transform m_calculatedTransformA;
			
 
				+    b3Transform m_calculatedTransformB;
			
 
				+    b3Vector3 m_calculatedAxisAngleDiff;
			
 
				+    b3Vector3 m_calculatedAxis[3];
			
 
				+    b3Vector3 m_calculatedLinearDiff;
			
 
				+    b3Scalar m_timeStep;
			
 
				+	b3Scalar	m_factA;
			
 
				+	b3Scalar	m_factB;
			
 
				+	bool		m_hasStaticBody;
			
 
				+    
			
 
				+	b3Vector3 m_AnchorPos; // point between pivots of bodies A and B to solve linear axes
			
 
				+
			
 
				+    bool	m_useLinearReferenceFrameA;
			
 
				+	bool	m_useOffsetForConstraintFrame;
			
 
				+    
			
 
				+	int		m_flags;
			
 
				+
			
 
				+    //!@}
			
 
				+
			
 
				+    b3Generic6DofConstraint&	operator=(b3Generic6DofConstraint&	other) // assignment is not supported: asserts and returns *this unchanged
			
 
				+    {
			
 
				+        b3Assert(0);
			
 
				+        (void) other;
			
 
				+        return *this;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+	int setAngularLimits(b3ConstraintInfo2 *info, int row_offset,const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB);
			
 
				+
			
 
				+	int setLinearLimits(b3ConstraintInfo2 *info, int row, const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB);
			
 
				+
			
 
				+
			
 
				+	// tests linear limits
			
 
				+	void calculateLinearInfo();
			
 
				+
			
 
				+	//! calcs the euler angles between the two bodies.
			
 
				+    void calculateAngleInfo();
			
 
				+
			
 
				+
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+	
			
 
				+    b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB ,bool useLinearReferenceFrameA,const b3RigidBodyData* bodies);
			
 
				+    
			
 
				+	//! Calcs global transform of the offsets
			
 
				+	/*!
			
 
				+	Calcs the global transform for the joint offset for body A and B, and also calcs the angle differences between the bodies.
			
 
				+	\sa b3Generic6DofConstraint.getCalculatedTransformA , b3Generic6DofConstraint.getCalculatedTransformB, b3Generic6DofConstraint.calculateAngleInfo
			
 
				+	*/
			
 
				+    void calculateTransforms(const b3Transform& transA,const b3Transform& transB,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void calculateTransforms(const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	//! Gets the global transform of the offset for body A
			
 
				+    /*!
			
 
				+    \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
			
 
				+    */
			
 
				+    const b3Transform & getCalculatedTransformA() const
			
 
				+    {
			
 
				+    	return m_calculatedTransformA;
			
 
				+    }
			
 
				+
			
 
				+    //! Gets the global transform of the offset for body B
			
 
				+    /*!
			
 
				+    \sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
			
 
				+    */
			
 
				+    const b3Transform & getCalculatedTransformB() const
			
 
				+    {
			
 
				+    	return m_calculatedTransformB;
			
 
				+    }
			
 
				+
			
 
				+    const b3Transform & getFrameOffsetA() const
			
 
				+    {
			
 
				+    	return m_frameInA;
			
 
				+    }
			
 
				+
			
 
				+    const b3Transform & getFrameOffsetB() const
			
 
				+    {
			
 
				+    	return m_frameInB;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    b3Transform & getFrameOffsetA() // mutable access to m_frameInA
			
 
				+    {
			
 
				+    	return m_frameInA;
			
 
				+    }
			
 
				+
			
 
				+    b3Transform & getFrameOffsetB() // mutable access to m_frameInB
			
 
				+    {
			
 
				+    	return m_frameInB;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+
			
 
				+	virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	virtual void getInfo2 (b3ConstraintInfo2* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo2NonVirtual (b3ConstraintInfo2* info,const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+
			
 
				+    void	updateRHS(b3Scalar	timeStep);
			
 
				+
			
 
				+	//! Get the rotation axis in global coordinates
			
 
				+    b3Vector3 getAxis(int axis_index) const;
			
 
				+
			
 
				+    //! Get the relative Euler angle
			
 
				+    /*!
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+    b3Scalar getAngle(int axis_index) const;
			
 
				+
			
 
				+	//! Get the relative position of the constraint pivot
			
 
				+    /*!
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+	b3Scalar getRelativePivotPosition(int axis_index) const;
			
 
				+
			
 
				+	void setFrames(const b3Transform & frameA, const b3Transform & frameB, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	//! Test angular limit.
			
 
				+	/*!
			
 
				+	Calculates angular correction and returns true if limit needs to be corrected.
			
 
				+	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
			
 
				+	*/
			
 
				+    bool testAngularLimitMotor(int axis_index);
			
 
				+
			
 
				+    void	setLinearLowerLimit(const b3Vector3& linearLower)
			
 
				+    {
			
 
				+    	m_linearLimits.m_lowerLimit = linearLower;
			
 
				+    }
			
 
				+
			
 
				+	void	getLinearLowerLimit(b3Vector3& linearLower) // out-parameter: receives m_linearLimits.m_lowerLimit
			
 
				+	{
			
 
				+		linearLower = m_linearLimits.m_lowerLimit;
			
 
				+	}
			
 
				+
			
 
				+	void	setLinearUpperLimit(const b3Vector3& linearUpper)
			
 
				+	{
			
 
				+		m_linearLimits.m_upperLimit = linearUpper;
			
 
				+	}
			
 
				+
			
 
				+	void	getLinearUpperLimit(b3Vector3& linearUpper) // out-parameter: receives m_linearLimits.m_upperLimit
			
 
				+	{
			
 
				+		linearUpper = m_linearLimits.m_upperLimit;
			
 
				+	}
			
 
				+
			
 
				+    void	setAngularLowerLimit(const b3Vector3& angularLower)
			
 
				+    {
			
 
				+		for(int i = 0; i < 3; i++) 
			
 
				+			m_angularLimits[i].m_loLimit = b3NormalizeAngle(angularLower[i]); // each component goes through b3NormalizeAngle before being stored
			
 
				+    }
			
 
				+
			
 
				+	void	getAngularLowerLimit(b3Vector3& angularLower)
			
 
				+	{
			
 
				+		for(int i = 0; i < 3; i++) 
			
 
				+			angularLower[i] = m_angularLimits[i].m_loLimit;
			
 
				+	}
			
 
				+
			
 
				+    void	setAngularUpperLimit(const b3Vector3& angularUpper)
			
 
				+    {
			
 
				+		for(int i = 0; i < 3; i++)
			
 
				+			m_angularLimits[i].m_hiLimit = b3NormalizeAngle(angularUpper[i]); // each component goes through b3NormalizeAngle before being stored
			
 
				+    }
			
 
				+
			
 
				+	void	getAngularUpperLimit(b3Vector3& angularUpper)
			
 
				+	{
			
 
				+		for(int i = 0; i < 3; i++)
			
 
				+			angularUpper[i] = m_angularLimits[i].m_hiLimit;
			
 
				+	}
			
 
				+
			
 
				+	//! Retrieves the angular limit information
			
 
				+    b3RotationalLimitMotor * getRotationalLimitMotor(int index)
			
 
				+    {
			
 
				+    	return &m_angularLimits[index];
			
 
				+    }
			
 
				+
			
 
				+    //! Retrieves the linear limit information
			
 
				+    b3TranslationalLimitMotor * getTranslationalLimitMotor()
			
 
				+    {
			
 
				+    	return &m_linearLimits;
			
 
				+    }
			
 
				+
			
 
				+    //first 3 are linear, next 3 are angular
			
 
				+    void setLimit(int axis, b3Scalar lo, b3Scalar hi)
			
 
				+    {
			
 
				+    	if(axis<3)
			
 
				+    	{
			
 
				+    		m_linearLimits.m_lowerLimit[axis] = lo;
			
 
				+    		m_linearLimits.m_upperLimit[axis] = hi;
			
 
				+    	}
			
 
				+    	else
			
 
				+    	{
			
 
				+			lo = b3NormalizeAngle(lo);
			
 
				+			hi = b3NormalizeAngle(hi);
			
 
				+    		m_angularLimits[axis-3].m_loLimit = lo; // axis 3..5 selects angular motor axis-3; no range check is made here
			
 
				+    		m_angularLimits[axis-3].m_hiLimit = hi;
			
 
				+    	}
			
 
				+    }
			
 
				+
			
 
				+	//! Test limit
			
 
				+	/*!
			
 
				+    - free means upper < lower,
			
 
				+    - locked means upper == lower
			
 
				+    - limited means upper > lower
			
 
				+    - limitIndex: first 3 are linear, next 3 are angular
			
 
				+    */
			
 
				+    bool	isLimited(int limitIndex)
			
 
				+    {
			
 
				+    	if(limitIndex<3)
			
 
				+    	{
			
 
				+			return m_linearLimits.isLimited(limitIndex);
			
 
				+
			
 
				+    	}
			
 
				+        return m_angularLimits[limitIndex-3].isLimited(); // angular case delegates to b3RotationalLimitMotor::isLimited()
			
 
				+    }
			
 
				+
			
 
				+	virtual void calcAnchorPos(const b3RigidBodyData* bodies); // overridable
			
 
				+
			
 
				+	int get_limit_motor_info2(	b3RotationalLimitMotor * limot,
			
 
				+								const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,
			
 
				+								b3ConstraintInfo2 *info, int row, b3Vector3& ax1, int rotational, int rotAllowed = false); // note: rotAllowed is an int whose default 'false' converts to 0
			
 
				+
			
 
				+	// access for UseFrameOffset
			
 
				+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
			
 
				+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
			
 
				+
			
 
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
			
 
				+	///If no axis is provided, it uses the default axis for this constraint.
			
 
				+	virtual	void setParam(int num, b3Scalar value, int axis = -1);
			
 
				+	///return the local value of parameter
			
 
				+	virtual	b3Scalar getParam(int num, int axis = -1) const;
			
 
				+
			
 
				+	void setAxis( const b3Vector3& axis1, const b3Vector3& axis2,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+
			
 
				+
			
 
				+	
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_GENERIC_6DOF_CONSTRAINT_H
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h
@@ -0,0 +1,155 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_JACOBIAN_ENTRY_H
			
 
				+#define B3_JACOBIAN_ENTRY_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+
			
 
				+//notes:
			
 
				+// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
			
 
				+// which makes the b3JacobianEntry memory layout 16 bytes
			
 
				+// if you only are interested in angular part, just feed massInvA and massInvB zero
			
 
				+
			
 
				+/// A Jacobian entry is an abstraction that allows constraints to be described
			
 
				+/// it can be used in combination with a constraint solver
			
 
				+/// Can be used to relate the effect of an impulse to the constraint error
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3JacobianEntry
			
 
				+{
			
 
				+public:
			
 
				+	b3JacobianEntry() {};
			
 
				+	//constraint between two different rigidbodies
			
 
				+	b3JacobianEntry(
			
 
				+		const b3Matrix3x3& world2A,
			
 
				+		const b3Matrix3x3& world2B,
			
 
				+		const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
			
 
				+		const b3Vector3& jointAxis,
			
 
				+		const b3Vector3& inertiaInvA, 
			
 
				+		const b3Scalar massInvA,
			
 
				+		const b3Vector3& inertiaInvB,
			
 
				+		const b3Scalar massInvB)
			
 
				+		:m_linearJointAxis(jointAxis)
			
 
				+	{
			
 
				+		m_aJ = world2A*(rel_pos1.cross(m_linearJointAxis));
			
 
				+		m_bJ = world2B*(rel_pos2.cross(-m_linearJointAxis));
			
 
				+		m_0MinvJt	= inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	//angular constraint between two different rigidbodies
			
 
				+	b3JacobianEntry(const b3Vector3& jointAxis,
			
 
				+		const b3Matrix3x3& world2A,
			
 
				+		const b3Matrix3x3& world2B,
			
 
				+		const b3Vector3& inertiaInvA,
			
 
				+		const b3Vector3& inertiaInvB)
			
 
				+		:m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)))
			
 
				+	{
			
 
				+		m_aJ= world2A*jointAxis;
			
 
				+		m_bJ = world2B*-jointAxis;
			
 
				+		m_0MinvJt	= inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
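				+	//angular constraint between two different rigidbodies; each body's axis (axisInA, axisInB) is supplied directly and no world2A/world2B rotation is applied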
			
 
				+	b3JacobianEntry(const b3Vector3& axisInA,
			
 
				+		const b3Vector3& axisInB,
			
 
				+		const b3Vector3& inertiaInvA,
			
 
				+		const b3Vector3& inertiaInvB)
			
 
				+		: m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)))
			
 
				+		, m_aJ(axisInA)
			
 
				+		, m_bJ(-axisInB)
			
 
				+	{
			
 
				+		m_0MinvJt	= inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = inertiaInvB * m_bJ;
			
 
				+		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	//constraint on one rigidbody
			
 
				+	b3JacobianEntry(
			
 
				+		const b3Matrix3x3& world2A,
			
 
				+		const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
			
 
				+		const b3Vector3& jointAxis,
			
 
				+		const b3Vector3& inertiaInvA, 
			
 
				+		const b3Scalar massInvA)
			
 
				+		:m_linearJointAxis(jointAxis)
			
 
				+	{
			
 
				+		m_aJ= world2A*(rel_pos1.cross(jointAxis));
			
 
				+		m_bJ = world2A*(rel_pos2.cross(-jointAxis));
			
 
				+		m_0MinvJt	= inertiaInvA * m_aJ;
			
 
				+		m_1MinvJt = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.));
			
 
				+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);
			
 
				+
			
 
				+		b3Assert(m_Adiag > b3Scalar(0.0));
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar	getDiagonal() const { return m_Adiag; }
			
 
				+
			
 
				+	// for two constraints on the same rigidbody (for example vehicle friction)
			
 
				+	b3Scalar	getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA) const
			
 
				+	{
			
 
				+		const b3JacobianEntry& jacA = *this;
			
 
				+		b3Scalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
			
 
				+		b3Scalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
			
 
				+		return lin + ang;
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+	// for two constraints sharing the same two rigidbodies (for example two contact points between two rigidbodies)
			
 
				+	b3Scalar	getNonDiagonal(const b3JacobianEntry& jacB,const b3Scalar massInvA,const b3Scalar massInvB) const
			
 
				+	{
			
 
				+		const b3JacobianEntry& jacA = *this;
			
 
				+		b3Vector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis;
			
 
				+		b3Vector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
			
 
				+		b3Vector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
			
 
				+		b3Vector3 lin0 = massInvA * lin ;
			
 
				+		b3Vector3 lin1 = massInvB * lin;
			
 
				+		b3Vector3 sum = ang0+ang1+lin0+lin1;
			
 
				+		return sum[0]+sum[1]+sum[2];
			
 
				+	}
			
 
				+
			
 
				+	b3Scalar getRelativeVelocity(const b3Vector3& linvelA,const b3Vector3& angvelA,const b3Vector3& linvelB,const b3Vector3& angvelB) // NOTE(review): not const-qualified although the body only reads members
			
 
				+	{
			
 
				+		b3Vector3 linrel = linvelA - linvelB;
			
 
				+		b3Vector3 angvela  = angvelA * m_aJ; // assumes b3Vector3 operator* is component-wise - TODO confirm
			
 
				+		b3Vector3 angvelb  = angvelB * m_bJ;
			
 
				+		linrel *= m_linearJointAxis;
			
 
				+		angvela += angvelb;
			
 
				+		angvela += linrel;
			
 
				+		b3Scalar rel_vel2 = angvela[0]+angvela[1]+angvela[2]; // sum of the three components of the accumulated vector
			
 
				+		return rel_vel2 + B3_EPSILON; // NOTE(review): B3_EPSILON is added to the returned value; the reason is not visible here
			
 
				+	}
			
 
				+//private:
			
 
				+
			
 
				+	b3Vector3	m_linearJointAxis;
			
 
				+	b3Vector3	m_aJ;
			
 
				+	b3Vector3	m_bJ;
			
 
				+	b3Vector3	m_0MinvJt;
			
 
				+	b3Vector3	m_1MinvJt;
			
 
				+	//Optimization: can be stored in the w/last component of one of the vectors
			
 
				+	b3Scalar	m_Adiag;
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_JACOBIAN_ENTRY_H
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h
@@ -0,0 +1,149 @@
 
				+#ifndef B3_PGS_JACOBI_SOLVER
			
 
				+#define B3_PGS_JACOBI_SOLVER
			
 
				+
			
 
				+
			
 
				+struct b3Contact4;
			
 
				+struct b3ContactPoint;
			
 
				+
			
 
				+
			
 
				+class b3Dispatcher;
			
 
				+
			
 
				+#include "b3TypedConstraint.h"
			
 
				+#include "b3ContactSolverInfo.h"
			
 
				+#include "b3SolverBody.h"
			
 
				+#include "b3SolverConstraint.h"
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+struct b3InertiaData;
			
 
				+
			
 
				+class b3PgsJacobiSolver
			
 
				+{
			
 
				+
			
 
				+protected:
			
 
				+	b3AlignedObjectArray<b3SolverBody>      m_tmpSolverBodyPool;
			
 
				+	b3ConstraintArray			m_tmpSolverContactConstraintPool;
			
 
				+	b3ConstraintArray			m_tmpSolverNonContactConstraintPool;
			
 
				+	b3ConstraintArray			m_tmpSolverContactFrictionConstraintPool;
			
 
				+	b3ConstraintArray			m_tmpSolverContactRollingFrictionConstraintPool;
			
 
				+
			
 
				+	b3AlignedObjectArray<int>	m_orderTmpConstraintPool;
			
 
				+	b3AlignedObjectArray<int>	m_orderNonContactConstraintPool;
			
 
				+	b3AlignedObjectArray<int>	m_orderFrictionConstraintPool;
			
 
				+	b3AlignedObjectArray<b3TypedConstraint::b3ConstraintInfo1> m_tmpConstraintSizesPool;
			
 
				+	
			
 
				+	b3AlignedObjectArray<int>		m_bodyCount;
			
 
				+	b3AlignedObjectArray<int>		m_bodyCountCheck;
			
 
				+	
			
 
				+	b3AlignedObjectArray<b3Vector3>	m_deltaLinearVelocities;
			
 
				+	b3AlignedObjectArray<b3Vector3>	m_deltaAngularVelocities;
			
 
				+
			
 
				+	bool						m_usePgs;
			
 
				+	void						averageVelocities();
			
 
				+
			
 
				+	int							m_maxOverrideNumSolverIterations;
			
 
				+
			
 
				+	int							m_numSplitImpulseRecoveries;
			
 
				+
			
 
				+	b3Scalar	getContactProcessingThreshold(b3Contact4* contact)
			
 
				+	{
			
 
				+		return 0.02f; // fixed value; the 'contact' argument is not consulted
			
 
				+	}
			
 
				+	void setupFrictionConstraint(	b3RigidBodyData* bodies,b3InertiaData* inertias, b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
			
 
				+									b3ContactPoint& cp,const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
			
 
				+									b3RigidBodyData* colObj0,b3RigidBodyData* colObj1, b3Scalar relaxation, 
			
 
				+									b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);
			
 
				+
			
 
				+	void setupRollingFrictionConstraint(b3RigidBodyData* bodies,b3InertiaData* inertias,	b3SolverConstraint& solverConstraint, const b3Vector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
			
 
				+									b3ContactPoint& cp,const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
			
 
				+									b3RigidBodyData* colObj0,b3RigidBodyData* colObj1, b3Scalar relaxation, 
			
 
				+									b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);
			
 
				+
			
 
				+	b3SolverConstraint&	addFrictionConstraint(b3RigidBodyData* bodies,b3InertiaData* inertias,const b3Vector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,b3ContactPoint& cp,const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,b3RigidBodyData* colObj0,b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);
			
 
				+	b3SolverConstraint&	addRollingFrictionConstraint(b3RigidBodyData* bodies,b3InertiaData* inertias,const b3Vector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,b3ContactPoint& cp,const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,b3RigidBodyData* colObj0,b3RigidBodyData* colObj1, b3Scalar relaxation, b3Scalar desiredVelocity=0, b3Scalar cfmSlip=0.f);
			
 
				+
			
 
				+
			
 
				+	void setupContactConstraint(b3RigidBodyData* bodies, b3InertiaData* inertias,
			
 
				+								b3SolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, b3ContactPoint& cp, 
			
 
				+								const b3ContactSolverInfo& infoGlobal, b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation, 
			
 
				+								b3Vector3& rel_pos1, b3Vector3& rel_pos2);
			
 
				+
			
 
				+	void setFrictionConstraintImpulse( b3RigidBodyData* bodies, b3InertiaData* inertias,b3SolverConstraint& solverConstraint, int solverBodyIdA,int solverBodyIdB, 
			
 
				+										 b3ContactPoint& cp, const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
			
 
				+	unsigned long	m_btSeed2;
			
 
				+
			
 
				+	
			
 
				+	b3Scalar restitutionCurve(b3Scalar rel_vel, b3Scalar restitution);
			
 
				+
			
 
				+	void	convertContact(b3RigidBodyData* bodies, b3InertiaData* inertias,b3Contact4* manifold,const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+
			
 
				+	void	resolveSplitPenetrationSIMD(
			
 
				+     b3SolverBody& bodyA,b3SolverBody& bodyB,
			
 
				+        const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void	resolveSplitPenetrationImpulseCacheFriendly(
			
 
				+       b3SolverBody& bodyA,b3SolverBody& bodyB,
			
 
				+        const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	//internal method
			
 
				+	int		getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies,b3InertiaData* inertias);
			
 
				+	void	initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* collisionObject);
			
 
				+
			
 
				+	void	resolveSingleConstraintRowGeneric(b3SolverBody& bodyA,b3SolverBody& bodyB,const b3SolverConstraint& contactConstraint);
			
 
				+
			
 
				+	void	resolveSingleConstraintRowGenericSIMD(b3SolverBody& bodyA,b3SolverBody& bodyB,const b3SolverConstraint& contactConstraint);
			
 
				+	
			
 
				+	void	resolveSingleConstraintRowLowerLimit(b3SolverBody& bodyA,b3SolverBody& bodyB,const b3SolverConstraint& contactConstraint);
			
 
				+	
			
 
				+	void	resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& bodyA,b3SolverBody& bodyB,const b3SolverConstraint& contactConstraint);
			
 
				+		
			
 
				+protected:
			
 
				+
			
 
				+	virtual b3Scalar solveGroupCacheFriendlySetup(b3RigidBodyData* bodies, b3InertiaData* inertias,int numBodies,b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+
			
 
				+	virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
			
 
				+	virtual void solveGroupCacheFriendlySplitImpulseIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
			
 
				+	b3Scalar solveSingleIteration(int iteration, b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+
			
 
				+	virtual b3Scalar solveGroupCacheFriendlyFinish(b3RigidBodyData* bodies, b3InertiaData* inertias,int numBodies,const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+	
			
 
				+	b3PgsJacobiSolver(bool usePgs);
			
 
				+	virtual ~b3PgsJacobiSolver();
			
 
				+
			
 
				+//	void	solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts);
			
 
				+	void	solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts, int numConstraints, b3TypedConstraint** constraints);
			
 
				+
			
 
				+	b3Scalar solveGroup(b3RigidBodyData* bodies,b3InertiaData* inertias,int numBodies,b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
			
 
				+
			
 
				+	///clear internal cached data and reset random seed
			
 
				+	virtual	void	reset();
			
 
				+	
			
 
				+	unsigned long b3Rand2();
			
 
				+
			
 
				+	int b3RandInt2 (int n);
			
 
				+
			
 
				+	void	setRandSeed(unsigned long seed)
			
 
				+	{
			
 
				+		m_btSeed2 = seed;
			
 
				+	}
			
 
				+	unsigned long	getRandSeed() const
			
 
				+	{
			
 
				+		return m_btSeed2;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_PGS_JACOBI_SOLVER
			
 
				+
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h
@@ -0,0 +1,159 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_POINT2POINTCONSTRAINT_H
			
 
				+#define B3_POINT2POINTCONSTRAINT_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+//#include "b3JacobianEntry.h"
			
 
				+#include "b3TypedConstraint.h"
			
 
				+
			
 
				+class b3RigidBody;
			
 
				+
			
 
				+
			
 
				+#ifdef B3_USE_DOUBLE_PRECISION
			
 
				+#define b3Point2PointConstraintData	b3Point2PointConstraintDoubleData
			
 
				+#define b3Point2PointConstraintDataName	"b3Point2PointConstraintDoubleData"
			
 
				+#else
			
 
				+#define b3Point2PointConstraintData	b3Point2PointConstraintFloatData
			
 
				+#define b3Point2PointConstraintDataName	"b3Point2PointConstraintFloatData"
			
 
				+#endif //B3_USE_DOUBLE_PRECISION
			
 
				+
			
 
				+struct	b3ConstraintSetting // tuning values; b3Point2PointConstraint holds one as m_setting
			
 
				+{
			
 
				+	b3ConstraintSetting()	:
			
 
				+		m_tau(b3Scalar(0.3)),
			
 
				+		m_damping(b3Scalar(1.)),
			
 
				+		m_impulseClamp(b3Scalar(0.))
			
 
				+	{
			
 
				+	}
			
 
				+	b3Scalar		m_tau; // constructed as 0.3
			
 
				+	b3Scalar		m_damping; // constructed as 1
			
 
				+	b3Scalar		m_impulseClamp; // constructed as 0
			
 
				+};
			
 
				+
			
 
				+enum b3Point2PointFlags
			
 
				+{
			
 
				+	B3_P2P_FLAGS_ERP = 1,
			
 
				+	B3_P2P_FLAGS_CFM = 2
			
 
				+};
			
 
				+
			
 
				+/// point to point constraint between two rigidbodies each with a pivotpoint that describes the 'ballsocket' location in local space
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3Point2PointConstraint : public b3TypedConstraint
			
 
				+{
			
 
				+#ifdef IN_PARALLELL_SOLVER
			
 
				+public:
			
 
				+#endif
			
 
				+	
			
 
				+	b3Vector3	m_pivotInA;
			
 
				+	b3Vector3	m_pivotInB;
			
 
				+	
			
 
				+	int			m_flags;
			
 
				+	b3Scalar	m_erp;
			
 
				+	b3Scalar	m_cfm;
			
 
				+	
			
 
				+public:
			
 
				+
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	b3ConstraintSetting	m_setting;
			
 
				+
			
 
				+	b3Point2PointConstraint(int  rbA,int rbB, const b3Vector3& pivotInA,const b3Vector3& pivotInB);
			
 
				+
			
 
				+	//b3Point2PointConstraint(int  rbA,const b3Vector3& pivotInA);
			
 
				+
			
 
				+
			
 
				+
			
 
				+	virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	virtual void getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies);
			
 
				+
			
 
				+	void getInfo2NonVirtual (b3ConstraintInfo2* info, const b3Transform& body0_trans, const b3Transform& body1_trans);
			
 
				+
			
 
				+	void	updateRHS(b3Scalar	timeStep);
			
 
				+
			
 
				+	void	setPivotA(const b3Vector3& pivotA)
			
 
				+	{
			
 
				+		m_pivotInA = pivotA;
			
 
				+	}
			
 
				+
			
 
				+	void	setPivotB(const b3Vector3& pivotB)
			
 
				+	{
			
 
				+		m_pivotInB = pivotB;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getPivotInA() const
			
 
				+	{
			
 
				+		return m_pivotInA;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getPivotInB() const
			
 
				+	{
			
 
				+		return m_pivotInB;
			
 
				+	}
			
 
				+
			
 
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
			
 
				+	///If no axis is provided, it uses the default axis for this constraint.
			
 
				+	virtual	void	setParam(int num, b3Scalar value, int axis = -1);
			
 
				+	///return the local value of parameter
			
 
				+	virtual	b3Scalar getParam(int num, int axis = -1) const;
			
 
				+
			
 
				+//	virtual	int	calculateSerializeBufferSize() const;
			
 
				+
			
 
				+	///fills the dataBuffer and returns the struct name (and 0 on failure)
			
 
				+//	virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+///do not change these serialization structures: doing so requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct	b3Point2PointConstraintFloatData
			
 
				+{
			
 
				+	b3TypedConstraintData	m_typeConstraintData;
			
 
				+	b3Vector3FloatData	m_pivotInA;
			
 
				+	b3Vector3FloatData	m_pivotInB;
			
 
				+};
			
 
				+
			
 
				+///do not change these serialization structures: doing so requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct	b3Point2PointConstraintDoubleData
			
 
				+{
			
 
				+	b3TypedConstraintData	m_typeConstraintData;
			
 
				+	b3Vector3DoubleData	m_pivotInA;
			
 
				+	b3Vector3DoubleData	m_pivotInB;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+B3_FORCE_INLINE	int	b3Point2PointConstraint::calculateSerializeBufferSize() const
			
 
				+{
			
 
				+	return sizeof(b3Point2PointConstraintData);
			
 
				+
			
 
				+}
			
 
				+
			
 
				+	///fills the dataBuffer and returns the struct name (and 0 on failure)
			
 
				+B3_FORCE_INLINE	const char*	b3Point2PointConstraint::serialize(void* dataBuffer, b3Serializer* serializer) const
			
 
				+{
			
 
				+	b3Point2PointConstraintData* p2pData = (b3Point2PointConstraintData*)dataBuffer;
			
 
				+
			
 
				+	b3TypedConstraint::serialize(&p2pData->m_typeConstraintData,serializer);
			
 
				+	m_pivotInA.serialize(p2pData->m_pivotInA);
			
 
				+	m_pivotInB.serialize(p2pData->m_pivotInB);
			
 
				+
			
 
				+	return b3Point2PointConstraintDataName;
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+#endif //B3_POINT2POINTCONSTRAINT_H
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
@@ -0,0 +1,302 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SOLVER_BODY_H
			
 
				+#define B3_SOLVER_BODY_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedAllocator.h"
			
 
				+#include "Bullet3Common/b3TransformUtil.h"
			
 
				+
			
 
				+///USE_SIMD is switched on whenever B3_USE_SSE is defined. Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
			
 
				+#ifdef B3_USE_SSE
			
 
				+#define USE_SIMD 1
			
 
				+#endif //
			
 
				+
			
 
				+
			
 
				+#ifdef USE_SIMD
			
 
				+
			
 
				+struct	b3SimdScalar
			
 
				+{
			
 
				+	B3_FORCE_INLINE	b3SimdScalar()
			
 
				+	{
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	b3SimdScalar(float	fl)
			
 
				+	:m_vec128 (_mm_set1_ps(fl))
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	b3SimdScalar(__m128 v128)
			
 
				+		:m_vec128(v128)
			
 
				+	{
			
 
				+	}
			
 
				+	union
			
 
				+	{
			
 
				+		__m128		m_vec128;
			
 
				+		float		m_floats[4];
			
 
				+		float		x,y,z,w; // NOTE(review): as direct union members x, y, z and w all share the same storage at offset 0; they are not the four lanes of m_floats
			
 
				+		int			m_ints[4];
			
 
				+		b3Scalar	m_unusedPadding;
			
 
				+	};
			
 
				+	B3_FORCE_INLINE	__m128	get128()
			
 
				+	{
			
 
				+		return m_vec128;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	const __m128	get128() const
			
 
				+	{
			
 
				+		return m_vec128;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	void	set128(__m128 v128)
			
 
				+	{
			
 
				+		m_vec128 = v128;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE	operator       __m128()       
			
 
				+	{ 
			
 
				+		return m_vec128; 
			
 
				+	}
			
 
				+	B3_FORCE_INLINE	operator const __m128() const 
			
 
				+	{ 
			
 
				+		return m_vec128; 
			
 
				+	}
			
 
				+	
			
 
				+	B3_FORCE_INLINE	operator float() const 
			
 
				+	{ 
			
 
				+		return m_floats[0]; 
			
 
				+	}
			
 
				+
			
 
				+};
			
 
				+
			
 
				+///@brief Return the elementwise product of two b3SimdScalar
			
 
				+B3_FORCE_INLINE b3SimdScalar 
			
 
				+operator*(const b3SimdScalar& v1, const b3SimdScalar& v2) 
			
 
				+{
			
 
				+	return b3SimdScalar(_mm_mul_ps(v1.get128(),v2.get128()));
			
 
				+}
			
 
				+
			
 
				+///@brief Return the elementwise sum of two b3SimdScalar
			
 
				+B3_FORCE_INLINE b3SimdScalar 
			
 
				+operator+(const b3SimdScalar& v1, const b3SimdScalar& v2) 
			
 
				+{
			
 
				+	return b3SimdScalar(_mm_add_ps(v1.get128(),v2.get128()));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+#else
			
 
				+#define b3SimdScalar b3Scalar
			
 
				+#endif
			
 
				+
			
 
				+///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
			
 
				+B3_ATTRIBUTE_ALIGNED16 (struct)	b3SolverBody
			
 
				+{
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+	b3Transform		m_worldTransform;
			
 
				+	b3Vector3		m_deltaLinearVelocity;
			
 
				+	b3Vector3		m_deltaAngularVelocity;
			
 
				+	b3Vector3		m_angularFactor;
			
 
				+	b3Vector3		m_linearFactor;
			
 
				+	b3Vector3		m_invMass;
			
 
				+	b3Vector3		m_pushVelocity;
			
 
				+	b3Vector3		m_turnVelocity;
			
 
				+	b3Vector3		m_linearVelocity;
			
 
				+	b3Vector3		m_angularVelocity;
			
 
				+
			
 
				+	union 
			
 
				+	{
			
 
				+		void*	m_originalBody; // tested for null by several methods of this struct
			
 
				+		int		m_originalBodyIndex; // shares storage with m_originalBody; NOTE(review): an index of 0 would presumably read back as a null m_originalBody - confirm against the solver
			
 
				+	};
			
 
				+
			
 
				+	int padding[3];
			
 
				+
			
 
				+
			
 
				+	void	setWorldTransform(const b3Transform& worldTransform)
			
 
				+	{
			
 
				+		m_worldTransform = worldTransform;
			
 
				+	}
			
 
				+
			
 
				+	const b3Transform& getWorldTransform() const
			
 
				+	{
			
 
				+		return m_worldTransform;
			
 
				+	}
			
 
				+	
			
 
				+	B3_FORCE_INLINE void	getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const
			
 
				+	{
			
 
				+		if (m_originalBody)
			
 
				+			velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
			
 
				+		else
			
 
				+			velocity.setValue(0,0,0);
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void	getAngularVelocity(b3Vector3& angVel) const
			
 
				+	{
			
 
				+		if (m_originalBody)
			
 
				+			angVel =m_angularVelocity+m_deltaAngularVelocity;
			
 
				+		else
			
 
				+			angVel.setValue(0,0,0);
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
			
 
				+	B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude)
			
 
				+	{
			
 
				+		if (m_originalBody)
			
 
				+		{
			
 
				+			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
			
 
				+			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,b3Scalar impulseMagnitude)
			
 
				+	{
			
 
				+		if (m_originalBody)
			
 
				+		{
			
 
				+			m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor;
			
 
				+			m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+
			
 
				+	const b3Vector3& getDeltaLinearVelocity() const
			
 
				+	{
			
 
				+		return m_deltaLinearVelocity;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getDeltaAngularVelocity() const
			
 
				+	{
			
 
				+		return m_deltaAngularVelocity;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getPushVelocity() const 
			
 
				+	{
			
 
				+		return m_pushVelocity;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getTurnVelocity() const 
			
 
				+	{
			
 
				+		return m_turnVelocity;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	////////////////////////////////////////////////
			
 
				+	///some internal methods, don't use them
			
 
				+		
			
 
				+	b3Vector3& internalGetDeltaLinearVelocity()
			
 
				+	{
			
 
				+		return m_deltaLinearVelocity;
			
 
				+	}
			
 
				+
			
 
				+	b3Vector3& internalGetDeltaAngularVelocity()
			
 
				+	{
			
 
				+		return m_deltaAngularVelocity;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& internalGetAngularFactor() const
			
 
				+	{
			
 
				+		return m_angularFactor;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& internalGetInvMass() const
			
 
				+	{
			
 
				+		return m_invMass;
			
 
				+	}
			
 
				+
			
 
				+	void internalSetInvMass(const b3Vector3& invMass)
			
 
				+	{
			
 
				+		m_invMass = invMass;
			
 
				+	}
			
 
				+	
			
 
				+	b3Vector3& internalGetPushVelocity()
			
 
				+	{
			
 
				+		return m_pushVelocity;
			
 
				+	}
			
 
				+
			
 
				+	b3Vector3& internalGetTurnVelocity()
			
 
				+	{
			
 
				+		return m_turnVelocity;
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void	internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const
			
 
				+	{
			
 
				+		velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
			
 
				+	}
			
 
				+
			
 
				+	B3_FORCE_INLINE void	internalGetAngularVelocity(b3Vector3& angVel) const
			
 
				+	{
			
 
				+		angVel = m_angularVelocity+m_deltaAngularVelocity;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
			
 
				+	B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude)
			
 
				+	{
			
 
				+		//if (m_originalBody)
			
 
				+		{
			
 
				+			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
			
 
				+			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
			
 
				+		}
			
 
				+	}
			
 
				+		
			
 
				+	
			
 
				+	
			
 
				+
			
 
				+	void	writebackVelocity()
			
 
				+	{
			
 
				+		//if (m_originalBody>=0)
			
 
				+		{
			
 
				+			m_linearVelocity +=m_deltaLinearVelocity;
			
 
				+			m_angularVelocity += m_deltaAngularVelocity;
			
 
				+			
			
 
				+			//m_originalBody->setCompanionId(-1);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+	void	writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp)
			
 
				+	{
			
 
				+        (void) timeStep; // NOTE(review): redundant - timeStep is passed to integrateTransform below
			
 
				+		if (m_originalBody)
			
 
				+		{
			
 
				+			m_linearVelocity += m_deltaLinearVelocity;
			
 
				+			m_angularVelocity += m_deltaAngularVelocity;
			
 
				+			
			
 
				+			//correct the position/orientation based on push/turn recovery
			
 
				+			b3Transform newTransform;
			
 
				+			if (m_pushVelocity[0]!=0.f || m_pushVelocity[1]!=0 || m_pushVelocity[2]!=0 || m_turnVelocity[0]!=0.f || m_turnVelocity[1]!=0 || m_turnVelocity[2]!=0) // integrate only when some push/turn component is non-zero
			
 
				+			{
			
 
				+			//	b3Quaternion orn = m_worldTransform.getRotation();
			
 
				+				b3TransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform); // only the turn velocity is scaled by splitImpulseTurnErp
			
 
				+				m_worldTransform = newTransform;
			
 
				+			}
			
 
				+			//m_worldTransform.setRotation(orn);
			
 
				+			//m_originalBody->setCompanionId(-1);
			
 
				+		}
			
 
				+	}
			
 
				+	
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_SOLVER_BODY_H
			
 
				+
			
 
				+
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h
@@ -0,0 +1,80 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_SOLVER_CONSTRAINT_H
			
 
				+#define B3_SOLVER_CONSTRAINT_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Matrix3x3.h"
			
 
				+//#include "b3JacobianEntry.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+//#define NO_FRICTION_TANGENTIALS 1
			
 
				+#include "b3SolverBody.h"
			
 
				+
			
 
				+
			
 
				+///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
			
 
				+B3_ATTRIBUTE_ALIGNED16 (struct)	b3SolverConstraint // one solver row; explicit padding members suggest the layout is deliberate -- avoid reordering (TODO confirm)
				+{
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	b3Vector3		m_relpos1CrossNormal; // presumably (relative position on body A) x normal, per the name -- confirm against solver setup
				+	b3Vector3		m_contactNormal; // constraint axis; the sign convention is not visible in this header
				+
				+	b3Vector3		m_relpos2CrossNormal; // counterpart of m_relpos1CrossNormal for body B (assumed from the name)
				+	//b3Vector3		m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
				+
				+	b3Vector3		m_angularComponentA; // angular response term for body A (assumed from the name)
				+	b3Vector3		m_angularComponentB; // angular response term for body B (assumed from the name)
				+	
				+	mutable b3SimdScalar	m_appliedPushImpulse; // mutable: may be updated through a const b3SolverConstraint
				+	mutable b3SimdScalar	m_appliedImpulse; // mutable: may be updated through a const b3SolverConstraint
				+	int m_padding1; // padding only, by name
				+	int m_padding2; // padding only, by name
				+	b3Scalar	m_friction;
				+	b3Scalar	m_jacDiagABInv; // presumably inverse of the Jacobian diagonal term for the A/B pair -- TODO confirm
				+	b3Scalar		m_rhs; // right-hand side of the row (name-based; see solver for exact meaning)
				+	b3Scalar		m_cfm; // constraint force mixing term (name-based)
				+	
				+    b3Scalar		m_lowerLimit; // lower bound for this row's impulse (assumed) -- TODO confirm
				+	b3Scalar		m_upperLimit; // upper bound for this row's impulse (assumed) -- TODO confirm
				+	b3Scalar		m_rhsPenetration;
				+    union // both members share the same storage
				+	{
				+		void*		m_originalContactPoint;
				+		b3Scalar	m_unusedPadding4;
				+	};
				+
				+	int	m_overrideNumSolverIterations;
				+    int			m_frictionIndex;
				+	int m_solverBodyIdA; // integer id of solver body A (what it indexes is not visible here)
				+	int m_solverBodyIdB; // integer id of solver body B (what it indexes is not visible here)
				+
				+    
				+	enum		b3SolverConstraintType // nested enum; no member of this struct stores it
				+	{
				+		B3_SOLVER_CONTACT_1D = 0,
				+		B3_SOLVER_FRICTION_1D // == 1
				+	};
				+};
			
 
				+
			
 
				+typedef b3AlignedObjectArray<b3SolverConstraint>	b3ConstraintArray; // array of solver rows; the type taken by b3TypedConstraint::setupSolverConstraint
			
 
				+
			
 
				+
			
 
				+#endif //B3_SOLVER_CONSTRAINT_H
			
 
				+
			
 
				+
			
 
				+
			
--- a/include/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h
+++ b/include/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h
@@ -0,0 +1,483 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_TYPED_CONSTRAINT_H
			
 
				+#define B3_TYPED_CONSTRAINT_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+#include "b3SolverConstraint.h"
			
 
				+
			
 
				+class b3Serializer;
			
 
				+
			
 
				+//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
			
 
				+enum b3TypedConstraintType // numbering starts at 3 and increments implicitly; append new values before B3_MAX_CONSTRAINT_TYPE only
				+{
				+	B3_POINT2POINT_CONSTRAINT_TYPE=3,
				+	B3_HINGE_CONSTRAINT_TYPE, // 4
				+	B3_CONETWIST_CONSTRAINT_TYPE, // 5
				+	B3_D6_CONSTRAINT_TYPE, // 6
				+	B3_SLIDER_CONSTRAINT_TYPE, // 7
				+	B3_CONTACT_CONSTRAINT_TYPE, // 8
				+	B3_D6_SPRING_CONSTRAINT_TYPE, // 9
				+	B3_GEAR_CONSTRAINT_TYPE, // 10
				+	B3_FIXED_CONSTRAINT_TYPE, // 11
				+	B3_MAX_CONSTRAINT_TYPE // 12; one past the last real type
				+};
			
 
				+
			
 
				+
			
 
				+enum b3ConstraintParams // presumably the `num` selector of setParam()/getParam(), which take it as a plain int -- TODO confirm
				+{
				+	B3_CONSTRAINT_ERP=1,
				+	B3_CONSTRAINT_STOP_ERP, // 2
				+	B3_CONSTRAINT_CFM, // 3
				+	B3_CONSTRAINT_STOP_CFM // 4
				+};
			
 
				+
			
 
				+#if 1 // hard-wired on: parameter checks forward to b3Assert; change to 0 to compile them out
				+	#define b3AssertConstrParams(_par) b3Assert(_par) 
				+#else // disabled variant: the macro expands to nothing
				+	#define b3AssertConstrParams(_par)
				+#endif
			
 
				+
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct)	b3JointFeedback // plain record of four vectors; attached to a constraint via b3TypedConstraint::setJointFeedback
				+{
				+	b3Vector3	m_appliedForceBodyA; // who fills these in, and in which frame, is not visible in this header
				+	b3Vector3	m_appliedTorqueBodyA;
				+	b3Vector3	m_appliedForceBodyB;
				+	b3Vector3	m_appliedTorqueBodyB;
				+};
			
 
				+
			
 
				+
			
 
				+struct b3RigidBodyData;
			
 
				+
			
 
				+
			
 
				+///TypedConstraint is the baseclass for Bullet constraints and vehicles
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3TypedConstraint : public b3TypedObject // abstract: getInfo1, getInfo2, setParam and getParam are pure virtual
				+{
				+	int	m_userConstraintType; // members up to `protected:` are private (class default access)
				+
				+	union // id and pointer share storage: setting one overwrites the other
				+	{
				+		int	m_userConstraintId;
				+		void* m_userConstraintPtr;
				+	};
				+
				+	b3Scalar	m_breakingImpulseThreshold;
				+	bool		m_isEnabled;
				+	bool		m_needsFeedback;
				+	int			m_overrideNumSolverIterations;
				+
				+
				+	b3TypedConstraint&	operator=(b3TypedConstraint&	other) // private and asserting: copy-assignment is not supported
				+	{
				+		b3Assert(0);
				+		(void) other; // silences the unused-parameter warning; nothing is copied
				+		return *this;
				+	}
				+
				+protected:
				+	int				m_rbA; // body A, stored as an int (what it indexes is not visible in this header)
				+	int				m_rbB; // body B, stored as an int
				+	b3Scalar	m_appliedImpulse;
				+	b3Scalar	m_dbgDrawSize;
				+	b3JointFeedback*	m_jointFeedback; // stored as passed to setJointFeedback; ownership is not established by this header
				+
				+	///internal method used by the constraint solver, don't use them directly
				+	b3Scalar getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact);
				+	
				+
				+public:
				+
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
				+
				+	virtual ~b3TypedConstraint() {};
				+	b3TypedConstraint(b3TypedConstraintType type, int bodyA,int bodyB); // defined out of line
				+
				+	struct b3ConstraintInfo1 {
				+		int m_numConstraintRows,nub; // output of getInfo1; meaning of `nub` is not documented here
				+	};
				+
				+	
				+
				+	struct b3ConstraintInfo2 {
				+		// integrator parameters: frames per second (1/stepsize), default error
				+		// reduction parameter (0..1).
				+		b3Scalar fps,erp;
				+
				+		// for the first and second body, pointers to two (linear and angular)
				+		// n*3 jacobian sub matrices, stored by rows. these matrices will have
				+		// been initialized to 0 on entry. if the second body is zero then the
				+		// J2xx pointers may be 0.
				+		b3Scalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis;
				+
				+		// elements to jump from one row to the next in J's
				+		int rowskip;
				+
				+		// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
				+		// "constraint force mixing" vector. c is set to zero on entry, cfm is
				+		// set to a constant value (typically very small or zero) value on entry.
				+		b3Scalar *m_constraintError,*cfm;
				+
				+		// lo and hi limits for variables (set to -/+ infinity on entry).
				+		b3Scalar *m_lowerLimit,*m_upperLimit;
				+
				+		// findex vector for variables. see the LCP solver interface for a
				+		// description of what this does. this is set to -1 on entry.
				+		// note that the returned indexes are relative to the first index of
				+		// the constraint.
				+		int *findex;
				+		// number of solver iterations
				+		int m_numIterations;
				+
				+		//damping of the velocity
				+		b3Scalar	m_damping;
				+	};
				+
				+	int	getOverrideNumSolverIterations() const
				+	{
				+		return m_overrideNumSolverIterations;
				+	}
				+
				+	///override the number of constraint solver iterations used to solve this constraint
				+	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
				+	void setOverrideNumSolverIterations(int overideNumIterations)
				+	{
				+		m_overrideNumSolverIterations = overideNumIterations;
				+	}
				+
				+
				+	///internal method used by the constraint solver, don't use them directly
				+	virtual	void	setupSolverConstraint(b3ConstraintArray& ca, int solverBodyA,int solverBodyB, b3Scalar timeStep) // default implementation does nothing
				+	{
				+        (void)ca;
				+        (void)solverBodyA;
				+        (void)solverBodyB;
				+        (void)timeStep;
				+	}
				+	
				+	///internal method used by the constraint solver, don't use them directly
				+	virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)=0;
				+
				+	///internal method used by the constraint solver, don't use them directly
				+	virtual void getInfo2 (b3ConstraintInfo2* info,  const b3RigidBodyData* bodies)=0;
				+
				+	///internal method used by the constraint solver, don't use them directly
				+	void	internalSetAppliedImpulse(b3Scalar appliedImpulse)
				+	{
				+		m_appliedImpulse = appliedImpulse;
				+	}
				+	///internal method used by the constraint solver, don't use them directly
				+	b3Scalar	internalGetAppliedImpulse() // unlike getAppliedImpulse(), does not assert m_needsFeedback
				+	{
				+		return m_appliedImpulse;
				+	}
				+
				+
				+	b3Scalar	getBreakingImpulseThreshold() const
				+	{
				+		return 	m_breakingImpulseThreshold;
				+	}
				+
				+	void	setBreakingImpulseThreshold(b3Scalar threshold)
				+	{
				+		m_breakingImpulseThreshold = threshold;
				+	}
				+
				+	bool	isEnabled() const
				+	{
				+		return m_isEnabled;
				+	}
				+
				+	void	setEnabled(bool enabled)
				+	{
				+		m_isEnabled=enabled;
				+	}
				+
				+
				+	///internal method used by the constraint solver, don't use them directly
				+	virtual	void	solveConstraintObsolete(b3SolverBody& /*bodyA*/,b3SolverBody& /*bodyB*/,b3Scalar	/*timeStep*/) {};
				+
				+	
				+	int getRigidBodyA() const
				+	{
				+		return m_rbA;
				+	}
				+	int getRigidBodyB() const
				+	{
				+		return m_rbB;
				+	}
				+
				+
				+	int getRigidBodyA() // non-const overload; returns the same value as the const one
				+	{
				+		return m_rbA;
				+	}
				+	int getRigidBodyB() // non-const overload; returns the same value as the const one
				+	{
				+		return m_rbB;
				+	}
				+
				+	int getUserConstraintType() const
				+	{
				+		return m_userConstraintType ;
				+	}
				+
				+	void	setUserConstraintType(int userConstraintType)
				+	{
				+		m_userConstraintType = userConstraintType;
				+	};
				+
				+	void	setUserConstraintId(int uid) // writes the union: invalidates any pointer set via setUserConstraintPtr
				+	{
				+		m_userConstraintId = uid;
				+	}
				+
				+	int getUserConstraintId() const
				+	{
				+		return m_userConstraintId;
				+	}
				+
				+	void	setUserConstraintPtr(void* ptr) // writes the union: invalidates any id set via setUserConstraintId
				+	{
				+		m_userConstraintPtr = ptr;
				+	}
				+
				+	void*	getUserConstraintPtr()
				+	{
				+		return m_userConstraintPtr;
				+	}
				+
				+	void	setJointFeedback(b3JointFeedback* jointFeedback)
				+	{
				+		m_jointFeedback = jointFeedback;
				+	}
				+
				+	const b3JointFeedback* getJointFeedback() const
				+	{
				+		return m_jointFeedback;
				+	}
				+
				+	b3JointFeedback* getJointFeedback()
				+	{
				+		return m_jointFeedback;
				+	}
				+
				+
				+	int getUid() const // same value as getUserConstraintId()
				+	{
				+		return m_userConstraintId;   
				+	} 
				+
				+	bool	needsFeedback() const
				+	{
				+		return m_needsFeedback;
				+	}
				+
				+	///enableFeedback will allow to read the applied linear and angular impulse
				+	///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
				+	void	enableFeedback(bool needsFeedback)
				+	{
				+		m_needsFeedback = needsFeedback;
				+	}
				+
				+	///getAppliedImpulse is an estimated total applied impulse. 
				+	///This feedback could be used to determine breaking constraints or playing sounds.
				+	b3Scalar	getAppliedImpulse() const
				+	{
				+		b3Assert(m_needsFeedback); // only meaningful after enableFeedback(true)
				+		return m_appliedImpulse;
				+	}
				+
				+	b3TypedConstraintType getConstraintType () const
				+	{
				+		return b3TypedConstraintType(m_objectType); // m_objectType is not declared in this class; presumably inherited from b3TypedObject
				+	}
				+	
				+	void setDbgDrawSize(b3Scalar dbgDrawSize)
				+	{
				+		m_dbgDrawSize = dbgDrawSize;
				+	}
				+	b3Scalar getDbgDrawSize()
				+	{
				+		return m_dbgDrawSize;
				+	}
				+
				+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
				+	///If no axis is provided, it uses the default axis for this constraint.
				+	virtual	void	setParam(int num, b3Scalar value, int axis = -1) = 0;
				+
				+	///return the local value of parameter
				+	virtual	b3Scalar getParam(int num, int axis = -1) const = 0;
				+	
				+//	virtual	int	calculateSerializeBufferSize() const;
				+
				+	///fills the dataBuffer and returns the struct name (and 0 on failure)
				+	//virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;
				+
				+};
			
 
				+
			
 
				+// returns angle in range [-B3_2_PI, B3_2_PI], closest to one of the limits 
			
 
				+// all arguments should be normalized angles (i.e. in range [-B3_PI, B3_PI])
			
 
				+/// Shifts angleInRadians by one full turn (B3_2_PI) when that brings it
				+/// closer to the violated limit; otherwise returns it unchanged.
				+/// - Lower limit >= upper limit: the angle is returned as is.
				+/// - Angle inside [lower, upper]: returned as is.
				+/// - Angle below the lower limit: returned as is if the lower limit is the
				+///   nearer one (by normalized distance), else angle + B3_2_PI.
				+/// - Angle above the upper limit: angle - B3_2_PI if the lower limit is the
				+///   nearer one, else returned as is.
				+B3_FORCE_INLINE b3Scalar b3AdjustAngleToLimits(b3Scalar angleInRadians, b3Scalar angleLowerLimitInRadians, b3Scalar angleUpperLimitInRadians)
				+{
				+	if(angleLowerLimitInRadians >= angleUpperLimitInRadians)
				+	{
				+		return angleInRadians;
				+	}
				+
				+	const bool belowRange = angleInRadians < angleLowerLimitInRadians;
				+	const bool aboveRange = angleInRadians > angleUpperLimitInRadians;
				+	if(!belowRange && !aboveRange)
				+	{
				+		return angleInRadians;
				+	}
				+
				+	if(belowRange)
				+	{
				+		const b3Scalar distToLower = b3Fabs(b3NormalizeAngle(angleLowerLimitInRadians - angleInRadians));
				+		const b3Scalar distToUpper = b3Fabs(b3NormalizeAngle(angleUpperLimitInRadians - angleInRadians));
				+		if(distToLower < distToUpper)
				+		{
				+			return angleInRadians;
				+		}
				+		return angleInRadians + B3_2_PI;
				+	}
				+
				+	// Remaining case: the angle lies above the upper limit.
				+	const b3Scalar distToUpper = b3Fabs(b3NormalizeAngle(angleInRadians - angleUpperLimitInRadians));
				+	const b3Scalar distToLower = b3Fabs(b3NormalizeAngle(angleInRadians - angleLowerLimitInRadians));
				+	if(distToLower < distToUpper)
				+	{
				+		return angleInRadians - B3_2_PI;
				+	}
				+	return angleInRadians;
				+}
			
 
				+
			
 
				+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
			
 
				+struct	b3TypedConstraintData // plain serialization record; field order and types must stay as they are
				+{
				+	int		m_bodyA;
				+	int		m_bodyB;
				+	char	*m_name;
				+
				+	int	m_objectType;
				+	int	m_userConstraintType;
				+	int	m_userConstraintId;
				+	int	m_needsFeedback; // stored as int here, bool in b3TypedConstraint
				+
				+	float	m_appliedImpulse; // float here, b3Scalar in b3TypedConstraint
				+	float	m_dbgDrawSize; // float here, b3Scalar in b3TypedConstraint
				+
				+	int	m_disableCollisionsBetweenLinkedBodies; // no matching member is visible in b3TypedConstraint
				+	int	m_overrideNumSolverIterations;
				+
				+	float	m_breakingImpulseThreshold; // float here, b3Scalar in b3TypedConstraint
				+	int		m_isEnabled; // stored as int here, bool in b3TypedConstraint
				+	
				+};
			
 
				+
			
 
				+/*B3_FORCE_INLINE	int	b3TypedConstraint::calculateSerializeBufferSize() const
			
 
				+{
			
 
				+	return sizeof(b3TypedConstraintData);
			
 
				+}
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+class b3AngularLimit // angular range described by a center and half range; set/test/fit/getError/getLow/getHigh are defined out of line
				+{
				+private:
				+	b3Scalar 
				+		m_center,
				+		m_halfRange,
				+		m_softness,
				+		m_biasFactor,
				+		m_relaxationFactor,
				+		m_correction,
				+		m_sign;
				+
				+	bool
				+		m_solveLimit;
				+
				+public:
				+	/// Default constructor initializes limit as inactive, allowing free constraint movement
				+	b3AngularLimit()
				+		:m_center(0.0f),
				+		m_halfRange(-1.0f), // negative half range is the initial "inactive" state described above
				+		m_softness(0.9f),
				+		m_biasFactor(0.3f),
				+		m_relaxationFactor(1.0f),
				+		m_correction(0.0f),
				+		m_sign(0.0f),
				+		m_solveLimit(false)
				+	{}
				+
				+	/// Sets all limit's parameters.
				+	/// When low > high limit becomes inactive.
				+	/// When high - low > 2PI limit is ineffective too because no angle can exceed the limit
				+	void set(b3Scalar low, b3Scalar high, b3Scalar _softness = 0.9f, b3Scalar _biasFactor = 0.3f, b3Scalar _relaxationFactor = 1.0f);
				+
				+	/// Checks constraint angle against limit. If limit is active and the angle violates the limit
				+	/// correction is calculated.
				+	void test(const b3Scalar angle);
				+
				+	/// Returns limit's softness
				+	inline b3Scalar getSoftness() const
				+	{
				+		return m_softness;
				+	}
				+
				+	/// Returns limit's bias factor
				+	inline b3Scalar getBiasFactor() const
				+	{
				+		return m_biasFactor;
				+	}
				+
				+	/// Returns limit's relaxation factor
				+	inline b3Scalar getRelaxationFactor() const
				+	{
				+		return m_relaxationFactor;
				+	}
				+
				+	/// Returns correction value evaluated when test() was invoked 
				+	inline b3Scalar getCorrection() const
				+	{
				+		return m_correction;
				+	}
				+
				+	/// Returns sign value evaluated when test() was invoked 
				+	inline b3Scalar getSign() const
				+	{
				+		return m_sign;
				+	}
				+
				+	/// Gives half of the distance between min and max limit angle
				+	inline b3Scalar getHalfRange() const
				+	{
				+		return m_halfRange;
				+	}
				+
				+	/// Returns true when the last test() invocation recognized limit violation
				+	inline bool isLimit() const
				+	{
				+		return m_solveLimit;
				+	}
				+
				+	/// Checks given angle against limit. If limit is active and angle doesn't fit it, the angle
				+	/// returned is modified so it equals to the limit closest to given angle.
				+	void fit(b3Scalar& angle) const;
				+
				+	/// Returns correction value multiplied by sign value
				+	b3Scalar getError() const;
				+
				+	b3Scalar getLow() const; // defined out of line; presumably derived from m_center and m_halfRange -- TODO confirm
				+
				+	b3Scalar getHigh() const; // defined out of line; presumably derived from m_center and m_halfRange -- TODO confirm
				+
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_TYPED_CONSTRAINT_H
			
--- a/include/Bullet3Dynamics/b3CpuRigidBodyPipeline.h
+++ b/include/Bullet3Dynamics/b3CpuRigidBodyPipeline.h
@@ -0,0 +1,67 @@
 
				+/*
			
 
				+Copyright (c) 2013 Advanced Micro Devices, Inc.  
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Originally written by Erwin Coumans
			
 
				+
			
 
				+#ifndef B3_CPU_RIGIDBODY_PIPELINE_H
			
 
				+#define B3_CPU_RIGIDBODY_PIPELINE_H
			
 
				+
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
			
 
				+
			
 
				+class b3CpuRigidBodyPipeline // declaration only; every member function is defined out of line
				+{
				+protected:
				+	struct b3CpuRigidBodyPipelineInternalData*	m_data; // opaque internal state (type only forward-declared here)
				+
				+	int allocateCollidable();
				+
				+public:
				+
				+
				+	b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const struct b3Config& config); // ownership of the two pointers is not established by this header
				+	virtual ~b3CpuRigidBodyPipeline();
				+
				+	virtual void	stepSimulation(float deltaTime); // NOTE(review): presumably drives the virtual stages below -- confirm in the .cpp
				+	virtual void	integrate(float timeStep);
				+	virtual void	updateAabbWorldSpace();
				+	virtual void	computeOverlappingPairs();
				+	virtual void	computeContactPoints();
				+	virtual void	solveContactConstraints();
				+
				+	int		registerConvexPolyhedron(class b3ConvexUtility* convex);
				+
				+	int		registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData); // element counts behind position/orientation are not stated here -- check the definition
				+	void	writeAllInstancesToGpu();
				+	void	copyConstraintsToHost();
				+	void	setGravity(const float* grav); // element count behind grav is not stated here -- check the definition
				+	void	reset();
				+	
				+	int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold); // meaning of the int result is not visible here; removeConstraintByUid takes an int uid
				+	int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold);
				+	void removeConstraintByUid(int uid);
				+
				+	void	addConstraint(class b3TypedConstraint* constraint); // ownership of constraint is not established by this header
				+	void	removeConstraint(b3TypedConstraint* constraint);
				+
				+	void	castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults); // hitResults is the output array
				+
				+	const struct b3RigidBodyData* getBodyBuffer() const;
				+
				+	int	getNumBodies() const;
				+
				+};
			
 
				+
			
 
				+#endif //B3_CPU_RIGIDBODY_PIPELINE_H
			
--- a/include/Bullet3Dynamics/shared/b3ContactConstraint4.h
+++ b/include/Bullet3Dynamics/shared/b3ContactConstraint4.h
@@ -0,0 +1,34 @@
 
				+#ifndef B3_CONTACT_CONSTRAINT5_H
			
 
				+#define B3_CONTACT_CONSTRAINT5_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Float4.h"
			
 
				+
			
 
				+typedef struct b3ContactConstraint4 b3ContactConstraint4_t; // C-style alias so the name works without the `struct` keyword
				+
				+
				+struct b3ContactConstraint4 // up to four contact rows plus two friction rows for one body pair; filled by setConstraint4
				+{
				+
				+	b3Float4 m_linear;//normal?  (.w carries the friction coefficient, see b3GetFrictionCoeff)
				+	b3Float4 m_worldPos[4]; // one entry per contact point; unused entries are zeroed by setConstraint4
				+	b3Float4 m_center;	//	friction
				+	float m_jacCoeffInv[4]; // per contact point; 0 for unused points
				+	float m_b[4]; // per-contact bias term written by setConstraint4
				+	float m_appliedRambdaDt[4]; // per-contact accumulator, reset to 0 by setConstraint4
				+	float m_fJacCoeffInv[2];	//	friction
				+	float m_fAppliedRambdaDt[2];	//	friction
				+
				+	unsigned int m_bodyA;
				+	unsigned int m_bodyB;
				+	int			m_batchIdx;
				+	unsigned int m_paddings; // padding only, by name
				+
				+};
			
 
				+
			
 
				+//inline	void setFrictionCoeff(float value) { m_linear[3] = value; }
			
 
				+/// Returns the friction coefficient of a contact constraint, which is
				+/// stored in the w component of its m_linear vector.
				+inline	float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) 
				+{
				+	const float frictionCoeff = constraint->m_linear.w;
				+	return frictionCoeff;
				+}
			
 
				+
			
 
				+#endif //B3_CONTACT_CONSTRAINT5_H
			
--- a/include/Bullet3Dynamics/shared/b3ConvertConstraint4.h
+++ b/include/Bullet3Dynamics/shared/b3ConvertConstraint4.h
@@ -0,0 +1,153 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
			
 
				+#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+
			
 
				+void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q); // prototype directly ahead of the definition
				+ void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q) // writes two tangent vectors for n into p[0] and q[0]; only x,y,z are written, w is left untouched
				+{
				+  if (b3Fabs(n.z) > 0.70710678f) { // threshold is ~1/sqrt(2): n points mostly along z
				+    // choose p in y-z plane
				+    float a = n.y*n.y + n.z*n.z;
				+    float k = 1.f/sqrt(a); // normalization factor for p
				+    p[0].x = 0;
				+	p[0].y = -n.z*k;
				+	p[0].z = n.y*k;
				+    // set q = n x p
				+    q[0].x = a*k;
				+	q[0].y = -n.x*p[0].z;
				+	q[0].z = n.x*p[0].y;
				+  }
				+  else {
				+    // choose p in x-y plane
				+    float a = n.x*n.x + n.y*n.y;
				+    float k = 1.f/sqrt(a); // NOTE(review): a is 0 only if n.x and n.y are both 0, which a unit-length n cannot have on this branch
				+    p[0].x = -n.y*k;
				+	p[0].y = n.x*k;
				+	p[0].z = 0;
				+    // set q = n x p
				+    q[0].x = -n.z*p[0].y;
				+	q[0].y = n.z*p[0].x;
				+	q[0].z = a*k;
				+  }
				+}
			
 
				+
			
 
				+
			
 
				+ 
			
 
				+void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1) // fills the three output vectors for axis n and lever arms r0, r1
				+{
				+	*linear = b3MakeFloat4(n.x,n.y,n.z,0.f); // copy of n with w forced to 0
				+	*angular0 = b3Cross3(r0, n);
				+	*angular1 = -b3Cross3(r1, n); // note the negation for the second body
				+}
			
 
				+
			
 
				+
			
 
				+/// Sums the four dot products (l0.linVel0) + (a0.angVel0) + (l1.linVel1) +
				+/// (a1.angVel1), i.e. the velocity of the two bodies projected onto the
				+/// given linear/angular directions.
				+float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,
				+	b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )
				+{
				+	const float linearTerm0 = b3Dot3F4(l0, linVel0);
				+	const float angularTerm0 = b3Dot3F4(a0, angVel0);
				+	const float linearTerm1 = b3Dot3F4(l1, linVel1);
				+	const float angularTerm1 = b3Dot3F4(a1, angVel1);
				+	// Same left-to-right summation order as a single chained expression.
				+	return linearTerm0 + angularTerm0 + linearTerm1 + angularTerm1;
				+}
			
 
				+
			
 
				+
			
 
				+/// Returns -1 / (invMass0 + A0 + invMass1 + A1), where
				+/// Ai = dot(mtMul3(angular_i, *invInertia_i), angular_i).
				+/// linear0 and linear1 are not read: they are taken to be normalized, so
				+/// their contribution reduces to the plain inverse masses.
				+float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,
				+					float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)
				+{
				+	const float angularTerm0 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);
				+	const float angularTerm1 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);
				+	// Summation order: body 0 linear, body 0 angular, body 1 linear, body 1 angular.
				+	return -1.f/(invMass0+angularTerm0+invMass1+angularTerm1);
				+}
			
 
				+
			
 
				+
			
 
				+void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA, // fills *dstC from contact *src and the state of bodies A and B
				+	b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, 
				+	__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
				+	b3ContactConstraint4_t* dstC )
				+{
				+	dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); // abs() drops the sign bit packed into the body field
				+	dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
				+
				+	float dtInv = 1.f/dt; // caller must pass a non-zero dt
				+	for(int ic=0; ic<4; ic++)
				+	{
				+		dstC->m_appliedRambdaDt[ic] = 0.f;
				+	}
				+	dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
				+
				+
				+	dstC->m_linear = src->m_worldNormalOnB;
				+	dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );  -- friction coefficient is hard-coded to 0.7
				+	for(int ic=0; ic<4; ic++)
				+	{
				+		b3Float4 r0 = src->m_worldPosB[ic] - posA;
				+		b3Float4 r1 = src->m_worldPosB[ic] - posB;
				+
				+		if( ic >= src->m_worldNormalOnB.w )//npoints
				+		{
				+			dstC->m_jacCoeffInv[ic] = 0.f; // unused contact slot; m_b[ic] is not written for it
				+			continue;
				+		}
				+
				+		float relVelN;
				+		{
				+			b3Float4 linear, angular0, angular1;
				+			setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
				+
				+			dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
				+				invMassA, &invInertiaA, invMassB, &invInertiaB );
				+
				+			relVelN = calcRelVel(linear, -linear, angular0, angular1,
				+				linVelA, angVelA, linVelB, angVelB);
				+
				+			float e = 0.f;//src->getRestituitionCoeff();
				+			if( relVelN*relVelN < 0.004f ) e = 0.f; // no effect while e is already hard-coded to 0
				+
				+			dstC->m_b[ic] = e*relVelN; // restitution term; always 0 with e == 0
				+			//float penetration = src->m_worldPosB[ic].w;
				+			dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; // positional bias from the w component (penetration, per the comment above)
				+			dstC->m_appliedRambdaDt[ic] = 0.f;
				+		}
				+	}
				+
				+	if( src->m_worldNormalOnB.w > 0 )//npoints
				+	{	//	prepare friction
				+		b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);
				+		for(int i=0; i<src->m_worldNormalOnB.w; i++) 
				+			center += src->m_worldPosB[i];
				+		center /= (float)src->m_worldNormalOnB.w; // average of the active contact points
				+
				+		b3Float4 tangent[2];
				+		b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);
				+		
				+		b3Float4 r[2];
				+		r[0] = center - posA;
				+		r[1] = center - posB;
				+
				+		for(int i=0; i<2; i++)
				+		{
				+			b3Float4 linear, angular0, angular1;
				+			setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
				+
				+			dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
				+				invMassA, &invInertiaA, invMassB, &invInertiaB );
				+			dstC->m_fAppliedRambdaDt[i] = 0.f;
				+		}
				+		dstC->m_center = center;
				+	} // NOTE(review): with zero points, m_center and m_fAppliedRambdaDt are left unwritten
				+
				+	for(int i=0; i<4; i++)
				+	{
				+		if( i<src->m_worldNormalOnB.w )
				+		{
				+			dstC->m_worldPos[i] = src->m_worldPosB[i];
				+		}
				+		else
				+		{
				+			dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);
				+		}
				+	}
				+}
			
--- a/include/Bullet3Dynamics/shared/b3Inertia.h
+++ b/include/Bullet3Dynamics/shared/b3Inertia.h
@@ -0,0 +1,15 @@
 
				+
			
 
				+
			
 
				+#ifndef B3_INERTIA_H
			
 
				+#define B3_INERTIA_H
			
 
				+
			
 
				+#include "Bullet3Common/shared/b3Mat3x3.h"
			
 
				+
			
 
				+/// Plain aggregate holding two 3x3 matrices for a rigid body.
				+/// NOTE(review): going by the member names these are the inverse inertia tensor
				+/// in world space and the initial inverse inertia tensor; nothing in this header
				+/// fills or reads them, so confirm against the code that uses the struct.
				+struct b3Inertia
			
 
				+{
			
 
				+	b3Mat3x3 m_invInertiaWorld;
			
 
				+	b3Mat3x3 m_initInvInertia;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_INERTIA_H
			
--- a/include/Bullet3Dynamics/shared/b3IntegrateTransforms.h
+++ b/include/Bullet3Dynamics/shared/b3IntegrateTransforms.h
@@ -0,0 +1,113 @@
 
				+
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Steps the rigid body bodies[nodeID] forward by timeStep.
				+/// A body whose m_invMass is exactly 0 is left untouched. Otherwise, in this order:
				+///  1. the x/y/z components of m_angVel are multiplied by angularDamping,
				+///  2. m_quat is rotated by the (clamped) angular velocity and re-normalized,
				+///  3. m_pos advances using the current m_linVel,
				+///  4. gravityAcceleration * timeStep is added to m_linVel, so gravity only
				+///     influences the position from the next step on.
				+inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
				+{
				+	__global b3RigidBodyData_t* body = bodies + nodeID;
				+	if (body->m_invMass == 0.f)
				+		return;
				+
				+	// largest rotation angle allowed within a single step (a quarter of ~pi)
				+	const float maxStepAngle = (0.25f * 3.14159254f);
				+
				+	// angular damping: only x, y and z are scaled, w is not touched
				+	body->m_angVel.x *= angularDamping;
				+	body->m_angVel.y *= angularDamping;
				+	body->m_angVel.z *= angularDamping;
				+
				+	b3Float4 angVel = body->m_angVel;
				+	float angSpeed = b3Sqrt(b3Dot3F4(angVel, angVel));
				+
				+	// limit the angular motion
				+	if (angSpeed*timeStep > maxStepAngle)
				+		angSpeed = maxStepAngle / timeStep;
				+
				+	// Factor turning the angular velocity into the vector part of the step
				+	// quaternion: sin(0.5*angSpeed*timeStep)/angSpeed. For very small speeds the
				+	// division is replaced by the first two terms of its Taylor series
				+	// (0.020833333333 == 1/48).
				+	float vecScale;
				+	if (angSpeed < 0.001f)
				+		vecScale = 0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * angSpeed * angSpeed;
				+	else
				+		vecScale = b3Sin(0.5f * angSpeed * timeStep) / angSpeed;
				+	b3Float4 axis = angVel * vecScale;
				+
				+	b3Quat stepRot;
				+	stepRot.x = axis.x;
				+	stepRot.y = axis.y;
				+	stepRot.z = axis.z;
				+	stepRot.w = b3Cos(angSpeed * timeStep * 0.5f);
				+
				+	// apply the step rotation on the left of the current orientation and re-normalize
				+	b3Quat currentRot = body->m_quat;
				+	body->m_quat = b3QuatNormalized(b3QuatMul(stepRot, currentRot));
				+
				+	// linear velocity
				+	body->m_pos += body->m_linVel * timeStep;
				+
				+	// apply gravity
				+	body->m_linVel += gravityAcceleration * timeStep;
				+}
			
 
				+
			
 
				+/// Steps a single rigid body forward by timeStep.
				+/// A body whose m_invMass is exactly 0 is left untouched. Otherwise, in this order:
				+///  1. the x/y/z components of m_angVel are multiplied by angularDamping,
				+///  2. m_quat is rotated by the (clamped) angular velocity and re-normalized,
				+///  3. gravityAcceleration * timeStep is added to m_linVel,
				+///  4. m_pos advances using the freshly updated m_linVel.
				+/// Note that steps 3 and 4 are in the opposite order to integrateSingleTransform.
				+inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
				+{
				+	// largest rotation angle allowed within a single step (a quarter of ~pi)
				+	const float maxStepAngle = (0.25f * 3.14159254f);
				+
				+	if (body->m_invMass == 0.f)
				+		return;
				+
				+	// angular damping: only x, y and z are scaled, w is not touched
				+	body->m_angVel.x *= angularDamping;
				+	body->m_angVel.y *= angularDamping;
				+	body->m_angVel.z *= angularDamping;
				+
				+	b3Float4 angVel = body->m_angVel;
				+	float angSpeed = b3Sqrt(b3Dot3F4(angVel, angVel));
				+
				+	// limit the angular motion
				+	if (angSpeed*timeStep > maxStepAngle)
				+		angSpeed = maxStepAngle / timeStep;
				+
				+	// Factor turning the angular velocity into the vector part of the step
				+	// quaternion: sin(0.5*angSpeed*timeStep)/angSpeed. For very small speeds the
				+	// division is replaced by the first two terms of its Taylor series
				+	// (0.020833333333 == 1/48).
				+	float vecScale;
				+	if (angSpeed < 0.001f)
				+		vecScale = 0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * angSpeed * angSpeed;
				+	else
				+		vecScale = b3Sin(0.5f * angSpeed * timeStep) / angSpeed;
				+	b3Float4 axis = angVel * vecScale;
				+
				+	b3Quat stepRot;
				+	stepRot.x = axis.x;
				+	stepRot.y = axis.y;
				+	stepRot.z = axis.z;
				+	stepRot.w = b3Cos(angSpeed * timeStep * 0.5f);
				+
				+	// apply the step rotation on the left of the current orientation and re-normalize
				+	b3Quat currentRot = body->m_quat;
				+	body->m_quat = b3QuatNormalized(b3QuatMul(stepRot, currentRot));
				+
				+	// apply gravity
				+	body->m_linVel += gravityAcceleration * timeStep;
				+
				+	// linear velocity
				+	body->m_pos += body->m_linVel * timeStep;
				+}
			
--- a/include/Bullet3Geometry/b3AabbUtil.h
+++ b/include/Bullet3Geometry/b3AabbUtil.h
@@ -0,0 +1,232 @@
 
				+/*
			
 
				+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_AABB_UTIL2
			
 
				+#define B3_AABB_UTIL2
			
 
				+
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3MinMax.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Adjusts an AABB in place: expansionMin is added to aabbMin, then expansionMax
				+/// is added to aabbMax. The minimum corner is updated first.
				+B3_FORCE_INLINE void b3AabbExpand (b3Vector3& aabbMin,
				+								   b3Vector3& aabbMax,
				+								   const b3Vector3& expansionMin,
				+								   const b3Vector3& expansionMax)
				+{
				+	const b3Vector3 movedMin = aabbMin + expansionMin;
				+	aabbMin = movedMin;
				+
				+	const b3Vector3 movedMax = aabbMax + expansionMax;
				+	aabbMax = movedMax;
				+}
			
 
				+
			
 
				+/// Tests a point against an AABB.
				+/// Returns false when the point lies strictly outside [aabbMin1, aabbMax1] on the
				+/// X, Z or Y axis (checked in that order); points on the boundary count as inside.
				+B3_FORCE_INLINE bool b3TestPointAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1,
				+								const b3Vector3 &point)
				+{
				+	if (aabbMin1.getX() > point.getX() || aabbMax1.getX() < point.getX())
				+		return false;
				+	if (aabbMin1.getZ() > point.getZ() || aabbMax1.getZ() < point.getZ())
				+		return false;
				+	if (aabbMin1.getY() > point.getY() || aabbMax1.getY() < point.getY())
				+		return false;
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+/// Overlap test between two AABBs.
				+/// Returns false when the boxes are strictly separated on the X, Z or Y axis
				+/// (checked in that order); boxes that merely touch are reported as overlapping.
				+B3_FORCE_INLINE bool b3TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1,
				+								const b3Vector3 &aabbMin2, const b3Vector3 &aabbMax2)
				+{
				+	if (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX())
				+		return false;
				+	if (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ())
				+		return false;
				+	if (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY())
				+		return false;
				+	return true;
				+}
			
 
				+
			
 
				+/// Conservative triangle-vs-AABB test: compares the per-axis extent of the three
				+/// vertices (vertices[0..2]) with the box and returns false as soon as they are
				+/// strictly separated on one axis. A true result only means the triangle's own
				+/// bounding box overlaps the AABB.
				+B3_FORCE_INLINE bool b3TestTriangleAgainstAabb2(const b3Vector3 *vertices,
				+									const b3Vector3 &aabbMin, const b3Vector3 &aabbMax)
				+{
				+	const b3Vector3 &v0 = vertices[0];
				+	const b3Vector3 &v1 = vertices[1];
				+	const b3Vector3 &v2 = vertices[2];
				+
				+	// axes are visited as X, Z, Y
				+	const int axisOrder[3] = {0, 2, 1};
				+	for (int k = 0; k < 3; ++k)
				+	{
				+		const int axis = axisOrder[k];
				+		if (b3Min(b3Min(v0[axis], v1[axis]), v2[axis]) > aabbMax[axis])
				+			return false;
				+		if (b3Max(b3Max(v0[axis], v1[axis]), v2[axis]) < aabbMin[axis])
				+			return false;
				+	}
				+	return true;
				+}
			
 
				+
			
 
				+
			
 
				+/// Classifies point p against a box centred on the origin with the given half extents.
				+/// One bit is set per violated face:
				+///   0x01 / 0x02 / 0x04 : p is below -halfExtent on X / Y / Z
				+///   0x08 / 0x10 / 0x20 : p is above +halfExtent on X / Y / Z
				+/// A result of 0 means p is inside or on the box.
				+B3_FORCE_INLINE int	b3Outcode(const b3Vector3& p,const b3Vector3& halfExtent) 
				+{
				+	int code = 0x0;
				+	if (p.getX() < -halfExtent.getX()) code |= 0x01;
				+	if (p.getX() >  halfExtent.getX()) code |= 0x08;
				+	if (p.getY() < -halfExtent.getY()) code |= 0x02;
				+	if (p.getY() >  halfExtent.getY()) code |= 0x10;
				+	if (p.getZ() < -halfExtent.getZ()) code |= 0x4;
				+	if (p.getZ() >  halfExtent.getZ()) code |= 0x20;
				+	return code;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Slab test of a ray against the box bounds[0]..bounds[1].
				+/// rayInvDirection holds the per-axis multipliers applied to (bound - rayFrom);
				+/// raySign[k] chooses which of the two bounds is used for the near plane on axis k
				+/// (the other one, bounds[1-raySign[k]], is used for the far plane).
				+/// tmin is an output: it receives the running entry parameter and is written even
				+/// when the function returns false.
				+/// Returns true when the slabs intersect and (tmin < lambda_max) && (tmax > lambda_min).
				+B3_FORCE_INLINE bool b3RayAabb2(const b3Vector3& rayFrom,
				+								  const b3Vector3& rayInvDirection,
				+								  const unsigned int raySign[3],
				+								  const b3Vector3 bounds[2],
				+								  b3Scalar& tmin,
				+								  b3Scalar lambda_min,
				+								  b3Scalar lambda_max)
				+{
				+	// X slab
				+	tmin = (bounds[raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
				+	b3Scalar tFar = (bounds[1-raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
				+
				+	// Y slab
				+	const b3Scalar tyNear = (bounds[raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
				+	const b3Scalar tyFar = (bounds[1-raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
				+	if ( (tmin > tyFar) || (tyNear > tFar) )
				+		return false;
				+	if (tyNear > tmin)
				+		tmin = tyNear;
				+	if (tyFar < tFar)
				+		tFar = tyFar;
				+
				+	// Z slab
				+	const b3Scalar tzNear = (bounds[raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
				+	const b3Scalar tzFar = (bounds[1-raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
				+	if ( (tmin > tzFar) || (tzNear > tFar) )
				+		return false;
				+	if (tzNear > tmin)
				+		tmin = tzNear;
				+	if (tzFar < tFar)
				+		tFar = tzFar;
				+
				+	return ( (tmin < lambda_max) && (tFar > lambda_min) );
				+}
			
 
				+
			
 
				+/// Clips the segment rayFrom -> rayTo against the box [aabbMin, aabbMax].
				+/// param is in/out: on entry it is the largest accepted segment parameter, on a
				+/// hit it receives the entry parameter. normal receives the unit axis of the face
				+/// that was entered (all zeros if the start point is already inside the box).
				+/// Neither output is modified when the function returns false.
				+B3_FORCE_INLINE bool b3RayAabb(const b3Vector3& rayFrom, 
				+								 const b3Vector3& rayTo, 
				+								 const b3Vector3& aabbMin, 
				+								 const b3Vector3& aabbMax,
				+					  b3Scalar& param, b3Vector3& normal) 
				+{
				+	// work in a frame centred on the box
				+	b3Vector3 halfExtent = (aabbMax-aabbMin)* b3Scalar(0.5);
				+	b3Vector3 boxCenter = (aabbMax+aabbMin)* b3Scalar(0.5);
				+	b3Vector3 source = rayFrom - boxCenter;
				+	b3Vector3 target = rayTo - boxCenter;
				+	const int sourceOutcode = b3Outcode(source,halfExtent);
				+	const int targetOutcode = b3Outcode(target,halfExtent);
				+
				+	// both end points outside the same face plane: the segment cannot touch the box
				+	if ((sourceOutcode & targetOutcode) != 0x0)
				+		return false;
				+
				+	b3Scalar lambdaEnter = b3Scalar(0.0);
				+	b3Scalar lambdaExit  = param;
				+	b3Vector3 segment = target - source;
				+	b3Vector3 hitNormal = b3MakeVector3(0,0,0);
				+	b3Scalar faceSign = 1;
				+	int bit=1;
				+
				+	// the first pass handles the three faces selected by outcode bits 0x01/0x02/0x04
				+	// (faceSign = +1), the second pass the opposite faces 0x08/0x10/0x20 (faceSign = -1)
				+	for (int pass=0; pass<2; pass++)
				+	{
				+		for (int axis = 0; axis != 3; ++axis)
				+		{
				+			if (sourceOutcode & bit)
				+			{
				+				// start point is outside this face: candidate entry parameter
				+				const b3Scalar lambda = (-source[axis] - halfExtent[axis]*faceSign) / segment[axis];
				+				if (lambdaEnter <= lambda)
				+				{
				+					lambdaEnter = lambda;
				+					hitNormal.setValue(0,0,0);
				+					hitNormal[axis] = faceSign;
				+				}
				+			}
				+			else if (targetOutcode & bit) 
				+			{
				+				// end point is outside this face: candidate exit parameter
				+				const b3Scalar lambda = (-source[axis] - halfExtent[axis]*faceSign) / segment[axis];
				+				b3SetMin(lambdaExit, lambda);
				+			}
				+			bit<<=1;
				+		}
				+		faceSign = b3Scalar(-1.);
				+	}
				+
				+	if (lambdaEnter <= lambdaExit)
				+	{
				+		param = lambdaEnter;
				+		normal = hitNormal;
				+		return true;
				+	}
				+	return false;
				+}
			
 
				+
			
 
				+
			
 
				+
			
 
				+/// Computes the axis-aligned bounds of a box given by its half extents (grown by
				+/// margin on every axis) after applying transform t. The box is taken to be centred
				+/// on the transform origin; the result is written to aabbMinOut / aabbMaxOut.
				+B3_FORCE_INLINE	void b3TransformAabb(const b3Vector3& halfExtents, b3Scalar margin,const b3Transform& t,b3Vector3& aabbMinOut,b3Vector3& aabbMaxOut)
				+{
				+	const b3Vector3 paddedHalfExtents = halfExtents+b3MakeVector3(margin,margin,margin);
				+	// component-wise absolute value of the rotation part
				+	const b3Matrix3x3 absBasis = t.getBasis().absolute();
				+	const b3Vector3 boxCenter = t.getOrigin();
				+	const b3Vector3 worldExtent = paddedHalfExtents.dot3( absBasis[0], absBasis[1], absBasis[2] );
				+	aabbMinOut = boxCenter - worldExtent;
				+	aabbMaxOut = boxCenter + worldExtent;
				+}
			
 
				+
			
 
				+
			
 
				+/// Computes the axis-aligned bounds of the local box [localAabbMin, localAabbMax]
				+/// (grown by margin on every axis) after applying transform trans. The result is
				+/// written to aabbMinOut / aabbMaxOut.
				+/// The ordering localAabbMin <= localAabbMax is not checked here.
				+B3_FORCE_INLINE	void b3TransformAabb(const b3Vector3& localAabbMin,const b3Vector3& localAabbMax, b3Scalar margin,const b3Transform& trans,b3Vector3& aabbMinOut,b3Vector3& aabbMaxOut)
				+{
				+	b3Vector3 paddedHalfExtents = b3Scalar(0.5)*(localAabbMax-localAabbMin);
				+	paddedHalfExtents+=b3MakeVector3(margin,margin,margin);
				+
				+	const b3Vector3 centerLocal = b3Scalar(0.5)*(localAabbMax+localAabbMin);
				+	// component-wise absolute value of the rotation part
				+	const b3Matrix3x3 absBasis = trans.getBasis().absolute();
				+	const b3Vector3 centerWorld = trans(centerLocal);
				+	const b3Vector3 worldExtent = paddedHalfExtents.dot3( absBasis[0], absBasis[1], absBasis[2] );
				+
				+	aabbMinOut = centerWorld-worldExtent;
				+	aabbMaxOut = centerWorld+worldExtent;
				+}
			
 
				+
			
 
				+// Overlap test between two AABBs whose corners are arrays of three unsigned
				+// 16-bit values (indices 0, 1, 2). Touching boxes count as overlapping.
				+// The macro name is spelled "BANCHLESS" (sic). It is defined unconditionally on
				+// the next line, so as this file stands only the first variant is compiled.
				+#define B3_USE_BANCHLESS 1
			
 
				+#ifdef B3_USE_BANCHLESS
			
 
				+	//This block replaces the block below and uses no branches, and replaces the 8 bit return with a 32 bit return for improved performance (~3x on XBox 360)
			
 
				+	// The six comparisons are combined with bitwise & (no short-circuit) and the
				+	// combined value is passed to b3Select together with 1 and 0.
				+	// NOTE(review): b3Select is defined elsewhere; presumably it yields 1 when the
				+	// condition is non-zero and 0 otherwise - confirm against its definition.
				+	B3_FORCE_INLINE unsigned b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
			
 
				+	{		
			
 
				+		return static_cast<unsigned int>(b3Select((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
			
 
				+			& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
			
 
				+			& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
			
 
				+			1, 0));
			
 
				+	}
			
 
				+#else
			
 
				+	// Branching variant with a bool result: false as soon as the boxes are strictly
				+	// separated on component 0, 2 or 1.
				+	B3_FORCE_INLINE bool b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
			
 
				+	{
			
 
				+		bool overlap = true;
			
 
				+		overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap;
			
 
				+		overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap;
			
 
				+		overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap;
			
 
				+		return overlap;
			
 
				+	}
			
 
				+#endif //B3_USE_BANCHLESS
			
 
				+
			
 
				+#endif //B3_AABB_UTIL2
			
 
				+
			
 
				+
			
--- a/include/Bullet3Geometry/b3ConvexHullComputer.h
+++ b/include/Bullet3Geometry/b3ConvexHullComputer.h
@@ -0,0 +1,103 @@
 
				+/*
			
 
				+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONVEX_HULL_COMPUTER_H
			
 
				+#define B3_CONVEX_HULL_COMPUTER_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+/// Convex hull implementation based on Preparata and Hong
			
 
				+/// See http://code.google.com/p/bullet/issues/detail?id=275
			
 
				+/// Ole Kniemeyer, MAXON Computer GmbH
			
 
				+class b3ConvexHullComputer
				+{
				+	private:
				+		// Worker shared by the two public compute() overloads. doubleCoords tells it
				+		// whether coords points at double (true) or float (false) data. The definition
				+		// is not part of this header.
				+		b3Scalar compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp);
				+
				+	public:
				+
				+		// One directed edge of the hull. "next" and "reverse" are element offsets
				+		// relative to this edge, so all navigation below is pointer arithmetic on
				+		// "this"; an Edge is therefore only meaningful while it sits in the array it
				+		// was created in (see the "edges" member).
				+		class Edge
				+		{
				+			private:
				+				int next;
				+				int reverse;
				+				int targetVertex;
				+
				+				friend class b3ConvexHullComputer;
				+
				+			public:
				+				// The source of an edge is the target of its reverse edge.
				+				int getSourceVertex() const
				+				{
				+					return getReverseEdge()->targetVertex;
				+				}
				+
				+				int getTargetVertex() const
				+				{
				+					return targetVertex;
				+				}
				+
				+				// clockwise list of all edges of a vertex
				+				const Edge* getNextEdgeOfVertex() const
				+				{
				+					return this + next;
				+				}
				+
				+				// counter-clockwise list of all edges of a face
				+				const Edge* getNextEdgeOfFace() const
				+				{
				+					return getReverseEdge()->getNextEdgeOfVertex();
				+				}
				+
				+				const Edge* getReverseEdge() const
				+				{
				+					return this + reverse;
				+				}
				+		};
				+
				+
				+		// Vertices of the output hull
				+		b3AlignedObjectArray<b3Vector3> vertices;
				+
				+		// Edges of the output hull
				+		b3AlignedObjectArray<Edge> edges;
				+
				+		// Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons
				+		b3AlignedObjectArray<int> faces;
				+
				+		/*
				+		Computes the convex hull of "count" vertices read from "coords"; "stride" is the
				+		distance in bytes between the addresses of consecutive vertices.
				+
				+		A positive "shrink" moves every face by that many length units towards the
				+		center along its normal. A positive "shrinkClamp" limits "shrink" to at most
				+		"shrinkClamp * innerRadius", where "innerRadius" is the minimum distance of a
				+		face to the center of the convex hull.
				+
				+		Returns the amount by which the hull has been shrunken. A negative value means
				+		the amount was so large that the resulting convex hull is empty.
				+
				+		The resulting hull is stored in the members "vertices", "edges" and "faces".
				+		*/
				+		b3Scalar compute(const float* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
				+		{
				+			const bool coordsAreDouble = false;
				+			return compute(coords, coordsAreDouble, stride, count, shrink, shrinkClamp);
				+		}
				+
				+		// same as above, but the input coordinates are double precision
				+		b3Scalar compute(const double* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
				+		{
				+			const bool coordsAreDouble = true;
				+			return compute(coords, coordsAreDouble, stride, count, shrink, shrinkClamp);
				+		}
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_CONVEX_HULL_COMPUTER_H
			
 
				+
			
--- a/include/Bullet3Geometry/b3GeometryUtil.h
+++ b/include/Bullet3Geometry/b3GeometryUtil.h
@@ -0,0 +1,42 @@
 
				+/*
			
 
				+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GEOMETRY_UTIL_H
			
 
				+#define B3_GEOMETRY_UTIL_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+///The b3GeometryUtil helper class provides a few methods to convert between plane equations and vertices.
			
 
				+// All members are static and only declared here; the definitions live outside
				+// this header, so the notes below describe the signatures only.
				+class b3GeometryUtil
			
 
				+{
			
 
				+	public:
			
 
				+	
			
 
				+	
			
 
				+		// Reads "vertices" and fills "planeEquationsOut".
				+		// NOTE(review): each plane is stored in a b3Vector3, presumably the normal in
				+		// x/y/z with the plane constant in the fourth component - confirm in the .cpp.
				+		static void	getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut );
			
 
				+
			
 
				+		// Inverse direction: reads "planeEquations" and fills "verticesOut".
				+		static void	getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut );
			
 
				+	
			
 
				+		// NOTE(review): the exact inside criterion and the role of "margin" are not
				+		// visible from this declaration - see the definition.
				+		static bool	isInside(const b3AlignedObjectArray<b3Vector3>& vertices, const b3Vector3& planeNormal, b3Scalar	margin);
			
 
				+		
			
 
				+		// Tests a single point against a set of plane equations, with a tolerance "margin".
				+		static bool	isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar	margin);
			
 
				+
			
 
				+		// Tests a set of vertices against a single plane, with a tolerance "margin".
				+		static bool	areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar	margin);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_GEOMETRY_UTIL_H
			
 
				+
			
--- a/include/Bullet3Geometry/b3GrahamScan2dConvexHull.h
+++ b/include/Bullet3Geometry/b3GrahamScan2dConvexHull.h
@@ -0,0 +1,117 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
 
				+#define B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+/// A b3Vector3 extended with the bookkeeping needed by b3GrahamScanConvexHull2D:
				+/// m_angle is the sort key computed by the scan, m_orgIndex is the index supplied
				+/// by the caller at construction time.
				+struct b3GrahamVector3 : public b3Vector3
				+{
				+	/// Copies org and remembers orgIndex. m_angle starts at 0 so that it never holds
				+	/// an indeterminate value before the scan assigns the real sort key.
				+	b3GrahamVector3(const b3Vector3& org, int orgIndex)
				+		:b3Vector3(org),
				+			m_angle(b3Scalar(0)),
				+			m_orgIndex(orgIndex)
				+	{
				+	}
				+	b3Scalar	m_angle;
				+	int m_orgIndex;
				+};
			
 
				+
			
 
				+
			
 
				+/// "Less than" ordering used to sort points around an anchor point.
				+/// Points are ordered by m_angle first, then by squared distance to the anchor,
				+/// and finally by m_orgIndex.
				+struct b3AngleCompareFunc {
				+	b3Vector3 m_anchor;
				+	b3AngleCompareFunc(const b3Vector3& anchor)
				+	: m_anchor(anchor) 
				+	{
				+	}
				+	bool operator()(const b3GrahamVector3& a, const b3GrahamVector3& b) const {
				+		// primary key: the precomputed angle
				+		if (a.m_angle != b.m_angle)
				+			return a.m_angle < b.m_angle;
				+
				+		// same angle: the point closer to the anchor comes first
				+		const b3Scalar distSqA = (a-m_anchor).length2();
				+		const b3Scalar distSqB = (b-m_anchor).length2();
				+		if (distSqA != distSqB)
				+			return  distSqA < distSqB;
				+
				+		// still tied: fall back to the original index
				+		return a.m_orgIndex < b.m_orgIndex;
				+	}
				+};
			
 
				+
			
 
				+/// Graham scan of a point set in the plane perpendicular to normalAxis.
				+/// originalPoints is reordered in place (anchor moved to index 0, the rest sorted
				+/// around it) and the hull points are appended to hull.
				+/// With zero or one input point the input is copied to hull unchanged.
				+inline void b3GrahamScanConvexHull2D(b3AlignedObjectArray<b3GrahamVector3>& originalPoints, b3AlignedObjectArray<b3GrahamVector3>& hull, const b3Vector3& normalAxis)
				+{
				+	b3Vector3 axis0,axis1;
				+	b3PlaneSpace1(normalAxis,axis0,axis1);
				+
				+	if (originalPoints.size()<=1)
				+	{
				+		for (int i=0;i<originalPoints.size();i++)
				+			hull.push_back(originalPoints[0]);
				+		return;
				+	}
				+
				+	//step1 : find anchor point with smallest projection on axis0 and move it to first location
				+	for (int i=0;i<originalPoints.size();i++)
				+	{
				+		b3Scalar projL = originalPoints[i].dot(axis0);
				+		b3Scalar projR = originalPoints[0].dot(axis0);
				+		if (projL < projR)
				+		{
				+			originalPoints.swap(0,i);
				+		}
				+	}
				+
				+	//also precompute angles; the anchor gets a key that sorts before everything else
				+	originalPoints[0].m_angle = -1e30f;
				+	for (int i=1;i<originalPoints.size();i++)
				+	{
				+		b3Vector3 ar = originalPoints[i]-originalPoints[0];
				+		const b3Scalar arLength = ar.length();
				+		// A point coincident with the anchor has no direction. Dividing by its zero
				+		// length would produce NaN, and NaN keys break the ordering used by the sort
				+		// below, so such points get the neutral key 0 instead.
				+		if (arLength > b3Scalar(0))
				+			originalPoints[i].m_angle = b3Cross(axis0, ar).dot(normalAxis) / arLength;
				+		else
				+			originalPoints[i].m_angle = b3Scalar(0);
				+	}
				+
				+	//step 2: sort all points, based on 'angle' with this anchor
				+	b3AngleCompareFunc comp(originalPoints[0]);
				+	originalPoints.quickSortInternal(comp,1,originalPoints.size()-1);
				+
				+	int i;
				+	for (i = 0; i<2; i++) 
				+		hull.push_back(originalPoints[i]);
				+
				+	//step 3: keep all 'convex' points and discard concave points (using back tracking)
				+	for (; i != originalPoints.size(); i++) 
				+	{
				+		bool isConvex = false;
				+		while (!isConvex&& hull.size()>1) {
				+			b3Vector3& a = hull[hull.size()-2];
				+			b3Vector3& b = hull[hull.size()-1];
				+			isConvex = b3Cross(a-b,a-originalPoints[i]).dot(normalAxis)> 0;
				+			if (!isConvex)
				+				hull.pop_back();
				+			else 
				+				hull.push_back(originalPoints[i]);
				+		}
				+
				+		// Back tracking may have removed everything but the anchor. In that case the
				+		// loop above exits without adding the current point, so add it here;
				+		// otherwise the point would be lost from the hull.
				+		if (hull.size() == 1)
				+		{
				+			hull.push_back(originalPoints[i]);
				+		}
				+	}
				+}
			
 
				+
			
 
				+#endif //B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h
@@ -0,0 +1,44 @@
 
				+
			
 
				+#ifndef B3_GPU_BROADPHASE_INTERFACE_H
			
 
				+#define B3_GPU_BROADPHASE_INTERFACE_H
			
 
				+
			
 
				+#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "b3SapAabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
			
 
				+
			
 
				+// Abstract interface for GPU (OpenCL) broadphase implementations. Every member
				+// except the destructor is pure virtual; what each call does is defined by the
				+// implementing class, so the notes below describe the signatures only.
				+class b3GpuBroadphaseInterface
			
 
				+{
			
 
				+public:
			
 
				+
			
 
				+	// Function type (not a pointer type) of a factory that builds a broadphase
				+	// from an OpenCL context, device and command queue.
				+	typedef class b3GpuBroadphaseInterface* (CreateFunc)(cl_context ctx,cl_device_id device, cl_command_queue  q);
			
 
				+
			
 
				+	virtual ~b3GpuBroadphaseInterface()
			
 
				+	{
			
 
				+	}
			
 
				+
			
 
				+	// Register an object by its AABB. userPtr is an integer handle chosen by the
				+	// caller. NOTE(review): when to use createLargeProxy instead of createProxy is
				+	// not visible here - see the implementations.
				+	virtual void createProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)=0;
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)=0;
			
 
				+
			
 
				+	// Pair search; maxPairs bounds the number of pairs.
				+	// NOTE(review): the "Host" variant presumably runs on the CPU - confirm.
				+	virtual void  calculateOverlappingPairs(int maxPairs)=0;
			
 
				+	virtual void  calculateOverlappingPairsHost(int maxPairs)=0;
			
 
				+
			
 
				+	//call writeAabbsToGpu after done making all changes (createProxy etc)
			
 
				+	virtual void writeAabbsToGpu()=0;
			
 
				+
			
 
				+	// Raw OpenCL handles and counters exposing the current broadphase state.
				+	virtual cl_mem	getAabbBufferWS()=0;
			
 
				+	virtual int	getNumOverlap()=0;
			
 
				+	virtual cl_mem	getOverlappingPairBuffer()=0;
			
 
				+
			
 
				+	// Typed access to the AABB storage (device-side array and host-side array).
				+	virtual b3OpenCLArray<b3SapAabb>&	getAllAabbsGPU()=0;
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>&	getAllAabbsCPU()=0;
			
 
				+	
			
 
				+	// Typed access to the pair list and to the two AABB index arrays.
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() = 0;
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0;
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0;
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_GPU_BROADPHASE_INTERFACE_H
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h
@@ -0,0 +1,88 @@
 
				+#ifndef B3_GPU_GRID_BROADPHASE_H
			
 
				+#define B3_GPU_GRID_BROADPHASE_H
			
 
				+
			
 
				+#include "b3GpuBroadphaseInterface.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
			
 
				+
			
 
				+struct b3ParamsGridBroadphaseCL // Grid parameters: four floats followed by four ints; m_gridSize[3] doubles as the max-bodies-per-cell value.
			
 
				+{
			
 
				+
			
 
				+	float m_invCellSize[4]; // NOTE(review): presumably the per-axis inverse cell size that the grid kernels read as pParams[0] - confirm against the upload code.
			
 
				+	int   m_gridSize[4]; // Element [3] is reserved for the max-bodies-per-cell value (see the accessors below).
			
 
				+
			
 
				+	int	getMaxBodiesPerCell() const // Reads the value stored in m_gridSize[3].
			
 
				+	{
			
 
				+		return m_gridSize[3];
			
 
				+	}
			
 
				+
			
 
				+	void setMaxBodiesPerCell(int maxOverlap)  // Stores maxOverlap in m_gridSize[3]; no range check is performed.
			
 
				+	{
			
 
				+		m_gridSize[3] = maxOverlap;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+class b3GpuGridBroadphase : public b3GpuBroadphaseInterface // Grid-based implementation of b3GpuBroadphaseInterface; only declarations are visible here, the definitions live in the implementation file.
			
 
				+{
			
 
				+protected:
			
 
				+	cl_context				m_context;
			
 
				+	cl_device_id			m_device;
			
 
				+	cl_command_queue		m_queue;
			
 
				+
			
 
				+	b3OpenCLArray<b3SapAabb>	m_allAabbsGPU1;
			
 
				+	b3AlignedObjectArray<b3SapAabb>	m_allAabbsCPU1;
			
 
				+
			
 
				+	b3OpenCLArray<int>	m_smallAabbsMappingGPU;
			
 
				+	b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
			
 
				+
			
 
				+	b3OpenCLArray<int>	m_largeAabbsMappingGPU;
			
 
				+	b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
			
 
				+
			
 
				+	b3AlignedObjectArray<b3Int4> m_hostPairs;
			
 
				+	b3OpenCLArray<b3Int4>			m_gpuPairs;
			
 
				+
			
 
				+	b3OpenCLArray<b3SortData>			m_hashGpu;
			
 
				+	b3OpenCLArray<int>			m_cellStartGpu;
			
 
				+	
			
 
				+
			
 
				+	b3ParamsGridBroadphaseCL		m_paramsCPU; // Host-side copy of the grid parameters.
			
 
				+	b3OpenCLArray<b3ParamsGridBroadphaseCL>		m_paramsGPU; // OpenCL array holding the same parameter struct type.
			
 
				+
			
 
				+	class b3RadixSort32CL*			m_sorter; // Raw pointer; NOTE(review): allocation and release are not visible in this header - confirm in the constructor/destructor.
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q );
			
 
				+	virtual ~b3GpuGridBroadphase();
			
 
				+
			
 
				+	static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx,cl_device_id device, cl_command_queue  q) // Factory with the b3GpuBroadphaseInterface::CreateFunc signature; the object is allocated with new and returned as a raw pointer.
			
 
				+	{
			
 
				+		return new b3GpuGridBroadphase(ctx,device,q);
			
 
				+	}
			
 
				+
			
 
				+	
			
 
				+
			
 
				+
			
 
				+	virtual void createProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask); // Overrides the interface method of the same name.
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask); // Overrides the interface method of the same name.
			
 
				+
			
 
				+	virtual void  calculateOverlappingPairs(int maxPairs);
			
 
				+	virtual void  calculateOverlappingPairsHost(int maxPairs);
			
 
				+
			
 
				+	//call writeAabbsToGpu after done making all changes (createProxy etc)
			
 
				+	virtual void writeAabbsToGpu();
			
 
				+
			
 
				+	virtual cl_mem	getAabbBufferWS();
			
 
				+	virtual int	getNumOverlap();
			
 
				+	virtual cl_mem	getOverlappingPairBuffer();
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3SapAabb>&	getAllAabbsGPU(); // Returns a non-const reference.
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>&	getAllAabbsCPU(); // Returns a non-const reference.
			
 
				+	
			
 
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_GPU_GRID_BROADPHASE_H
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
@@ -0,0 +1,125 @@
 
				+/*
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Initial Author Jackson Lee, 2014
			
 
				+
			
 
				+#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H
			
 
				+#define B3_GPU_PARALLEL_LINEAR_BVH_H
			
 
				+
			
 
				+//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
			
 
				+
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
			
 
				+
			
 
				+#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"
			
 
				+
			
 
				+#define b3Int64 cl_long
			
 
				+
			
 
				+///@brief GPU Parallel Linearized Bounding Volume Hierarchy(LBVH) that is reconstructed every frame
			
 
				+///@remarks
			
 
				+///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details.
			
 
				+///@par
			
 
				+///Related papers: \n
			
 
				+///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n
			
 
				+///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n
			
 
				+///@par
			
 
				+///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
			
 
				+/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid) 
			
 
				+/// - [fully parallel] Sort morton codes
			
 
				+/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH) 
			
 
				+/// - [somewhat parallel] Set internal node AABBs 
			
 
				+///@par
			
 
				+///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
			
 
				+///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree.
			
 
				+///Instead of searching for the child nodes of each internal node, we search for the parent node of each node.
			
 
				+///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs.
			
 
				+class b3GpuParallelLinearBvh // Members declared before "public:" are private (class default access).
			
 
				+{
			
 
				+	cl_command_queue m_queue;
			
 
				+	
			
 
				+	cl_program m_parallelLinearBvhProgram;
			
 
				+	
			
 
				+	cl_kernel m_separateAabbsKernel;
			
 
				+	cl_kernel m_findAllNodesMergedAabbKernel;
			
 
				+	cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
			
 
				+	
			
 
				+	//Binary radix tree construction kernels
			
 
				+	cl_kernel m_computeAdjacentPairCommonPrefixKernel;
			
 
				+	cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
			
 
				+	cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
			
 
				+	cl_kernel m_findDistanceFromRootKernel;
			
 
				+	cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
			
 
				+	
			
 
				+	cl_kernel m_findLeafIndexRangesKernel;
			
 
				+	
			
 
				+	//Traversal kernels
			
 
				+	cl_kernel m_plbvhCalculateOverlappingPairsKernel;
			
 
				+	cl_kernel m_plbvhRayTraverseKernel;
			
 
				+	cl_kernel m_plbvhLargeAabbAabbTestKernel;
			
 
				+	cl_kernel m_plbvhLargeAabbRayTestKernel;
			
 
				+	
			
 
				+	b3RadixSort32CL m_radixSorter; // Held by value, unlike the pointer-held sorters in the grid and SAP broadphases.
			
 
				+	
			
 
				+	//1 element
			
 
				+	b3OpenCLArray<int> m_rootNodeIndex;							//Most significant bit(0x80000000) is set to indicate internal node
			
 
				+	b3OpenCLArray<int> m_maxDistanceFromRoot;					//Max number of internal nodes between an internal node and the root node
			
 
				+	b3OpenCLArray<int> m_temp;									//Used to hold the number of pairs in calculateOverlappingPairs()
			
 
				+	
			
 
				+	//1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
			
 
				+	b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
			
 
				+	b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges;		//x == min leaf index, y == max leaf index
			
 
				+	b3OpenCLArray<b3Int2> m_internalNodeChildNodes;				//x == left child, y == right child; msb(0x80000000) is set to indicate internal node
			
 
				+	b3OpenCLArray<int> m_internalNodeParentNodes;				//For parent node index, msb(0x80000000) is not set since it is always internal
			
 
				+	
			
 
				+	//1 element per internal node; for binary radix tree construction
			
 
				+	b3OpenCLArray<b3Int64> m_commonPrefixes; // b3Int64 is #defined to cl_long earlier in this header.
			
 
				+	b3OpenCLArray<int> m_commonPrefixLengths;
			
 
				+	b3OpenCLArray<int> m_distanceFromRoot;						//Number of internal nodes between this node and the root
			
 
				+	
			
 
				+	//1 element per leaf node (leaf nodes only include small AABBs)
			
 
				+	b3OpenCLArray<int> m_leafNodeParentNodes;					//For parent node index, msb(0x80000000) is not set since it is always internal
			
 
				+	b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies;		//m_key == morton code, m_value == aabb index in m_leafNodeAabbs
			
 
				+	b3OpenCLArray<b3SapAabb> m_mergedAabb;						//m_mergedAabb[0] contains the merged AABB of all leaf nodes
			
 
				+	b3OpenCLArray<b3SapAabb> m_leafNodeAabbs;					//Contains only small AABBs
			
 
				+	
			
 
				+	//1 element per large AABB, which is not stored in the BVH
			
 
				+	b3OpenCLArray<b3SapAabb> m_largeAabbs;
			
 
				+	
			
 
				+public:
			
 
				+	b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
			
 
				+	virtual ~b3GpuParallelLinearBvh();
			
 
				+	
			
 
				+	///Must be called before any other function
			
 
				+	void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices, 
			
 
				+				const b3OpenCLArray<int>& largeAabbIndices);
			
 
				+	
			
 
				+	///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
			
 
				+	///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
			
 
				+	///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
			
 
				+	void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs);
			
 
				+	
			
 
				+	///@param out_numRayRigidPairs Array of length 1; contains the number of detected ray-rigid AABB intersections;
			
 
				+	///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
			
 
				+	///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
			
 
				+	///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
			
 
				+	void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, 
			
 
				+								b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
			
 
				+								
			
 
				+private:
			
 
				+	void constructBinaryRadixTree(); // NOTE(review): private helper, presumably invoked from build() - confirm in the implementation file.
			
 
				+};
			
 
				+
			
 
				+#endif
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h
@@ -0,0 +1,66 @@
 
				+/*
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+//Initial Author Jackson Lee, 2014
			
 
				+
			
 
				+#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H
			
 
				+#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H
			
 
				+
			
 
				+#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
			
 
				+
			
 
				+#include "b3GpuParallelLinearBvh.h"
			
 
				+
			
 
				+class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface // b3GpuBroadphaseInterface implementation that owns a b3GpuParallelLinearBvh; data members are private (class default access).
			
 
				+{
			
 
				+	b3GpuParallelLinearBvh m_plbvh; // Held by value.
			
 
				+	
			
 
				+	b3OpenCLArray<b3Int4> m_overlappingPairsGpu; // Exposed through getNumOverlap(), getOverlappingPairBuffer() and getOverlappingPairsGPU().
			
 
				+	
			
 
				+	b3OpenCLArray<b3SapAabb> m_aabbsGpu; // Exposed through getAabbBufferWS() and getAllAabbsGPU().
			
 
				+	b3OpenCLArray<int> m_smallAabbsMappingGpu;
			
 
				+	b3OpenCLArray<int> m_largeAabbsMappingGpu;
			
 
				+	
			
 
				+	b3AlignedObjectArray<b3SapAabb> m_aabbsCpu; // Exposed through getAllAabbsCPU().
			
 
				+	b3AlignedObjectArray<int> m_smallAabbsMappingCpu;
			
 
				+	b3AlignedObjectArray<int> m_largeAabbsMappingCpu;
			
 
				+	
			
 
				+public:
			
 
				+	b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue);
			
 
				+	virtual ~b3GpuParallelLinearBvhBroadphase() {} // Empty body: all members are held by value and clean up through their own destructors.
			
 
				+
			
 
				+	virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask);
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask);
			
 
				+	
			
 
				+	virtual void calculateOverlappingPairs(int maxPairs);
			
 
				+	virtual void calculateOverlappingPairsHost(int maxPairs);
			
 
				+
			
 
				+	//call writeAabbsToGpu after done making all changes (createProxy etc)
			
 
				+	virtual void writeAabbsToGpu();
			
 
				+	
			
 
				+	virtual int	getNumOverlap() { return m_overlappingPairsGpu.size(); } // The overlap count is the current size of the pair array.
			
 
				+	virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); }
			
 
				+
			
 
				+	virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); }
			
 
				+	virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; }
			
 
				+	
			
 
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() { return m_overlappingPairsGpu; }
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; }
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; }
			
 
				+	
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; }
			
 
				+	
			
 
				+	static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue) // Factory with the b3GpuBroadphaseInterface::CreateFunc signature; the object is allocated with new and returned as a raw pointer.
			
 
				+	{
			
 
				+		return new b3GpuParallelLinearBvhBroadphase(context, device, queue);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
@@ -0,0 +1,151 @@
 
				+#ifndef B3_GPU_SAP_BROADPHASE_H
			
 
				+#define B3_GPU_SAP_BROADPHASE_H
			
 
				+
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
			
 
				+class b3Vector3;
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
			
 
				+
			
 
				+#include "b3SapAabb.h"
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+
			
 
				+#include "b3GpuBroadphaseInterface.h"
			
 
				+
			
 
				+
			
 
				+class b3GpuSapBroadphase : public b3GpuBroadphaseInterface // SAP implementation of b3GpuBroadphaseInterface; members before "public:" are private (class default access).
			
 
				+{
			
 
				+	
			
 
				+	cl_context				m_context;
			
 
				+	cl_device_id			m_device;
			
 
				+	cl_command_queue		m_queue;
			
 
				+	cl_kernel				m_flipFloatKernel;
			
 
				+	cl_kernel				m_scatterKernel ;
			
 
				+	cl_kernel				m_copyAabbsKernel;
			
 
				+	cl_kernel				m_sapKernel;
			
 
				+	cl_kernel				m_sap2Kernel;
			
 
				+	cl_kernel				m_prepareSumVarianceKernel;
			
 
				+	
			
 
				+
			
 
				+	class b3RadixSort32CL* m_sorter; // Raw pointer; NOTE(review): allocation and release are not visible in this header - confirm in the constructor/destructor.
			
 
				+
			
 
				+	///test for 3d SAP
			
 
				+	b3AlignedObjectArray<b3SortData>		m_sortedAxisCPU[3][2]; // 3x2 array of arrays; NOTE(review): presumably [axis][buffer], with m_currentBuffer selecting the buffer - confirm.
			
 
				+	b3AlignedObjectArray<b3UnsignedInt2>	m_objectMinMaxIndexCPU[3][2]; // Same 3x2 shape as m_sortedAxisCPU.
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis0;
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis1;
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis2;
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis0prev;
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis1prev;
			
 
				+	b3OpenCLArray<b3UnsignedInt2>			m_objectMinMaxIndexGPUaxis2prev;
			
 
				+
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU0;
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU1;
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU2;
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU0prev;
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU1prev;
			
 
				+	b3OpenCLArray<b3SortData>				m_sortedAxisGPU2prev;
			
 
				+
			
 
				+
			
 
				+	b3OpenCLArray<b3Int4>					m_addedHostPairsGPU;
			
 
				+	b3OpenCLArray<b3Int4>					m_removedHostPairsGPU;
			
 
				+	b3OpenCLArray<int>						m_addedCountGPU;
			
 
				+	b3OpenCLArray<int>						m_removedCountGPU;
			
 
				+	
			
 
				+	int	m_currentBuffer;
			
 
				+
			
 
				+public: // Everything from here to the end of the class, including the data members, is public.
			
 
				+
			
 
				+	b3OpenCLArray<int> m_pairCount;
			
 
				+
			
 
				+
			
 
				+	b3OpenCLArray<b3SapAabb>	m_allAabbsGPU;
			
 
				+	b3AlignedObjectArray<b3SapAabb>	m_allAabbsCPU;
			
 
				+
			
 
				+	virtual b3OpenCLArray<b3SapAabb>&	getAllAabbsGPU() // Returns the public m_allAabbsGPU member by reference.
			
 
				+	{
			
 
				+		return m_allAabbsGPU;
			
 
				+	}
			
 
				+	virtual b3AlignedObjectArray<b3SapAabb>&	getAllAabbsCPU() // Returns the public m_allAabbsCPU member by reference.
			
 
				+	{
			
 
				+		return m_allAabbsCPU;
			
 
				+	}
			
 
				+
			
 
				+	b3OpenCLArray<b3Vector3>	m_sum;
			
 
				+	b3OpenCLArray<b3Vector3>	m_sum2;
			
 
				+	b3OpenCLArray<b3Vector3>	m_dst;
			
 
				+
			
 
				+	b3OpenCLArray<int>	m_smallAabbsMappingGPU;
			
 
				+	b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
			
 
				+
			
 
				+	b3OpenCLArray<int>	m_largeAabbsMappingGPU;
			
 
				+	b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
			
 
				+
			
 
				+	
			
 
				+	b3OpenCLArray<b3Int4>		m_overlappingPairs;
			
 
				+
			
 
				+	//temporary gpu work memory
			
 
				+	b3OpenCLArray<b3SortData>	m_gpuSmallSortData;
			
 
				+	b3OpenCLArray<b3SapAabb>	m_gpuSmallSortedAabbs;
			
 
				+
			
 
				+	class b3PrefixScanFloat4CL*		m_prefixScanFloat4; // Raw pointer; NOTE(review): ownership is not visible in this header - confirm in the constructor/destructor.
			
 
				+
			
 
				+	enum b3GpuSapKernelType // Selects the variant passed to the constructor; values start at 1 and increase by one in declaration order.
			
 
				+	{
			
 
				+		B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1,
			
 
				+		B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
			
 
				+		B3_GPU_SAP_KERNEL_ORIGINAL,
			
 
				+		B3_GPU_SAP_KERNEL_BARRIER,
			
 
				+		B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
			
 
				+	};
			
 
				+
			
 
				+	b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); // kernelType defaults to the local-shared-memory variant.
			
 
				+	virtual ~b3GpuSapBroadphase();
			
 
				+	
			
 
				+	static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue  q) // Each CreateFunc* factory below has the b3GpuBroadphaseInterface::CreateFunc signature and differs only in the kernel type it passes; the result is allocated with new.
			
 
				+	{
			
 
				+		return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
			
 
				+	}
			
 
				+
			
 
				+	static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue  q)
			
 
				+	{
			
 
				+		return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
			
 
				+	}
			
 
				+
			
 
				+	static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue  q)
			
 
				+	{
			
 
				+		return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL);
			
 
				+	}
			
 
				+	static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue  q)
			
 
				+	{
			
 
				+		return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER);
			
 
				+	}
			
 
				+	static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue  q) // Equivalent to constructing with the default kernelType.
			
 
				+	{
			
 
				+		return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
			
 
				+	}
			
 
				+	
			
 
				+
			
 
				+	virtual void  calculateOverlappingPairs(int maxPairs);
			
 
				+	virtual void  calculateOverlappingPairsHost(int maxPairs);
			
 
				+	
			
 
				+	void  reset(); // Non-virtual and not part of b3GpuBroadphaseInterface.
			
 
				+
			
 
				+	void init3dSap(); // Non-virtual and not part of b3GpuBroadphaseInterface.
			
 
				+	virtual void calculateOverlappingPairsHostIncremental3Sap(); // Virtual, but declared only on this class, not on the interface.
			
 
				+
			
 
				+	virtual void createProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
			
 
				+	virtual void createLargeProxy(const b3Vector3& aabbMin,  const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
			
 
				+
			
 
				+	//call writeAabbsToGpu after done making all changes (createProxy etc)
			
 
				+	virtual void writeAabbsToGpu();
			
 
				+
			
 
				+	virtual cl_mem	getAabbBufferWS();
			
 
				+	virtual int	getNumOverlap();
			
 
				+	virtual cl_mem	getOverlappingPairBuffer();
			
 
				+	
			
 
				+	virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
			
 
				+	virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
			
 
				+	virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
			
 
				+};
			
 
				+
			
 
				+#endif //B3_GPU_SAP_BROADPHASE_H
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h
@@ -0,0 +1,14 @@
 
				+#ifndef B3_SAP_AABB_H
			
 
				+#define B3_SAP_AABB_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Scalar.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+
			
 
				+///Empty wrapper around b3Aabb: it adds no members and exists only to make sure that the b3Aabb is 16-byte aligned
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3SapAabb : public b3Aabb
			
 
				+{
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_SAP_AABB_H
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h
@@ -0,0 +1,199 @@
 
				+//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
			
 
				+static const char* gridBroadphaseCL= \
			
 
				+"int getPosHash(int4 gridPos, __global float4* pParams)\n"
			
 
				+"{\n"
			
 
				+"	int4 gridDim = *((__global int4*)(pParams + 1));\n"
			
 
				+"	gridPos.x &= gridDim.x - 1;\n"
			
 
				+"	gridPos.y &= gridDim.y - 1;\n"
			
 
				+"	gridPos.z &= gridDim.z - 1;\n"
			
 
				+"	int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n"
			
 
				+"	return hash;\n"
			
 
				+"} \n"
			
 
				+"int4 getGridPos(float4 worldPos, __global float4* pParams)\n"
			
 
				+"{\n"
			
 
				+"    int4 gridPos;\n"
			
 
				+"	int4 gridDim = *((__global int4*)(pParams + 1));\n"
			
 
				+"    gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n"
			
 
				+"    gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n"
			
 
				+"    gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n"
			
 
				+"    return gridPos;\n"
			
 
				+"}\n"
			
 
				+"// calculate grid hash value for each body using its AABB\n"
			
 
				+"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n"
			
 
				+"{\n"
			
 
				+"    int index = get_global_id(0);\n"
			
 
				+"    if(index >= numObjects)\n"
			
 
				+"	{\n"
			
 
				+"		return;\n"
			
 
				+"	}\n"
			
 
				+"	float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n"
			
 
				+"	float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n"
			
 
				+"	float4 pos;\n"
			
 
				+"	pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
			
 
				+"	pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
			
 
				+"	pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
			
 
				+"	pos.w = 0.f;\n"
			
 
				+"    // get address in grid\n"
			
 
				+"    int4 gridPos = getGridPos(pos, pParams);\n"
			
 
				+"    int gridHash = getPosHash(gridPos, pParams);\n"
			
 
				+"    // store grid hash and body index\n"
			
 
				+"    int2 hashVal;\n"
			
 
				+"    hashVal.x = gridHash;\n"
			
 
				+"    hashVal.y = index;\n"
			
 
				+"    pHash[index] = hashVal;\n"
			
 
				+"}\n"
			
 
				+"__kernel void kClearCellStart(	int numCells, \n"
			
 
				+"								__global int* pCellStart )\n"
			
 
				+"{\n"
			
 
				+"    int index = get_global_id(0);\n"
			
 
				+"    if(index >= numCells)\n"
			
 
				+"	{\n"
			
 
				+"		return;\n"
			
 
				+"	}\n"
			
 
				+"	pCellStart[index] = -1;\n"
			
 
				+"}\n"
			
 
				+"__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n"
			
 
				+"{\n"
			
 
				+"	__local int sharedHash[513];\n"
			
 
				+"    int index = get_global_id(0);\n"
			
 
				+"	int2 sortedData;\n"
			
 
				+"    if(index < numObjects)\n"
			
 
				+"	{\n"
			
 
				+"		sortedData = pHash[index];\n"
			
 
				+"		// Load hash data into shared memory so that we can look \n"
			
 
				+"		// at neighboring body's hash value without loading\n"
			
 
				+"		// two hash values per thread\n"
			
 
				+"		sharedHash[get_local_id(0) + 1] = sortedData.x;\n"
			
 
				+"		if((index > 0) && (get_local_id(0) == 0))\n"
			
 
				+"		{\n"
			
 
				+"			// first thread in block must load neighbor body hash\n"
			
 
				+"			sharedHash[0] = pHash[index-1].x;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"    barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"    if(index < numObjects)\n"
			
 
				+"	{\n"
			
 
				+"		if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n"
			
 
				+"		{\n"
			
 
				+"			cellStart[sortedData.x] = index;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n"
			
 
				+"{\n"
			
 
				+"	return	(min0.x <= max1.x)&& (min1.x <= max0.x) && \n"
			
 
				+"			(min0.y <= max1.y)&& (min1.y <= max0.y) && \n"
			
 
				+"			(min0.z <= max1.z)&& (min1.z <= max0.z); \n"
			
 
				+"}\n"
			
 
				+"//search for AABB 'index' against other AABBs' in this cell\n"
			
 
				+"void findPairsInCell(	int numObjects,\n"
			
 
				+"						int4	gridPos,\n"
			
 
				+"						int    index,\n"
			
 
				+"						__global int2*  pHash,\n"
			
 
				+"						__global int*   pCellStart,\n"
			
 
				+"						__global float4* allpAABB, \n"
			
 
				+"						__global const int* smallAabbMapping,\n"
			
 
				+"						__global float4* pParams,\n"
			
 
				+"							volatile  __global int* pairCount,\n"
			
 
				+"						__global int4*   pPairBuff2,\n"
			
 
				+"						int maxPairs\n"
			
 
				+"						)\n"
			
 
				+"{\n"
			
 
				+"	int4 pGridDim = *((__global int4*)(pParams + 1));\n"
			
 
				+"	int maxBodiesPerCell = pGridDim.w;\n"
			
 
				+"    int gridHash = getPosHash(gridPos, pParams);\n"
			
 
				+"    // get start of bucket for this cell\n"
			
 
				+"    int bucketStart = pCellStart[gridHash];\n"
			
 
				+"    if (bucketStart == -1)\n"
			
 
				+"	{\n"
			
 
				+"        return;   // cell empty\n"
			
 
				+"	}\n"
			
 
				+"	// iterate over bodies in this cell\n"
			
 
				+"    int2 sortedData = pHash[index];\n"
			
 
				+"	int unsorted_indx = sortedData.y;\n"
			
 
				+"    float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n"
			
 
				+"	float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
			
 
				+"	int handleIndex =  as_int(min0.w);\n"
			
 
				+"	\n"
			
 
				+"	int bucketEnd = bucketStart + maxBodiesPerCell;\n"
			
 
				+"	bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n"
			
 
				+"	for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n"
			
 
				+"	{\n"
			
 
				+"        int2 cellData = pHash[index2];\n"
			
 
				+"        if (cellData.x != gridHash)\n"
			
 
				+"        {\n"
			
 
				+"			break;   // no longer in same bucket\n"
			
 
				+"		}\n"
			
 
				+"		int unsorted_indx2 = cellData.y;\n"
			
 
				+"        //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
			
 
				+"		if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n"
			
 
				+"        {   \n"
			
 
				+"			float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n"
			
 
				+"			float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n"
			
 
				+"			if(testAABBOverlap(min0, max0, min1, max1))\n"
			
 
				+"			{\n"
			
 
				+"				if (pairCount)\n"
			
 
				+"				{\n"
			
 
				+"					int handleIndex2 = as_int(min1.w);\n"
			
 
				+"					if (handleIndex<handleIndex2)\n"
			
 
				+"					{\n"
			
 
				+"						int curPair = atomic_add(pairCount,1);\n"
			
 
				+"						if (curPair<maxPairs)\n"
			
 
				+"						{\n"
			
 
				+"							int4 newpair;\n"
			
 
				+"							newpair.x = handleIndex;\n"
			
 
				+"							newpair.y = handleIndex2;\n"
			
 
				+"							newpair.z = -1;\n"
			
 
				+"							newpair.w = -1;\n"
			
 
				+"							pPairBuff2[curPair] = newpair;\n"
			
 
				+"						}\n"
			
 
				+"					}\n"
			
 
				+"				\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void kFindOverlappingPairs(	int numObjects,\n"
			
 
				+"										__global float4* allpAABB, \n"
			
 
				+"										__global const int* smallAabbMapping,\n"
			
 
				+"										__global int2* pHash, \n"
			
 
				+"										__global int* pCellStart, \n"
			
 
				+"										__global float4* pParams ,\n"
			
 
				+"										volatile  __global int* pairCount,\n"
			
 
				+"										__global int4*   pPairBuff2,\n"
			
 
				+"										int maxPairs\n"
			
 
				+"										)\n"
			
 
				+"{\n"
			
 
				+"    int index = get_global_id(0);\n"
			
 
				+"    if(index >= numObjects)\n"
			
 
				+"	{\n"
			
 
				+"		return;\n"
			
 
				+"	}\n"
			
 
				+"    int2 sortedData = pHash[index];\n"
			
 
				+"	int unsorted_indx = sortedData.y;\n"
			
 
				+"	float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n"
			
 
				+"	float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
			
 
				+"	float4 pos;\n"
			
 
				+"	pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
			
 
				+"	pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
			
 
				+"	pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
			
 
				+"    // get address in grid\n"
			
 
				+"    int4 gridPosA = getGridPos(pos, pParams);\n"
			
 
				+"    int4 gridPosB; \n"
			
 
				+"    // examine only neighbouring cells\n"
			
 
				+"    for(int z=-1; z<=1; z++) \n"
			
 
				+"    {\n"
			
 
				+"		gridPosB.z = gridPosA.z + z;\n"
			
 
				+"        for(int y=-1; y<=1; y++) \n"
			
 
				+"        {\n"
			
 
				+"			gridPosB.y = gridPosA.y + y;\n"
			
 
				+"            for(int x=-1; x<=1; x++) \n"
			
 
				+"            {\n"
			
 
				+"				gridPosB.x = gridPosA.x + x;\n"
			
 
				+"                findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n"
			
 
				+"            }\n"
			
 
				+"        }\n"
			
 
				+"    }\n"
			
 
				+"}\n"
			
 
				+;
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
@@ -0,0 +1,729 @@
 
				+//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
			
 
				+static const char* parallelLinearBvhCL= \
			
 
				+"/*\n"
			
 
				+"This software is provided 'as-is', without any express or implied warranty.\n"
			
 
				+"In no event will the authors be held liable for any damages arising from the use of this software.\n"
			
 
				+"Permission is granted to anyone to use this software for any purpose,\n"
			
 
				+"including commercial applications, and to alter it and redistribute it freely,\n"
			
 
				+"subject to the following restrictions:\n"
			
 
				+"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
			
 
				+"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
			
 
				+"3. This notice may not be removed or altered from any source distribution.\n"
			
 
				+"*/\n"
			
 
				+"//Initial Author Jackson Lee, 2014\n"
			
 
				+"typedef float b3Scalar;\n"
			
 
				+"typedef float4 b3Vector3;\n"
			
 
				+"#define b3Max max\n"
			
 
				+"#define b3Min min\n"
			
 
				+"#define b3Sqrt sqrt\n"
			
 
				+"typedef struct\n"
			
 
				+"{\n"
			
 
				+"	unsigned int m_key;\n"
			
 
				+"	unsigned int m_value;\n"
			
 
				+"} SortDataCL;\n"
			
 
				+"typedef struct \n"
			
 
				+"{\n"
			
 
				+"	union\n"
			
 
				+"	{\n"
			
 
				+"		float4	m_min;\n"
			
 
				+"		float   m_minElems[4];\n"
			
 
				+"		int			m_minIndices[4];\n"
			
 
				+"	};\n"
			
 
				+"	union\n"
			
 
				+"	{\n"
			
 
				+"		float4	m_max;\n"
			
 
				+"		float   m_maxElems[4];\n"
			
 
				+"		int			m_maxIndices[4];\n"
			
 
				+"	};\n"
			
 
				+"} b3AabbCL;\n"
			
 
				+"unsigned int interleaveBits(unsigned int x)\n"
			
 
				+"{\n"
			
 
				+"	//........ ........ ......12 3456789A	//x\n"
			
 
				+"	//....1..2 ..3..4.. 5..6..7. .8..9..A	//x after interleaving bits\n"
			
 
				+"	\n"
			
 
				+"	//......12 3456789A ......12 3456789A	//x ^ (x << 16)\n"
			
 
				+"	//11111111 ........ ........ 11111111	//0x FF 00 00 FF\n"
			
 
				+"	//......12 ........ ........ 3456789A	//x = (x ^ (x << 16)) & 0xFF0000FF;\n"
			
 
				+"	\n"
			
 
				+"	//......12 ........ 3456789A 3456789A	//x ^ (x <<  8)\n"
			
 
				+"	//......11 ........ 1111.... ....1111	//0x 03 00 F0 0F\n"
			
 
				+"	//......12 ........ 3456.... ....789A	//x = (x ^ (x <<  8)) & 0x0300F00F;\n"
			
 
				+"	\n"
			
 
				+"	//..12..12 ....3456 3456.... 789A789A	//x ^ (x <<  4)\n"
			
 
				+"	//......11 ....11.. ..11.... 11....11	//0x 03 0C 30 C3\n"
			
 
				+"	//......12 ....34.. ..56.... 78....9A	//x = (x ^ (x <<  4)) & 0x030C30C3;\n"
			
 
				+"	\n"
			
 
				+"	//....1212 ..3434.. 5656..78 78..9A9A	//x ^ (x <<  2)\n"
			
 
				+"	//....1..1 ..1..1.. 1..1..1. .1..1..1	//0x 09 24 92 49\n"
			
 
				+"	//....1..2 ..3..4.. 5..6..7. .8..9..A	//x = (x ^ (x <<  2)) & 0x09249249;\n"
			
 
				+"	\n"
			
 
				+"	//........ ........ ......11 11111111	//0x000003FF\n"
			
 
				+"	x &= 0x000003FF;		//Clear all bits above bit 10\n"
			
 
				+"	\n"
			
 
				+"	x = (x ^ (x << 16)) & 0xFF0000FF;\n"
			
 
				+"	x = (x ^ (x <<  8)) & 0x0300F00F;\n"
			
 
				+"	x = (x ^ (x <<  4)) & 0x030C30C3;\n"
			
 
				+"	x = (x ^ (x <<  2)) & 0x09249249;\n"
			
 
				+"	\n"
			
 
				+"	return x;\n"
			
 
				+"}\n"
			
 
				+"unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n"
			
 
				+"{\n"
			
 
				+"	return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
			
 
				+"}\n"
			
 
				+"__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n"
			
 
				+"{\n"
			
 
				+"	int separatedAabbIndex = get_global_id(0);\n"
			
 
				+"	if(separatedAabbIndex >= numAabbsToSeparate) return;\n"
			
 
				+"	int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n"
			
 
				+"	out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n"
			
 
				+"}\n"
			
 
				+"//Should replace with an optimized parallel reduction\n"
			
 
				+"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n"
			
 
				+"{\n"
			
 
				+"	//Each time this kernel is added to the command queue, \n"
			
 
				+"	//the number of AABBs needing to be merged is halved\n"
			
 
				+"	//\n"
			
 
				+"	//Example with 159 AABBs:\n"
			
 
				+"	//	numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n"
			
 
				+"	//	numMergedAabbs == 159 - 80 == 79\n"
			
 
				+"	//So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n"
			
 
				+"	\n"
			
 
				+"	int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n"
			
 
				+"	int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n"
			
 
				+"	\n"
			
 
				+"	int aabbIndex = get_global_id(0);\n"
			
 
				+"	if(aabbIndex >= numMergedAabbs) return;\n"
			
 
				+"	\n"
			
 
				+"	int otherAabbIndex = aabbIndex + numRemainingAabbs;\n"
			
 
				+"	\n"
			
 
				+"	b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
			
 
				+"	b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
			
 
				+"		\n"
			
 
				+"	b3AabbCL mergedAabb;\n"
			
 
				+"	mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
			
 
				+"	mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
			
 
				+"	out_mergedAabb[aabbIndex] = mergedAabb;\n"
			
 
				+"}\n"
			
 
				+"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n"
			
 
				+"												__global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n"
			
 
				+"{\n"
			
 
				+"	int leafNodeIndex = get_global_id(0);	//Leaf node index == AABB index\n"
			
 
				+"	if(leafNodeIndex >= numAabbs) return;\n"
			
 
				+"	\n"
			
 
				+"	b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n"
			
 
				+"	b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n"
			
 
				+"	b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n"
			
 
				+"	\n"
			
 
				+"	b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
			
 
				+"	b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n"
			
 
				+"	b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n"
			
 
				+"	\n"
			
 
				+"	//Quantize into integer coordinates\n"
			
 
				+"	//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
			
 
				+"	b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n"
			
 
				+"	\n"
			
 
				+"	int4 discretePosition;\n"
			
 
				+"	discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
			
 
				+"	discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n"
			
 
				+"	discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n"
			
 
				+"	\n"
			
 
				+"	//Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n"
			
 
				+"	discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n"
			
 
				+"	discretePosition += 512;\n"
			
 
				+"	\n"
			
 
				+"	//Interleave bits(assign a morton code, also known as a z-curve)\n"
			
 
				+"	unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n"
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	SortDataCL mortonCodeIndexPair;\n"
			
 
				+"	mortonCodeIndexPair.m_key = mortonCode;\n"
			
 
				+"	mortonCodeIndexPair.m_value = leafNodeIndex;\n"
			
 
				+"	\n"
			
 
				+"	out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
			
 
				+"}\n"
			
 
				+"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
			
 
				+"//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n"
			
 
				+"//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n"
			
 
				+"//In both cases, the bit should be cleared to access the actual node index.\n"
			
 
				+"int isLeafNode(int index) { return (index >> 31 == 0); }\n"
			
 
				+"int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n"
			
 
				+"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n"
			
 
				+"//From sap.cl\n"
			
 
				+"#define NEW_PAIR_MARKER -1\n"
			
 
				+"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n"
			
 
				+"{\n"
			
 
				+"	bool overlap = true;\n"
			
 
				+"	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
			
 
				+"	return overlap;\n"
			
 
				+"}\n"
			
 
				+"//From sap.cl\n"
			
 
				+"__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n"
			
 
				+"											__global int* rootNodeIndex, \n"
			
 
				+"											__global int2* internalNodeChildIndices, \n"
			
 
				+"											__global b3AabbCL* internalNodeAabbs,\n"
			
 
				+"											__global int2* internalNodeLeafIndexRanges,\n"
			
 
				+"											\n"
			
 
				+"											__global SortDataCL* mortonCodesAndAabbIndices,\n"
			
 
				+"											__global int* out_numPairs, __global int4* out_overlappingPairs, \n"
			
 
				+"											int maxPairs, int numQueryAabbs)\n"
			
 
				+"{\n"
			
 
				+"	//Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n"
			
 
				+"	//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n"
			
 
				+"	int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
			
 
				+"	if(queryBvhNodeIndex >= numQueryAabbs) return;\n"
			
 
				+"	\n"
			
 
				+"	int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n"
			
 
				+"	b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
			
 
				+"	\n"
			
 
				+"	int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
			
 
				+"	\n"
			
 
				+"	int stackSize = 1;\n"
			
 
				+"	stack[0] = *rootNodeIndex;\n"
			
 
				+"	\n"
			
 
				+"	while(stackSize)\n"
			
 
				+"	{\n"
			
 
				+"		int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
			
 
				+"		--stackSize;\n"
			
 
				+"		\n"
			
 
				+"		int isLeaf = isLeafNode(internalOrLeafNodeIndex);	//Internal node if false\n"
			
 
				+"		int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
			
 
				+"		\n"
			
 
				+"		//Optimization - if the BVH is structured as a binary radix tree, then\n"
			
 
				+"		//each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n"
			
 
				+"		//This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n"
			
 
				+"		{\n"
			
 
				+"			int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
			
 
				+"			if(highestLeafIndex <= queryBvhNodeIndex) continue;\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"		//bvhRigidIndex is not used if internal node\n"
			
 
				+"		int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
			
 
				+"	\n"
			
 
				+"		b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
			
 
				+"		if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
			
 
				+"		{\n"
			
 
				+"			if(isLeaf)\n"
			
 
				+"			{\n"
			
 
				+"				int4 pair;\n"
			
 
				+"				pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n"
			
 
				+"				pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
			
 
				+"				pair.z = NEW_PAIR_MARKER;\n"
			
 
				+"				pair.w = NEW_PAIR_MARKER;\n"
			
 
				+"				\n"
			
 
				+"				int pairIndex = atomic_inc(out_numPairs);\n"
			
 
				+"				if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
			
 
				+"			}\n"
			
 
				+"			\n"
			
 
				+"			if(!isLeaf)	//Internal node\n"
			
 
				+"			{\n"
			
 
				+"				if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
			
 
				+"				{\n"
			
 
				+"					//Error\n"
			
 
				+"				}\n"
			
 
				+"				else\n"
			
 
				+"				{\n"
			
 
				+"					stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
			
 
				+"					stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"//From rayCastKernels.cl\n"
			
 
				+"typedef struct\n"
			
 
				+"{\n"
			
 
				+"	float4 m_from;\n"
			
 
				+"	float4 m_to;\n"
			
 
				+"} b3RayInfo;\n"
			
 
				+"//From rayCastKernels.cl\n"
			
 
				+"b3Vector3 b3Vector3_normalize(b3Vector3 v)\n"
			
 
				+"{\n"
			
 
				+"	b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n"
			
 
				+"	return normalize(normal);	//OpenCL normalize == vector4 normalize\n"
			
 
				+"}\n"
			
 
				+"b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n"
			
 
				+"b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n"
			
 
				+"int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n"
			
 
				+"{\n"
			
 
				+"	//AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n"
			
 
				+"	//t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n"
			
 
				+"	//\n"
			
 
				+"	//if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n"
			
 
				+"	//and min.x will be the far plane; otherwise, it is reversed.\n"
			
 
				+"	//\n"
			
 
				+"	//In order for there to be a collision, the t_min and t_max of each pair must overlap.\n"
			
 
				+"	//This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n"
			
 
				+"	\n"
			
 
				+"	int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) );	//isless(x,y) returns (x < y)\n"
			
 
				+"	\n"
			
 
				+"	//When using vector types, the select() function checks the most signficant bit, \n"
			
 
				+"	//but isless() sets the least significant bit.\n"
			
 
				+"	isNegative <<= 31;\n"
			
 
				+"	//select(b, a, condition) == condition ? a : b\n"
			
 
				+"	//When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n"
			
 
				+"	b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n"
			
 
				+"	b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n"
			
 
				+"	\n"
			
 
				+"	b3Scalar t_min_final = 0.0f;\n"
			
 
				+"	b3Scalar t_max_final = rayLength;\n"
			
 
				+"	\n"
			
 
				+"	//Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n"
			
 
				+"	//Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n"
			
 
				+"	//Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n"
			
 
				+"	t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n"
			
 
				+"	t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n"
			
 
				+"	\n"
			
 
				+"	return (t_min_final <= t_max_final);\n"
			
 
				+"}\n"
			
 
				+"__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n"
			
 
				+"								__global int* rootNodeIndex, \n"
			
 
				+"								__global int2* internalNodeChildIndices, \n"
			
 
				+"								__global b3AabbCL* internalNodeAabbs,\n"
			
 
				+"								__global int2* internalNodeLeafIndexRanges,\n"
			
 
				+"								__global SortDataCL* mortonCodesAndAabbIndices,\n"
			
 
				+"								\n"
			
 
				+"								__global b3RayInfo* rays,\n"
			
 
				+"								\n"
			
 
				+"								__global int* out_numRayRigidPairs, \n"
			
 
				+"								__global int2* out_rayRigidPairs,\n"
			
 
				+"								int maxRayRigidPairs, int numRays)\n"
			
 
				+"{\n"
			
 
				+"	int rayIndex = get_global_id(0);\n"
			
 
				+"	if(rayIndex >= numRays) return;\n"
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
			
 
				+"	b3Vector3 rayTo = rays[rayIndex].m_to;\n"
			
 
				+"	b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
			
 
				+"	b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
			
 
				+"	\n"
			
 
				+"	int stackSize = 1;\n"
			
 
				+"	stack[0] = *rootNodeIndex;\n"
			
 
				+"	\n"
			
 
				+"	while(stackSize)\n"
			
 
				+"	{\n"
			
 
				+"		int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
			
 
				+"		--stackSize;\n"
			
 
				+"		\n"
			
 
				+"		int isLeaf = isLeafNode(internalOrLeafNodeIndex);	//Internal node if false\n"
			
 
				+"		int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
			
 
				+"		\n"
			
 
				+"		//bvhRigidIndex is not used if internal node\n"
			
 
				+"		int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
			
 
				+"	\n"
			
 
				+"		b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
			
 
				+"		if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb)  )\n"
			
 
				+"		{\n"
			
 
				+"			if(isLeaf)\n"
			
 
				+"			{\n"
			
 
				+"				int2 rayRigidPair;\n"
			
 
				+"				rayRigidPair.x = rayIndex;\n"
			
 
				+"				rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
			
 
				+"				\n"
			
 
				+"				int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
			
 
				+"				if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
			
 
				+"			}\n"
			
 
				+"			\n"
			
 
				+"			if(!isLeaf)	//Internal node\n"
			
 
				+"			{\n"
			
 
				+"				if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
			
 
				+"				{\n"
			
 
				+"					//Error\n"
			
 
				+"				}\n"
			
 
				+"				else\n"
			
 
				+"				{\n"
			
 
				+"					stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
			
 
				+"					stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n"
			
 
				+"									__global int* out_numPairs, __global int4* out_overlappingPairs, \n"
			
 
				+"									int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n"
			
 
				+"{\n"
			
 
				+"	int smallAabbIndex = get_global_id(0);\n"
			
 
				+"	if(smallAabbIndex >= numSmallAabbRigids) return;\n"
			
 
				+"	\n"
			
 
				+"	b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n"
			
 
				+"	for(int i = 0; i < numLargeAabbRigids; ++i)\n"
			
 
				+"	{\n"
			
 
				+"		b3AabbCL largeAabb = largeAabbs[i];\n"
			
 
				+"		if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n"
			
 
				+"		{\n"
			
 
				+"			int4 pair;\n"
			
 
				+"			pair.x = largeAabb.m_minIndices[3];\n"
			
 
				+"			pair.y = smallAabb.m_minIndices[3];\n"
			
 
				+"			pair.z = NEW_PAIR_MARKER;\n"
			
 
				+"			pair.w = NEW_PAIR_MARKER;\n"
			
 
				+"			\n"
			
 
				+"			int pairIndex = atomic_inc(out_numPairs);\n"
			
 
				+"			if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n"
			
 
				+"									__global int* out_numRayRigidPairs,  __global int2* out_rayRigidPairs,\n"
			
 
				+"									int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n"
			
 
				+"{\n"
			
 
				+"	int rayIndex = get_global_id(0);\n"
			
 
				+"	if(rayIndex >= numRays) return;\n"
			
 
				+"	\n"
			
 
				+"	b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
			
 
				+"	b3Vector3 rayTo = rays[rayIndex].m_to;\n"
			
 
				+"	b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
			
 
				+"	b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
			
 
				+"	\n"
			
 
				+"	for(int i = 0; i < numLargeAabbRigids; ++i)\n"
			
 
				+"	{\n"
			
 
				+"		b3AabbCL rigidAabb = largeRigidAabbs[i];\n"
			
 
				+"		if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n"
			
 
				+"		{\n"
			
 
				+"			int2 rayRigidPair;\n"
			
 
				+"			rayRigidPair.x = rayIndex;\n"
			
 
				+"			rayRigidPair.y = rigidAabb.m_minIndices[3];\n"
			
 
				+"			\n"
			
 
				+"			int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
			
 
				+"			if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
			
 
				+"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
			
 
				+"//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n"
			
 
				+"//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n"
			
 
				+"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
			
 
				+"#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
			
 
				+"#define b3Int64 long\n"
			
 
				+"int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n"
			
 
				+"b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n"
			
 
				+"{\n"
			
 
				+"	//This function only needs to return (i & j) in order for the algorithm to work,\n"
			
 
				+"	//but it may help with debugging to mask out the lower bits.\n"
			
 
				+"	b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n"
			
 
				+"	b3Int64 sharedBits = i & j;\n"
			
 
				+"	b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength);	//Set all bits after the common prefix to 0\n"
			
 
				+"	\n"
			
 
				+"	return sharedBits & bitmask;\n"
			
 
				+"}\n"
			
 
				+"//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n"
			
 
				+"int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n"
			
 
				+"{\n"
			
 
				+"	return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n"
			
 
				+"}\n"
			
 
				+"__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n"
			
 
				+"											__global b3Int64* out_commonPrefixes,\n"
			
 
				+"											__global int* out_commonPrefixLengths,\n"
			
 
				+"											int numInternalNodes)\n"
			
 
				+"{\n"
			
 
				+"	int internalNodeIndex = get_global_id(0);\n"
			
 
				+"	if (internalNodeIndex >= numInternalNodes) return;\n"
			
 
				+"	\n"
			
 
				+"	//Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n"
			
 
				+"	//and the number of internal nodes is always numLeafNodes - 1\n"
			
 
				+"	int leftLeafIndex = internalNodeIndex;\n"
			
 
				+"	int rightLeafIndex = internalNodeIndex + 1;\n"
			
 
				+"	\n"
			
 
				+"	int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n"
			
 
				+"	int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n"
			
 
				+"	\n"
			
 
				+"	//Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n"
			
 
				+"	//Append the index of each leaf node to each morton code so that there are no duplicates.\n"
			
 
				+"	//The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n"
			
 
				+"	//is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n"
			
 
				+"	//\n"
			
 
				+"	//upsample(a, b) == ( ((b3Int64)a) << 32) | b\n"
			
 
				+"	b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n"
			
 
				+"	b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n"
			
 
				+"	\n"
			
 
				+"	out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
			
 
				+"	out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
			
 
				+"}\n"
			
 
				+"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n"
			
 
				+"											__global int2* out_childNodes, int numLeafNodes)\n"
			
 
				+"{\n"
			
 
				+"	int leafNodeIndex = get_global_id(0);\n"
			
 
				+"	if (leafNodeIndex >= numLeafNodes) return;\n"
			
 
				+"	\n"
			
 
				+"	int numInternalNodes = numLeafNodes - 1;\n"
			
 
				+"	\n"
			
 
				+"	int leftSplitIndex = leafNodeIndex - 1;\n"
			
 
				+"	int rightSplitIndex = leafNodeIndex;\n"
			
 
				+"	\n"
			
 
				+"	int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
			
 
				+"	int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
			
 
				+"	\n"
			
 
				+"	//Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n"
			
 
				+"	//Leaf nodes are considered as having the highest common prefix\n"
			
 
				+"	int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n"
			
 
				+"	\n"
			
 
				+"	//Handle cases for the edge nodes; the first and last node\n"
			
 
				+"	//For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n"
			
 
				+"	if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n"
			
 
				+"	if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
			
 
				+"	\n"
			
 
				+"	int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n"
			
 
				+"	out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n"
			
 
				+"	\n"
			
 
				+"	int isRightChild = (isLeftHigherCommonPrefix);	//If the left node is the parent, then this node is its right child and vice versa\n"
			
 
				+"	\n"
			
 
				+"	//out_childNodesAsInt[0] == int2.x == left child\n"
			
 
				+"	//out_childNodesAsInt[1] == int2.y == right child\n"
			
 
				+"	int isLeaf = 1;\n"
			
 
				+"	__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
			
 
				+"	out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n"
			
 
				+"}\n"
			
 
				+"__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n"
			
 
				+"												__global int2* out_childNodes,\n"
			
 
				+"												__global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n"
			
 
				+"												int numInternalNodes)\n"
			
 
				+"{\n"
			
 
				+"	int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
			
 
				+"	if(internalNodeIndex >= numInternalNodes) return;\n"
			
 
				+"	\n"
			
 
				+"	b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n"
			
 
				+"	int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n"
			
 
				+"	\n"
			
 
				+"//#define USE_LINEAR_SEARCH\n"
			
 
				+"#ifdef USE_LINEAR_SEARCH\n"
			
 
				+"	int leftIndex = -1;\n"
			
 
				+"	int rightIndex = -1;\n"
			
 
				+"	\n"
			
 
				+"	//Find nearest element to left with a lower common prefix\n"
			
 
				+"	for(int i = internalNodeIndex - 1; i >= 0; --i)\n"
			
 
				+"	{\n"
			
 
				+"		int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
			
 
				+"		if(nodeLeftSharedPrefixLength < nodePrefixLength)\n"
			
 
				+"		{\n"
			
 
				+"			leftIndex = i;\n"
			
 
				+"			break;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"	\n"
			
 
				+"	//Find nearest element to right with a lower common prefix\n"
			
 
				+"	for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n"
			
 
				+"	{\n"
			
 
				+"		int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
			
 
				+"		if(nodeRightSharedPrefixLength < nodePrefixLength)\n"
			
 
				+"		{\n"
			
 
				+"			rightIndex = i;\n"
			
 
				+"			break;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"	\n"
			
 
				+"#else //Use binary search\n"
			
 
				+"	//Find nearest element to left with a lower common prefix\n"
			
 
				+"	int leftIndex = -1;\n"
			
 
				+"	{\n"
			
 
				+"		int lower = 0;\n"
			
 
				+"		int upper = internalNodeIndex - 1;\n"
			
 
				+"		\n"
			
 
				+"		while(lower <= upper)\n"
			
 
				+"		{\n"
			
 
				+"			int mid = (lower + upper) / 2;\n"
			
 
				+"			b3Int64 midPrefix = commonPrefixes[mid];\n"
			
 
				+"			int midPrefixLength = commonPrefixLengths[mid];\n"
			
 
				+"			\n"
			
 
				+"			int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
			
 
				+"			if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
			
 
				+"			{\n"
			
 
				+"				int right = mid + 1;\n"
			
 
				+"				if(right < internalNodeIndex)\n"
			
 
				+"				{\n"
			
 
				+"					b3Int64 rightPrefix = commonPrefixes[right];\n"
			
 
				+"					int rightPrefixLength = commonPrefixLengths[right];\n"
			
 
				+"					\n"
			
 
				+"					int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n"
			
 
				+"					if(nodeRightSharedPrefixLength < nodePrefixLength) \n"
			
 
				+"					{\n"
			
 
				+"						lower = right;\n"
			
 
				+"						leftIndex = right;\n"
			
 
				+"					}\n"
			
 
				+"					else \n"
			
 
				+"					{\n"
			
 
				+"						leftIndex = mid;\n"
			
 
				+"						break;\n"
			
 
				+"					}\n"
			
 
				+"				}\n"
			
 
				+"				else \n"
			
 
				+"				{\n"
			
 
				+"					leftIndex = mid;\n"
			
 
				+"					break;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"			else upper = mid - 1;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"	\n"
			
 
				+"	//Find nearest element to right with a lower common prefix\n"
			
 
				+"	int rightIndex = -1;\n"
			
 
				+"	{\n"
			
 
				+"		int lower = internalNodeIndex + 1;\n"
			
 
				+"		int upper = numInternalNodes - 1;\n"
			
 
				+"		\n"
			
 
				+"		while(lower <= upper)\n"
			
 
				+"		{\n"
			
 
				+"			int mid = (lower + upper) / 2;\n"
			
 
				+"			b3Int64 midPrefix = commonPrefixes[mid];\n"
			
 
				+"			int midPrefixLength = commonPrefixLengths[mid];\n"
			
 
				+"			\n"
			
 
				+"			int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
			
 
				+"			if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
			
 
				+"			{\n"
			
 
				+"				int left = mid - 1;\n"
			
 
				+"				if(left > internalNodeIndex)\n"
			
 
				+"				{\n"
			
 
				+"					b3Int64 leftPrefix = commonPrefixes[left];\n"
			
 
				+"					int leftPrefixLength = commonPrefixLengths[left];\n"
			
 
				+"				\n"
			
 
				+"					int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n"
			
 
				+"					if(nodeLeftSharedPrefixLength < nodePrefixLength) \n"
			
 
				+"					{\n"
			
 
				+"						upper = left;\n"
			
 
				+"						rightIndex = left;\n"
			
 
				+"					}\n"
			
 
				+"					else \n"
			
 
				+"					{\n"
			
 
				+"						rightIndex = mid;\n"
			
 
				+"						break;\n"
			
 
				+"					}\n"
			
 
				+"				}\n"
			
 
				+"				else \n"
			
 
				+"				{\n"
			
 
				+"					rightIndex = mid;\n"
			
 
				+"					break;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"			else lower = mid + 1;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"#endif\n"
			
 
				+"	\n"
			
 
				+"	//Select parent\n"
			
 
				+"	{\n"
			
 
				+"		int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
			
 
				+"		int rightPrefixLength =  (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
			
 
				+"		\n"
			
 
				+"		int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n"
			
 
				+"		\n"
			
 
				+"		if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n"
			
 
				+"		else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n"
			
 
				+"		\n"
			
 
				+"		int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n"
			
 
				+"		\n"
			
 
				+"		int isRootNode = (leftIndex == -1 && rightIndex == -1);\n"
			
 
				+"		out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n"
			
 
				+"		\n"
			
 
				+"		int isLeaf = 0;\n"
			
 
				+"		if(!isRootNode)\n"
			
 
				+"		{\n"
			
 
				+"			int isRightChild = (isLeftHigherPrefixLength);	//If the left node is the parent, then this node is its right child and vice versa\n"
			
 
				+"			\n"
			
 
				+"			//out_childNodesAsInt[0] == int2.x == left child\n"
			
 
				+"			//out_childNodesAsInt[1] == int2.y == right child\n"
			
 
				+"			__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
			
 
				+"			out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
			
 
				+"		}\n"
			
 
				+"		else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n"	// Kernel: one work-item per internal node; counts parent links up to the root and tracks the largest count. rootNodeIndex is not referenced in the body.
			
 
				+"									__global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n"	// out_distanceFromRoot: per-node link count; out_maxDistanceFromRoot: single int receiving the maximum
			
 
				+"{\n"
			
 
				+"	if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n"	// work-item 0 resets the global maximum. NOTE(review): nothing here orders this reset against atomic_max calls issued by other work-groups -- confirm
			
 
				+"	int internalNodeIndex = get_global_id(0);\n"
			
 
				+"	if(internalNodeIndex >= numInternalNodes) return;\n"	// NOTE(review): work-items returning here never reach the barrier() calls below; OpenCL expects every work-item of a work-group to reach a barrier -- confirm the launch size makes this safe
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	int distanceFromRoot = 0;\n"
			
 
				+"	{\n"
			
 
				+"		int parentIndex = internalNodeParentNodes[internalNodeIndex];\n"
			
 
				+"		while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n"	// follow parent links until the entry holding the root marker; a node whose own entry is the marker keeps distance 0
			
 
				+"		{\n"
			
 
				+"			parentIndex = internalNodeParentNodes[parentIndex];\n"
			
 
				+"			++distanceFromRoot;\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"	out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n"
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	__local int localMaxDistanceFromRoot;\n"	// per-work-group maximum, combined below so only one global atomic is issued per group
			
 
				+"	if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"	// make the zero initialisation visible before any atomic_max on the local value
			
 
				+"	\n"
			
 
				+"	atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"	// wait for all local contributions before local id 0 publishes the group maximum
			
 
				+"	\n"
			
 
				+"	if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n"
			
 
				+"}\n"
			
 
				+"__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n"	// Kernel: for every internal node at depth == processedDistance, store the union of its two children's AABBs
			
 
				+"												__global int2* childNodes,\n"	// per internal node: .x = left child, .y = right child (encoded indices, decoded below)
			
 
				+"												__global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n"	// internalNodeAabbs is both read (child boxes) and written (merged box)
			
 
				+"												int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n"	// maxDistanceFromRoot is not referenced in the body
			
 
				+"{\n"
			
 
				+"	int internalNodeIndex = get_global_id(0);\n"
			
 
				+"	if(internalNodeIndex >= numInternalNodes) return;\n"
			
 
				+"	\n"
			
 
				+"	int distance = distanceFromRoot[internalNodeIndex];\n"
			
 
				+"	\n"
			
 
				+"	if(distance == processedDistance)\n"	// only one depth level is handled per launch; presumably the host launches once per level, deepest first -- confirm against caller
			
 
				+"	{\n"
			
 
				+"		int leftChildIndex = childNodes[internalNodeIndex].x;\n"
			
 
				+"		int rightChildIndex = childNodes[internalNodeIndex].y;\n"
			
 
				+"		\n"
			
 
				+"		int isLeftChildLeaf = isLeafNode(leftChildIndex);\n"	// leaf test must run on the encoded index, before the marker is stripped
			
 
				+"		int isRightChildLeaf = isLeafNode(rightChildIndex);\n"
			
 
				+"		\n"
			
 
				+"		leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n"
			
 
				+"		rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n"
			
 
				+"		\n"
			
 
				+"		//leftRigidIndex/rightRigidIndex is not used if internal node\n"
			
 
				+"		int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"	// for a leaf, m_value of the sort entry is used as the index into leafNodeAabbs
			
 
				+"		int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
			
 
				+"		\n"
			
 
				+"		b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n"	// the -1 placeholder is never used as an index: the leaf branch is only taken when the child is a leaf
			
 
				+"		b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n"
			
 
				+"		\n"
			
 
				+"		b3AabbCL mergedAabb;\n"
			
 
				+"		mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
			
 
				+"		mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
			
 
				+"		internalNodeAabbs[internalNodeIndex] = mergedAabb;\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n"	// Kernel: per internal node, record the leaf reached by always descending left (.x) and the leaf reached by always descending right (.y)
			
 
				+"{\n"
			
 
				+"	int internalNodeIndex = get_global_id(0);\n"
			
 
				+"	if(internalNodeIndex >= numInternalNodes) return;\n"
			
 
				+"	\n"
			
 
				+"	int numLeafNodes = numInternalNodes + 1;\n"	// computed but not used anywhere in this kernel
			
 
				+"	\n"
			
 
				+"	int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n"
			
 
				+"	\n"
			
 
				+"	int2 leafIndexRange;	//x == min leaf index, y == max leaf index\n"
			
 
				+"	\n"
			
 
				+"	//Find lowest leaf index covered by this internal node\n"
			
 
				+"	{\n"
			
 
				+"		int lowestIndex = childNodes.x;		//childNodes.x == Left child\n"
			
 
				+"		while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n"	// keep taking the left child until the encoded index tests as a leaf
			
 
				+"		leafIndexRange.x = lowestIndex;\n"	// stored exactly as read from the child array (no marker removal is applied to the leaf value)
			
 
				+"	}\n"
			
 
				+"	\n"
			
 
				+"	//Find highest leaf index covered by this internal node\n"
			
 
				+"	{\n"
			
 
				+"		int highestIndex = childNodes.y;	//childNodes.y == Right child\n"
			
 
				+"		while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n"	// mirror of the walk above, following right children
			
 
				+"		leafIndexRange.y = highestIndex;\n"
			
 
				+"	}\n"
			
 
				+"	\n"
			
 
				+"	//\n"
			
 
				+"	out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n"
			
 
				+"}\n"
			
 
				+;
			
--- a/include/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
+++ b/include/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
@@ -0,0 +1,342 @@
 
				+//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
			
 
				+static const char* sapCL= \
			
 
				+"/*\n"
			
 
				+"Copyright (c) 2012 Advanced Micro Devices, Inc.  \n"
			
 
				+"This software is provided 'as-is', without any express or implied warranty.\n"
			
 
				+"In no event will the authors be held liable for any damages arising from the use of this software.\n"
			
 
				+"Permission is granted to anyone to use this software for any purpose, \n"
			
 
				+"including commercial applications, and to alter it and redistribute it freely, \n"
			
 
				+"subject to the following restrictions:\n"
			
 
				+"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
			
 
				+"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
			
 
				+"3. This notice may not be removed or altered from any source distribution.\n"
			
 
				+"*/\n"
			
 
				+"//Originally written by Erwin Coumans\n"
			
 
				+"#define NEW_PAIR_MARKER -1\n"	// value the pair-emitting kernels in this string write into .z and .w of every int4 pair they output
			
 
				+"typedef struct \n"	// Axis-aligned bounding box as seen by the kernels: a min corner and a max corner, each viewable three ways through an anonymous union
			
 
				+"{\n"
			
 
				+"	union\n"
			
 
				+"	{\n"
			
 
				+"		float4	m_min;\n"	// vector view, used by the overlap tests (.x/.y/.z)
			
 
				+"		float   m_minElems[4];\n"	// indexable view, used with the runtime 'axis' argument
			
 
				+"		int			m_minIndices[4];\n"	// integer view of the same 16 bytes; the kernels read element [3] as the object index placed in output pairs
			
 
				+"	};\n"
			
 
				+"	union\n"
			
 
				+"	{\n"
			
 
				+"		float4	m_max;\n"
			
 
				+"		float   m_maxElems[4];\n"
			
 
				+"		int			m_maxIndices[4];\n"	// element [3] is read and rewritten by copyAabbsKernel as a source index
			
 
				+"	};\n"
			
 
				+"} btAabbCL;\n"
			
 
				+"/// conservative test for overlap between two aabbs\n"
			
 
				+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"	// prototype emitted ahead of the definition; aabb1 carries no address-space qualifier, aabb2 points into __local memory
			
 
				+"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"	// returns true unless the boxes are strictly separated on x, z or y; boxes that merely touch count as overlapping
			
 
				+"{\n"
			
 
				+"	bool overlap = true;\n"
			
 
				+"	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"	// each line clears the flag if the intervals on that axis are disjoint; no early exit
			
 
				+"	overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
			
 
				+"	return overlap;\n"
			
 
				+"}\n"
			
 
				+"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"	// prototype; same test as TestAabbAgainstAabb2 with both boxes in __global memory
			
 
				+"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"	// returns true unless the boxes are strictly separated on x, z or y (touching counts as overlap)
			
 
				+"{\n"
			
 
				+"	bool overlap = true;\n"
			
 
				+"	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
			
 
				+"	return overlap;\n"
			
 
				+"}\n"
			
 
				+"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"	// prototype; variant with an unqualified first pointer and a __global second pointer. No kernel in this string calls it.
			
 
				+"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"	// returns true unless the boxes are strictly separated on x, z or y (touching counts as overlap)
			
 
				+"{\n"
			
 
				+"	bool overlap = true;\n"
			
 
				+"	overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
			
 
				+"	overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
			
 
				+"	return overlap;\n"
			
 
				+"}\n"
			
 
				+"__kernel void   computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping,  __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile  __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n"	// Kernel (2-D range): tests entry i of the first mapping against entry j of the second, both indexing the same AABB array, and appends overlapping pairs. 'axis' is not referenced in the body.
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numUnsortedAabbs)\n"
			
 
				+"		return;\n"
			
 
				+"	int j = get_global_id(1);\n"	// second NDRange dimension selects the entry of the second mapping
			
 
				+"	if (j>=numUnSortedAabbs2)\n"
			
 
				+"		return;\n"
			
 
				+"	__global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n"
			
 
				+"	__global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n"
			
 
				+"	if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n"
			
 
				+"	{\n"
			
 
				+"		int4 myPair;\n"
			
 
				+"		\n"
			
 
				+"		int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n"	// object index stored in the 4th min component
			
 
				+"		int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n"
			
 
				+"		if (xIndex>yIndex)\n"	// order the pair so the smaller object index goes in .x
			
 
				+"		{\n"
			
 
				+"			int tmp = xIndex;\n"
			
 
				+"			xIndex=yIndex;\n"
			
 
				+"			yIndex=tmp;\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"		myPair.x = xIndex;\n"
			
 
				+"		myPair.y = yIndex;\n"
			
 
				+"		myPair.z = NEW_PAIR_MARKER;\n"
			
 
				+"		myPair.w = NEW_PAIR_MARKER;\n"
			
 
				+"		int curPair = atomic_inc (pairCount);\n"	// reserves an output slot; the counter is incremented even when the slot is past maxPairs, so it can end up larger than maxPairs
			
 
				+"		if (curPair<maxPairs)\n"
			
 
				+"		{\n"
			
 
				+"				pairsOut[curPair] = myPair; //flush to main memory\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void   computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"	// Kernel: work-item i tests aabbs[i] against every aabbs[j] with j > i and appends overlapping pairs. 'axis' is not referenced in the body.
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numObjects)\n"
			
 
				+"		return;\n"
			
 
				+"	for (int j=i+1;j<numObjects;j++)\n"	// j starts after i, so each unordered pair is visited once
			
 
				+"	{\n"
			
 
				+"		if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
			
 
				+"		{\n"
			
 
				+"			int4 myPair;\n"
			
 
				+"			myPair.x = aabbs[i].m_minIndices[3];\n"	// object indices are taken from the 4th min component; unlike the two-array kernel they are not reordered
			
 
				+"			myPair.y = aabbs[j].m_minIndices[3];\n"
			
 
				+"			myPair.z = NEW_PAIR_MARKER;\n"
			
 
				+"			myPair.w = NEW_PAIR_MARKER;\n"
			
 
				+"			int curPair = atomic_inc (pairCount);\n"	// counter keeps growing past maxPairs; only the write below is capped
			
 
				+"			if (curPair<maxPairs)\n"
			
 
				+"			{\n"
			
 
				+"					pairsOut[curPair] = myPair; //flush to main memory\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void   computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"	// Kernel: like the brute-force kernel, but stops scanning j as soon as aabbs[j] begins past the end of aabbs[i] on 'axis'
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numObjects)\n"
			
 
				+"		return;\n"
			
 
				+"	for (int j=i+1;j<numObjects;j++)\n"
			
 
				+"	{\n"
			
 
				+"  	if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n"	// early exit; presumably relies on aabbs being sorted by min coordinate along 'axis' -- confirm against the host code
			
 
				+"		{\n"
			
 
				+"			break;\n"
			
 
				+"		}\n"
			
 
				+"		if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
			
 
				+"		{\n"
			
 
				+"			int4 myPair;\n"
			
 
				+"			myPair.x = aabbs[i].m_minIndices[3];\n"
			
 
				+"			myPair.y = aabbs[j].m_minIndices[3];\n"
			
 
				+"			myPair.z = NEW_PAIR_MARKER;\n"
			
 
				+"			myPair.w = NEW_PAIR_MARKER;\n"
			
 
				+"			int curPair = atomic_inc (pairCount);\n"	// counter keeps growing past maxPairs; only the write below is capped
			
 
				+"			if (curPair<maxPairs)\n"
			
 
				+"			{\n"
			
 
				+"					pairsOut[curPair] = myPair; //flush to main memory\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"	}\n"
			
 
				+"}\n"
			
 
				+"__kernel void   computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"	// Kernel: same pair search as computePairsKernelOriginal, but every work-item of a group stays in the loop until all of them have asked to stop, so the barriers inside the loop are reached by the whole group
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"	// no early return for i >= numObjects: such items fall into the 'j>=numObjects' break request below instead
			
 
				+"	int localId = get_local_id(0);\n"
			
 
				+"	__local int numActiveWgItems[1];\n"	// number of work-items in this group (each one increments it once)
			
 
				+"	__local int breakRequest[1];\n"	// number of work-items in this group that have finished scanning
			
 
				+"	if (localId==0)\n"
			
 
				+"	{\n"
			
 
				+"		numActiveWgItems[0] = 0;\n"
			
 
				+"		breakRequest[0] = 0;\n"
			
 
				+"	}\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	atomic_inc(numActiveWgItems);\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	int localBreak = 0;\n"	// set once this work-item has requested a break; guards against counting it twice
			
 
				+"	int j=i+1;\n"
			
 
				+"	do\n"
			
 
				+"	{\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	\n"
			
 
				+"		if (j<numObjects)\n"
			
 
				+"		{\n"
			
 
				+"	  	if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n"	// same axis cut-off as computePairsKernelOriginal, expressed as a break request instead of 'break'
			
 
				+"			{\n"
			
 
				+"				if (!localBreak)\n"
			
 
				+"				{\n"
			
 
				+"					atomic_inc(breakRequest);\n"
			
 
				+"					localBreak = 1;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"		\n"
			
 
				+"		if (j>=numObjects && !localBreak)\n"	// ran off the end of the array: also counts as a break request
			
 
				+"		{\n"
			
 
				+"			atomic_inc(breakRequest);\n"
			
 
				+"			localBreak = 1;\n"
			
 
				+"		}\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"		\n"
			
 
				+"		if (!localBreak)\n"	// reaching here with localBreak == 0 implies j < numObjects, so aabbs[i] and aabbs[j] are in range
			
 
				+"		{\n"
			
 
				+"			if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
			
 
				+"			{\n"
			
 
				+"				int4 myPair;\n"
			
 
				+"				myPair.x = aabbs[i].m_minIndices[3];\n"
			
 
				+"				myPair.y = aabbs[j].m_minIndices[3];\n"
			
 
				+"				myPair.z = NEW_PAIR_MARKER;\n"
			
 
				+"				myPair.w = NEW_PAIR_MARKER;\n"
			
 
				+"				int curPair = atomic_inc (pairCount);\n"	// counter keeps growing past maxPairs; only the write below is capped
			
 
				+"				if (curPair<maxPairs)\n"
			
 
				+"				{\n"
			
 
				+"						pairsOut[curPair] = myPair; //flush to main memory\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"		j++;\n"
			
 
				+"	} while (breakRequest[0]<numActiveWgItems[0]);\n"	// loop until every work-item in the group has requested a break
			
 
				+"}\n"
			
 
				+"__kernel void   computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile  __global int* pairCount, int numObjects, int axis, int maxPairs)\n"	// Kernel: barrier-synchronised pair search that reads candidate boxes from a 128-entry __local window instead of __global memory
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	int localId = get_local_id(0);\n"
			
 
				+"	__local int numActiveWgItems[1];\n"	// number of work-items in this group
			
 
				+"	__local int breakRequest[1];\n"	// number of work-items in this group that have finished scanning
			
 
				+"	__local btAabbCL localAabbs[128];// = aabbs[i];\n"	// NOTE(review): indexed with localId+64 and localCount+localId+1, which only stay below 128 when the work-group has at most 64 items -- confirm the launch configuration
			
 
				+"	\n"
			
 
				+"	btAabbCL myAabb;\n"
			
 
				+"	\n"
			
 
				+"	myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"	// out-of-range work-items use aabbs[0] as a placeholder; they are stopped by the 'j>=numObjects' check in the loop
			
 
				+"	float testValue = 	myAabb.m_maxElems[axis];\n"	// upper bound of this box on the sweep axis, hoisted out of the loop
			
 
				+"	\n"
			
 
				+"	if (localId==0)\n"
			
 
				+"	{\n"
			
 
				+"		numActiveWgItems[0] = 0;\n"
			
 
				+"		breakRequest[0] = 0;\n"
			
 
				+"	}\n"
			
 
				+"	int localCount=0;\n"	// offset inside the current window, 0..63
			
 
				+"	int block=0;\n"	// start offset of the current window relative to i, advanced by 64 on each reload
			
 
				+"	localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n"	// each work-item loads two entries of the window
			
 
				+"	localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n"
			
 
				+"	\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	atomic_inc(numActiveWgItems);\n"
			
 
				+"	barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	int localBreak = 0;\n"
			
 
				+"	\n"
			
 
				+"	int j=i+1;\n"
			
 
				+"	do\n"
			
 
				+"	{\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"	\n"
			
 
				+"		if (j<numObjects)\n"
			
 
				+"		{\n"
			
 
				+"	  	if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n"	// window entry localCount+localId+1 stands in for aabbs[j], assuming localId equals i minus the group's first global id
			
 
				+"			{\n"
			
 
				+"				if (!localBreak)\n"
			
 
				+"				{\n"
			
 
				+"					atomic_inc(breakRequest);\n"
			
 
				+"					localBreak = 1;\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"		\n"
			
 
				+"		if (j>=numObjects && !localBreak)\n"	// ran off the end of the array: counts as a break request
			
 
				+"		{\n"
			
 
				+"			atomic_inc(breakRequest);\n"
			
 
				+"			localBreak = 1;\n"
			
 
				+"		}\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"
			
 
				+"		\n"
			
 
				+"		if (!localBreak)\n"
			
 
				+"		{\n"
			
 
				+"			if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n"
			
 
				+"			{\n"
			
 
				+"				int4 myPair;\n"
			
 
				+"				myPair.x = myAabb.m_minIndices[3];\n"
			
 
				+"				myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n"
			
 
				+"				myPair.z = NEW_PAIR_MARKER;\n"
			
 
				+"				myPair.w = NEW_PAIR_MARKER;\n"
			
 
				+"				int curPair = atomic_inc (pairCount);\n"	// counter keeps growing past maxPairs; only the write below is capped
			
 
				+"				if (curPair<maxPairs)\n"
			
 
				+"				{\n"
			
 
				+"						pairsOut[curPair] = myPair; //flush to main memory\n"
			
 
				+"				}\n"
			
 
				+"			}\n"
			
 
				+"		}\n"
			
 
				+"		\n"
			
 
				+"		barrier(CLK_LOCAL_MEM_FENCE);\n"	// all reads of the current window finish before it may be overwritten below
			
 
				+"		localCount++;\n"
			
 
				+"		if (localCount==64)\n"	// window exhausted: slide it forward by 64 entries and reload
			
 
				+"		{\n"
			
 
				+"			localCount = 0;\n"
			
 
				+"			block+=64;			\n"
			
 
				+"			localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n"
			
 
				+"			localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n"
			
 
				+"		}\n"
			
 
				+"		j++;\n"
			
 
				+"		\n"
			
 
				+"	} while (breakRequest[0]<numActiveWgItems[0]);\n"	// loop until every work-item in the group has requested a break
			
 
				+"	\n"
			
 
				+"}\n"
			
 
				+"//http://stereopsis.com/radix.html\n"
			
 
				+"unsigned int FloatFlip(float fl);\n"	// prototype emitted ahead of the definition
			
 
				+"unsigned int FloatFlip(float fl)\n"	// maps a float's bit pattern to an unsigned key: all bits inverted when the sign bit is set, only the sign bit inverted otherwise (the radix-sort trick from the link above)
			
 
				+"{\n"
			
 
				+"	unsigned int f = *(unsigned int*)&fl;\n"	// reinterpret the float's bits as an unsigned integer
			
 
				+"	unsigned int mask = -(int)(f >> 31) | 0x80000000;\n"	// sign bit set -> 0xFFFFFFFF, sign bit clear -> 0x80000000
			
 
				+"	return f ^ mask;\n"
			
 
				+"}\n"
			
 
				+"float IFloatFlip(unsigned int f);\n"	// prototype emitted ahead of the definition
			
 
				+"float IFloatFlip(unsigned int f)\n"	// inverse of FloatFlip: turns a flipped key back into the original float. No kernel in this string calls it.
			
 
				+"{\n"
			
 
				+"	unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n"	// top bit set -> 0x80000000 (undo sign flip), top bit clear -> 0xFFFFFFFF (undo full inversion)
			
 
				+"	unsigned int fl = f ^ mask;\n"
			
 
				+"	return *(float*)&fl;\n"	// reinterpret the restored bits as a float
			
 
				+"}\n"
			
 
				+"__kernel void   copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n"	// Kernel: refreshes destAabbs[i] from allAabbs, using the index already stored in destAabbs[i].m_maxIndices[3] as the source slot
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numObjects)\n"
			
 
				+"		return;\n"
			
 
				+"	int src = destAabbs[i].m_maxIndices[3];\n"	// read the source index before the entry is overwritten
			
 
				+"	destAabbs[i] = allAabbs[src];\n"
			
 
				+"	destAabbs[i].m_maxIndices[3] = src;\n"	// put the source index back, since the copy replaced the whole struct
			
 
				+"}\n"
			
 
				+"__kernel void   flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n"	// Kernel: fills sortData[i] with (key, value) = (FloatFlip of the mapped box's min coordinate on 'axis', i)
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numObjects)\n"
			
 
				+"		return;\n"
			
 
				+"	\n"
			
 
				+"	\n"
			
 
				+"	sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n"	// the unsigned key is stored into the int .x component
			
 
				+"	sortData[i].y = i;\n"	// value is the position in smallAabbMapping, not the AABB index itself
			
 
				+"		\n"
			
 
				+"}\n"
			
 
				+"__kernel void   scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"	// Kernel: writes AABBs into sortedAabbs in the order given by sortData; sortData[i].y is a position in smallAabbMapping, which in turn indexes allAabbs
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numObjects)\n"
			
 
				+"		return;\n"
			
 
				+"	\n"
			
 
				+"	sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n"
			
 
				+"}\n"
			
 
				+"__kernel void   prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n"	// Kernel: per mapped AABB, writes its midpoint to sum[i] and the component-wise square of the midpoint to sum2[i]; judging by the name these feed a later variance computation -- confirm against the host code
			
 
				+"{\n"
			
 
				+"	int i = get_global_id(0);\n"
			
 
				+"	if (i>=numAabbs)\n"
			
 
				+"		return;\n"
			
 
				+"	\n"
			
 
				+"	btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n"
			
 
				+"	\n"
			
 
				+"	float4 s;\n"
			
 
				+"	s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n"	// midpoint of the box; the 4th component is averaged along with x, y and z
			
 
				+"	sum[i]=s;\n"
			
 
				+"	sum2[i]=s*s;	\n"
			
 
				+"}\n"
			
 
				+;
			
--- a/include/Bullet3OpenCL/Initialize/b3OpenCLInclude.h
+++ b/include/Bullet3OpenCL/Initialize/b3OpenCLInclude.h
@@ -0,0 +1,48 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_OPENCL_INCLUDE_H
			
 
				+#define B3_OPENCL_INCLUDE_H
			
 
				+
			
 
				+#ifdef B3_USE_CLEW
			
 
				+	#include "clew/clew.h"
			
 
				+#else
			
 
				+
			
 
				+#ifdef __APPLE__
			
 
				+#ifdef USE_MINICL
			
 
				+#include <MiniCL/cl.h>
			
 
				+#else
			
 
				+#include <OpenCL/cl.h>
			
 
				+#include <OpenCL/cl_ext.h> //clLogMessagesToStderrAPPLE
			
 
				+#endif
			
 
				+#else
			
 
				+#ifdef USE_MINICL
			
 
				+#include <MiniCL/cl.h>
			
 
				+#else
			
 
				+#include <CL/cl.h>
			
 
				+#ifdef _WIN32
			
 
				+#include "CL/cl_gl.h"
			
 
				+#endif //_WIN32
			
 
				+#endif
			
 
				+#endif //__APPLE__
			
 
				+#endif //B3_USE_CLEW
			
 
				+
			
 
				+#include <assert.h>
			
 
				+#include <stdio.h>
			
 
				+#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); } // Prints (a) and asserts when (a) != (b). NOTE(review): expands to a bare if-block (a following 'else' would attach to it) and evaluates (a) up to three times -- pass a plain variable, not a call
			
 
				+
			
 
				+
			
 
				+#endif //B3_OPENCL_INCLUDE_H
			
 
				+
			
--- a/include/Bullet3OpenCL/Initialize/b3OpenCLUtils.h
+++ b/include/Bullet3OpenCL/Initialize/b3OpenCLUtils.h
@@ -0,0 +1,194 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
			
 
				+Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. 
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+//original author: Roman Ponomarev
			
 
				+//cleanup by Erwin Coumans
			
 
				+
			
 
				+#ifndef B3_OPENCL_UTILS_H
			
 
				+#define B3_OPENCL_UTILS_H
			
 
				+
			
 
				+#include "b3OpenCLInclude.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+///C API for OpenCL utilities: convenience functions, see below for C++ API
			
 
				+/// Only declarations live in this header; the behaviour notes below are inferred from names and signatures and should be confirmed against the implementation.
			
 
				+/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
			
 
				+/// to have to understand GL types. It is an HGLRC in _WIN32 or a GLXContext otherwise.
			
 
				+cl_context 	b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC , int preferredDeviceIndex , int preferredPlatformIndex, cl_platform_id* platformId);
			
 
				+	/// Presumably the number of devices attached to cxMainContext.
			
 
				+int b3OpenCLUtils_getNumDevices(cl_context cxMainContext);
			
 
				+/// Presumably the nr'th device of cxMainContext.
			
 
				+cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int nr);
			
 
				+/// Presumably prints the properties of 'device'; the output destination is not visible from this header.
			
 
				+void b3OpenCLUtils_printDeviceInfo(cl_device_id device);
			
 
				+/// Kernel lookup/compile helper; the C++ wrapper below defaults pErrNum and prog to 0 and additionalMacros to "".
			
 
				+cl_kernel b3OpenCLUtils_compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog,const char* additionalMacros);
			
 
				+/// NOTE(review): the next declaration uses 'bool' inside the extern "C" section; a C translation unit would need <stdbool.h> in scope -- confirm.
			
 
				+//optional
			
 
				+cl_program b3OpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros  , const char* srcFileNameForCaching, bool disableBinaryCaching);
			
 
				+
			
 
				+//the following optional APIs provide access using specific platform information
			
 
				+int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum);
			
 
				+
			
 
				+///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
			
 
				+cl_platform_id b3OpenCLUtils_getPlatform(int nr, cl_int* pErrNum);
			
 
				+
			
 
				+/// Presumably prints the properties of 'platform'.
			
 
				+void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform);
			
 
				+/// Returns a vendor-name string; ownership and lifetime of the pointer are not stated here.
			
 
				+const char* b3OpenCLUtils_getSdkVendorName();
			
 
				+	
			
 
				+///set the path (directory/folder) where the compiled OpenCL kernels are stored
			
 
				+void b3OpenCLUtils_setCachePath(const char* path);
			
 
				+	/// Same parameters as createContextFromType, with the platform passed in explicitly instead of returned through platformId.
			
 
				+cl_context 	b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC ,int preferredDeviceIndex , int preferredPlatformIndex);
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+
			
 
				+#define B3_MAX_STRING_LENGTH 1024 // size in bytes of every fixed char buffer in the info structs below
			
 
				+/// Plain record of device properties, passed by pointer to b3OpenCLUtils::getDeviceInfo. Declared inside the '#ifdef __cplusplus' section, so it is only visible to C++ translation units.
			
 
				+typedef struct
			
 
				+{
			
 
				+	char m_deviceName[B3_MAX_STRING_LENGTH];
			
 
				+	char m_deviceVendor[B3_MAX_STRING_LENGTH];
			
 
				+	char m_driverVersion[B3_MAX_STRING_LENGTH];
			
 
				+	char m_deviceExtensions[B3_MAX_STRING_LENGTH];
			
 
				+// Numeric properties; the names suggest they mirror clGetDeviceInfo queries -- confirm against getDeviceInfo's implementation
			
 
				+	cl_device_type		m_deviceType;
			
 
				+	cl_uint 				m_computeUnits;
			
 
				+	size_t 					m_workitemDims;
			
 
				+	size_t 					m_workItemSize[3]; // fixed at three entries regardless of m_workitemDims
			
 
				+	size_t 					m_image2dMaxWidth;
			
 
				+	size_t 					m_image2dMaxHeight;
			
 
				+	size_t 					m_image3dMaxWidth;
			
 
				+	size_t 					m_image3dMaxHeight;
			
 
				+	size_t 					m_image3dMaxDepth;
			
 
				+	size_t 					m_workgroupSize;
			
 
				+	cl_uint 				m_clockFrequency;
			
 
				+	cl_ulong				m_constantBufferSize;
			
 
				+	cl_ulong				m_localMemSize;
			
 
				+	cl_ulong				m_globalMemSize;
			
 
				+    cl_bool					m_errorCorrectionSupport;
			
 
				+	cl_device_local_mem_type m_localMemType;
			
 
				+	cl_uint					m_maxReadImageArgs;
			
 
				+	cl_uint					m_maxWriteImageArgs;
			
 
				+
			
 
				+
			
 
				+
			
 
				+	cl_uint 				m_addressBits;
			
 
				+	cl_ulong				m_maxMemAllocSize;
			
 
				+	cl_command_queue_properties m_queueProperties;
			
 
				+	cl_bool					m_imageSupport;
			
 
				+	cl_uint					m_vecWidthChar; // m_vecWidth*: one vector-width value per scalar type
			
 
				+	cl_uint					m_vecWidthShort;
			
 
				+	cl_uint					m_vecWidthInt;
			
 
				+	cl_uint					m_vecWidthLong;
			
 
				+	cl_uint					m_vecWidthFloat;
			
 
				+	cl_uint					m_vecWidthDouble;
			
 
				+
			
 
				+} b3OpenCLDeviceInfo;
			
 
				+
			
 
				+struct b3OpenCLPlatformInfo // Three fixed-size text buffers describing a platform; passed by pointer to b3OpenCLUtils::getPlatformInfo
			
 
				+{
			
 
				+	char m_platformVendor[B3_MAX_STRING_LENGTH];
			
 
				+	char m_platformName[B3_MAX_STRING_LENGTH];
			
 
				+	char m_platformVersion[B3_MAX_STRING_LENGTH];
			
 
				+	// The constructor makes each buffer an empty C string by zeroing its first byte; the remaining bytes are left uninitialised
			
 
				+	b3OpenCLPlatformInfo()
			
 
				+	{
			
 
				+		m_platformVendor[0]=0;
			
 
				+		m_platformName[0]=0;
			
 
				+		m_platformVersion[0]=0;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+
			
 
				+///C++ API for OpenCL utilities: convenience functions
			
 
				+struct b3OpenCLUtils // Static-only facade: each inline member forwards its arguments unchanged to the b3OpenCLUtils_* C function of the same name and adds default arguments
			
 
				+{
			
 
				+	/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
			
 
				+	/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
			
 
				+	static inline cl_context 	createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1, cl_platform_id* platformId=0)
			
 
				+	{
			
 
				+		return b3OpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx , pGLDC , preferredDeviceIndex, preferredPlatformIndex, platformId);
			
 
				+	}
			
 
				+	/// Forwards to b3OpenCLUtils_getNumDevices.
			
 
				+	static inline int getNumDevices(cl_context cxMainContext)
			
 
				+	{
			
 
				+		return b3OpenCLUtils_getNumDevices(cxMainContext);
			
 
				+	}
			
 
				+	static inline cl_device_id getDevice(cl_context cxMainContext, int nr) // forwards to b3OpenCLUtils_getDevice
			
 
				+	{
			
 
				+		return b3OpenCLUtils_getDevice(cxMainContext,nr);
			
 
				+	}
			
 
				+/// Declared only; defined outside this header. Takes the output record by pointer.
			
 
				+	static void getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info);
			
 
				+/// Forwards to b3OpenCLUtils_printDeviceInfo.
			
 
				+	static inline void printDeviceInfo(cl_device_id device)
			
 
				+	{
			
 
				+		b3OpenCLUtils_printDeviceInfo(device);
			
 
				+	}
			
 
				+/// Forwards to b3OpenCLUtils_compileCLKernelFromString; pErrNum and prog default to 0, additionalMacros to "".
			
 
				+	static inline cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" )
			
 
				+	{
			
 
				+		return b3OpenCLUtils_compileCLKernelFromString(clContext,device, kernelSource,  kernelName, pErrNum, prog,additionalMacros);
			
 
				+	}
			
 
				+/// Forwards to b3OpenCLUtils_compileCLProgramFromString; binary caching stays enabled unless disableBinaryCaching is passed as true.
			
 
				+	//optional
			
 
				+	static inline cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0, bool disableBinaryCaching=false)
			
 
				+	{
			
 
				+		return b3OpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching, disableBinaryCaching);
			
 
				+	}
			
 
				+
			
 
				+	//the following optional APIs provide access using specific platform information
			
 
				+	static inline int getNumPlatforms(cl_int* pErrNum=0)
			
 
				+	{
			
 
				+		return b3OpenCLUtils_getNumPlatforms(pErrNum);
			
 
				+	}
			
 
				+	///get the nr'th platform, where nr is in the range [0..getNumPlatforms)
			
 
				+	static inline cl_platform_id getPlatform(int nr, cl_int* pErrNum=0)
			
 
				+	{
			
 
				+		return b3OpenCLUtils_getPlatform(nr,pErrNum);
			
 
				+	}
			
 
				+	/// Declared only; defined outside this header. Takes the output record by pointer.
			
 
				+	static void getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo);
			
 
				+/// Forwards to b3OpenCLUtils_printPlatformInfo.
			
 
				+	static inline void printPlatformInfo(cl_platform_id platform)
			
 
				+	{
			
 
				+		b3OpenCLUtils_printPlatformInfo(platform);
			
 
				+	}
			
 
				+/// Forwards to b3OpenCLUtils_getSdkVendorName.
			
 
				+	static inline const char* getSdkVendorName()
			
 
				+	{
			
 
				+		return b3OpenCLUtils_getSdkVendorName();
			
 
				+	}
			
 
				+	static inline cl_context 	createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1) // forwards to b3OpenCLUtils_createContextFromPlatform
			
 
				+	{
			
 
				+		return b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx,pGLDC,preferredDeviceIndex, preferredPlatformIndex);
			
 
				+	}
			
 
				+	static void setCachePath(const char* path) // forwards to b3OpenCLUtils_setCachePath
			
 
				+	{
			
 
				+		b3OpenCLUtils_setCachePath(path);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+#endif //__cplusplus
			
 
				+
			
 
				+#endif // B3_OPENCL_UTILS_H
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h
@@ -0,0 +1,18 @@
 
				+#ifndef B3_BVH_INFO_H
			
 
				+#define B3_BVH_INFO_H
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+
			
 
				+struct b3BvhInfo // Plain data record (three b3Vector3 fields, four int fields); b3ConvexHullContact.h uses it as the element type of b3OpenCLArray<b3BvhInfo>
			
 
				+{
			
 
				+	b3Vector3	m_aabbMin; // presumably the minimum corner of the tree's bounding box -- TODO confirm
			
 
				+	b3Vector3	m_aabbMax; // presumably the maximum corner of the tree's bounding box -- TODO confirm
			
 
				+	b3Vector3	m_quantization; // presumably the scale used for quantized node coordinates -- TODO confirm against b3QuantizedBvh
			
 
				+	int			m_numNodes; // node count (per its name; units/meaning not shown in this header)
			
 
				+	int			m_numSubTrees; // subtree count (per its name)
			
 
				+	int			m_nodeOffset; // NOTE(review): looks like a start index into a shared node array -- confirm with the code that fills it
			
 
				+	int			m_subTreeOffset; // NOTE(review): looks like a start index into a shared subtree array -- confirm with the code that fills it
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_BVH_INFO_H
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h
@@ -0,0 +1,80 @@
 
				+
			
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+#ifndef B3_CONTACT_CACHE_H
			
 
				+#define B3_CONTACT_CACHE_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Common/b3AlignedAllocator.h"
			
 
				+
			
 
				+
			
 
				+///maximum contact breaking and merging threshold
			
 
				+extern b3Scalar gContactBreakingThreshold; // declaration only (extern); the definition is in a source file not shown here
			
 
				+
			
 
				+
			
 
				+
			
 
				+#define MANIFOLD_CACHE_SIZE 4 // presumably the 4-point cache limit described in the b3ContactCache comment below. NOTE(review): the macro name has no B3_ prefix, unlike this header's include guard -- check for clashes with other headers
			
 
				+
			
 
				+///b3ContactCache is a contact point cache; it stays persistent as long as objects are overlapping in the broadphase.
			
 
				+///Those contact points are created by the collision narrow phase.
			
 
				+///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time.
			
 
				+///It updates/refreshes old contact points, and throws them away if necessary (distance becomes too large).
			
 
				+///It reduces the cache to 4 points when more than 4 points are added, using the following rules:
			
 
				+///the contact point with deepest penetration is always kept, and it tries to maximize the area covered by the points.
			
 
				+///Note that some pairs of objects might have more than one contact manifold.
			
 
				+B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache
			
 
				+{
			
 
				+
			
 
				+	
			
 
				+
			
 
				+	
			
 
				+	/// sort cached points so most isolated points come first
			
 
				+	int	sortCachedPoints(const b3Vector3& pt); // declared before 'public:', so private if the macro above expands to a plain 'class' declaration
			
 
				+
			
 
				+	
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+	
			
 
				+	
			
 
				+	int addManifoldPoint( const b3Vector3& newPoint); // meaning of the int result is not documented in this header -- see the implementation
			
 
				+
			
 
				+	/*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex)
			
 
				+	{
			
 
				+		b3Assert(validContactDistance(newPoint));
			
 
				+		m_pointCache[insertIndex] = newPoint;
			
 
				+	}
			
 
				+	*/ // the block above is disabled; it refers to m_pointCache, which is not declared anywhere in this class
			
 
				+
			
 
				+
			
 
				+	
			
 
				+	static bool validContactDistance(const b3Vector3& pt);
			
 
				+	
			
 
				+	/// calculates new world-space coordinates and depth, and rejects points that exceed the collision margin
			
 
				+	static void	refreshContactPoints(  const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache); // 'struct b3Contact4Data' is an elaborated type specifier, so this header needs no include for that type; newContactCache is a non-const reference (modified in place)
			
 
				+
			
 
				+	static void removeContactPoint(struct b3Contact4Data& newContactCache,int i); // i is presumably the index of the point to remove -- TODO confirm valid range
			
 
				+	
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //B3_CONTACT_CACHE_H
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h
@@ -0,0 +1,118 @@
 
				+
			
 
				+#ifndef _CONVEX_HULL_CONTACT_H
			
 
				+#define _CONVEX_HULL_CONTACT_H
			
 
				+
			
 
				+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
			
 
				+#include "Bullet3Common/shared/b3Int2.h"
			
 
				+#include "Bullet3Common/shared/b3Int4.h"
			
 
				+#include "b3OptimizedBvh.h"
			
 
				+#include "b3BvhInfo.h"
			
 
				+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
			
 
				+
			
 
				+//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+struct GpuSatCollision // Bundles three OpenCL handles (context, device, queue), a set of cl_kernel handles and several b3OpenCLArray buffers; exposes one entry point, computeConvexConvexContactsGPUSAT
			
 
				+{
			
 
				+	cl_context				m_context; // presumably the ctx passed to the constructor -- TODO confirm
			
 
				+	cl_device_id			m_device; // presumably the device passed to the constructor -- TODO confirm
			
 
				+	cl_command_queue		m_queue; // presumably the q passed to the constructor -- TODO confirm
			
 
				+	cl_kernel				m_findSeparatingAxisKernel;
			
 
				+	cl_kernel				m_mprPenetrationKernel;
			
 
				+	cl_kernel				m_findSeparatingAxisUnitSphereKernel;
			
 
				+
			
 
				+
			
 
				+	cl_kernel m_findSeparatingAxisVertexFaceKernel;
			
 
				+	cl_kernel m_findSeparatingAxisEdgeEdgeKernel;
			
 
				+	
			
 
				+	cl_kernel				m_findConcaveSeparatingAxisKernel;
			
 
				+    cl_kernel				m_findConcaveSeparatingAxisVertexFaceKernel;
			
 
				+    cl_kernel				m_findConcaveSeparatingAxisEdgeEdgeKernel;
			
 
				+ 
			
 
				+    
			
 
				+    
			
 
				+    
			
 
				+	cl_kernel				m_findCompoundPairsKernel;
			
 
				+	cl_kernel				m_processCompoundPairsKernel;
			
 
				+
			
 
				+	cl_kernel				m_clipHullHullKernel;
			
 
				+	cl_kernel				m_clipCompoundsHullHullKernel;
			
 
				+    
			
 
				+    cl_kernel               m_clipFacesAndFindContacts; // also a cl_kernel, although its name lacks the 'Kernel' suffix used by the other kernel members
			
 
				+    cl_kernel               m_findClippingFacesKernel;
			
 
				+    
			
 
				+	cl_kernel				m_clipHullHullConcaveConvexKernel;
			
 
				+//	cl_kernel				m_extractManifoldAndAddContactKernel;
			
 
				+    cl_kernel               m_newContactReductionKernel;
			
 
				+
			
 
				+	cl_kernel				m_bvhTraversalKernel;
			
 
				+	cl_kernel				m_primitiveContactsKernel;
			
 
				+	cl_kernel				m_findConcaveSphereContactsKernel;
			
 
				+
			
 
				+	cl_kernel				m_processCompoundPairsPrimitivesKernel;
			
 
				+    
			
 
				+	b3OpenCLArray<b3Vector3> m_unitSphereDirections;
			
 
				+
			
 
				+	b3OpenCLArray<int>		m_totalContactsOut;
			
 
				+
			
 
				+	b3OpenCLArray<b3Vector3> m_sepNormals;
			
 
				+	b3OpenCLArray<float> m_dmins;
			
 
				+
			
 
				+	b3OpenCLArray<int>		m_hasSeparatingNormals;
			
 
				+	b3OpenCLArray<b3Vector3> m_concaveSepNormals;
			
 
				+	b3OpenCLArray<int>		m_concaveHasSeparatingNormals;
			
 
				+	b3OpenCLArray<int>		m_numConcavePairsOut;
			
 
				+	b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs;
			
 
				+	b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals;
			
 
				+	b3OpenCLArray<int>		m_gpuHasCompoundSepNormals;
			
 
				+	b3OpenCLArray<int>		m_numCompoundPairsOut;
			
 
				+	
			
 
				+
			
 
				+	GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue  q ); // declared here, defined out of line
			
 
				+	virtual ~GpuSatCollision(); // the only virtual member declared in this struct; what it releases is not visible from this header
			
 
				+	
			
 
				+
			
 
				+	void computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, // sole non-special member function; declared here, defined out of line
			
 
				+			const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
			
 
				+			b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, // nContacts is a non-const int&, so the callee can write a count back through it
			
 
				+			const b3OpenCLArray<b3Contact4>* oldContacts,
			
 
				+			int maxContactCapacity,
			
 
				+			int compoundPairCapacity,
			
 
				+			const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, // despite the 'host' in its name, the declared type is a b3OpenCLArray
			
 
				+			const b3OpenCLArray<b3Vector3>& vertices,
			
 
				+			const b3OpenCLArray<b3Vector3>& uniqueEdges,
			
 
				+			const b3OpenCLArray<b3GpuFace>& faces,
			
 
				+			const b3OpenCLArray<int>& indices,
			
 
				+			const b3OpenCLArray<b3Collidable>& gpuCollidables,
			
 
				+			const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
			
 
				+
			
 
				+			const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace,
			
 
				+			const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace,
			
 
				+
			
 
				+           b3OpenCLArray<b3Vector3>& worldVertsB1GPU, // this and the next four arrays are non-const references; presumably scratch/output buffers -- TODO confirm
			
 
				+           b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
			
 
				+           b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
			
 
				+           b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
			
 
				+           b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
			
 
				+		   b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData,
			
 
				+		   b3OpenCLArray<b3QuantizedBvhNode>*	treeNodesGPU,
			
 
				+			b3OpenCLArray<b3BvhSubtreeInfo>*	subTreesGPU,
			
 
				+			b3OpenCLArray<b3BvhInfo>*	bvhInfo,
			
 
				+			int numObjects,
			
 
				+			int maxTriConvexPairCapacity,
			
 
				+			b3OpenCLArray<b3Int4>& triangleConvexPairs,
			
 
				+			int& numTriConvexPairsOut // non-const int&: second value the callee can write back to the caller
			
 
				+			);
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //_CONVEX_HULL_CONTACT_H
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h
@@ -0,0 +1,9 @@
 
				+#ifndef CONVEX_POLYHEDRON_CL
			
 
				+#define CONVEX_POLYHEDRON_CL
			
 
				+
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+
			
 
				+
			
 
				+
			
 
				+#endif //CONVEX_POLYHEDRON_CL
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h
@@ -0,0 +1,82 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2008 Erwin Coumans  http://continuousphysics.com/Bullet/
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the
			
 
				+use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose,
			
 
				+including commercial applications, and to alter it and redistribute it
			
 
				+freely,
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not
			
 
				+claim that you wrote the original software. If you use this software in a
			
 
				+product, an acknowledgment in the product documentation would be appreciated
			
 
				+but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be
			
 
				+misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+/*
			
 
				+GJK-EPA collision solver by Nathanael Presson, 2008
			
 
				+*/
			
 
				+#ifndef B3_GJK_EPA2_H
			
 
				+#define B3_GJK_EPA2_H
			
 
				+
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+#include "Bullet3Common/b3Transform.h"
			
 
				+#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
			
 
				+
			
 
				+
			
 
				+///btGjkEpaSolver contributed under zlib by Nathanael Presson
			
 
				+struct	b3GjkEpaSolver2 // declares only a nested result type and static functions; no data members are declared in this header
			
 
				+{
			
 
				+struct	sResults // result record filled in through the 'sResults& results' parameter of Distance/Penetration
			
 
				+	{
			
 
				+	enum eStatus
			
 
				+		{
			
 
				+		Separated,		/* Shapes do not penetrate												*/ 
			
 
				+		Penetrating,	/* Shapes are penetrating												*/ 
			
 
				+		GJK_Failed,		/* GJK phase fail, no big issue, shapes are probably just 'touching'	*/ 
			
 
				+		EPA_Failed		/* EPA phase fail, bigger problem, need to save parameters, and debug	*/ 
			
 
				+		}		status;
			
 
				+	b3Vector3	witnesses[2]; // two points; presumably one witness point per shape -- TODO confirm ordering and coordinate space
			
 
				+	b3Vector3	normal;
			
 
				+	b3Scalar	distance;
			
 
				+	};
			
 
				+
			
 
				+static int		StackSizeRequirement();
			
 
				+
			
 
				+static bool		Distance(	 const b3Transform&	transA, const b3Transform&	transB,
			
 
				+							const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, 
			
 
				+							const b3AlignedObjectArray<b3Vector3>& verticesA,
			
 
				+							const b3AlignedObjectArray<b3Vector3>& verticesB,
			
 
				+							const b3Vector3& guess,
			
 
				+							sResults& results); // results is the only non-const parameter, i.e. the output; meaning of the bool return is not stated here
			
 
				+
			
 
				+static bool		Penetration( const b3Transform&	transA, const b3Transform&	transB,
			
 
				+							const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, 
			
 
				+							const b3AlignedObjectArray<b3Vector3>& verticesA,
			
 
				+							const b3AlignedObjectArray<b3Vector3>& verticesB,
			
 
				+							const b3Vector3& guess,
			
 
				+							sResults& results,
			
 
				+							bool usemargins=true); // same parameters as Distance plus the optional usemargins flag (defaults to true)
			
 
				+#if 0 // compiled out: the two SignedDistance overloads below still use btConvexShape/btTransform rather than b3 types
			
 
				+static b3Scalar	SignedDistance(	const b3Vector3& position,
			
 
				+								b3Scalar margin,
			
 
				+								const btConvexShape* shape,
			
 
				+								const btTransform& wtrs,
			
 
				+								sResults& results);
			
 
				+							
			
 
				+static bool		SignedDistance(	const btConvexShape* shape0,const btTransform& wtrs0,
			
 
				+								const btConvexShape* shape1,const btTransform& wtrs1,
			
 
				+								const b3Vector3& guess,
			
 
				+								sResults& results);
			
 
				+#endif 
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //B3_GJK_EPA2_H
			
 
				+
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3GjkPairDetector.h
@@ -0,0 +1,84 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifndef B3_GJK_PAIR_DETECTOR_H
			
 
				+#define B3_GJK_PAIR_DETECTOR_H
			
 
				+
			
 
				+
			
 
				+#include "Bullet3Common/b3Vector3.h"
			
 
				+#include "Bullet3Common/b3AlignedObjectArray.h"
			
 
				+
			
 
				+class b3Transform;
			
 
				+struct b3GjkEpaSolver2;
			
 
				+class b3VoronoiSimplexSolver;
			
 
				+struct b3ConvexPolyhedronData;
			
 
				+
			
 
				+B3_ATTRIBUTE_ALIGNED16(struct) b3GjkPairDetector // holds a cached separating axis/distance plus pointers to a simplex solver and a penetration-depth solver; used through the free function getClosestPoints declared after it
			
 
				+{
			
 
				+	
			
 
				+
			
 
				+	b3Vector3	m_cachedSeparatingAxis;
			
 
				+	b3GjkEpaSolver2*	m_penetrationDepthSolver; // raw pointer; ownership is not expressed in this header (the inline destructor below does not delete it)
			
 
				+	b3VoronoiSimplexSolver* m_simplexSolver; // raw pointer; ownership is not expressed in this header (the inline destructor below does not delete it)
			
 
				+
			
 
				+
			
 
				+	bool		m_ignoreMargin;
			
 
				+	b3Scalar	m_cachedSeparatingDistance;
			
 
				+	
			
 
				+
			
 
				+public: // if the macro above expands to a plain 'struct', members are already public and this label changes nothing -- TODO confirm
			
 
				+
			
 
				+	//some debugging to fix degeneracy problems
			
 
				+	int			m_lastUsedMethod;
			
 
				+	int			m_curIter;
			
 
				+	int			m_degenerateSimplex;
			
 
				+	int			m_catchDegeneracies;
			
 
				+	int			m_fixContactNormalDirection;
			
 
				+
			
 
				+	b3GjkPairDetector(b3VoronoiSimplexSolver* simplexSolver,b3GjkEpaSolver2*	penetrationDepthSolver); // declared here, defined out of line; initial member values are therefore not visible in this header
			
 
				+	
			
 
				+	virtual ~b3GjkPairDetector() {}; // empty body: releases nothing
			
 
				+
			
 
				+	
			
 
				+	//void	getClosestPoints(,Result& output);
			
 
				+	
			
 
				+	void setCachedSeperatingAxis(const b3Vector3& seperatingAxis) // note: 'Seperating' is misspelled in this public name (the getter spells it 'Separating'); kept as-is because callers depend on it
			
 
				+	{
			
 
				+		m_cachedSeparatingAxis = seperatingAxis;
			
 
				+	}
			
 
				+
			
 
				+	const b3Vector3& getCachedSeparatingAxis() const // returns a reference to the member, valid only while this object lives
			
 
				+	{
			
 
				+		return m_cachedSeparatingAxis;
			
 
				+	}
			
 
				+	b3Scalar	getCachedSeparatingDistance() const // returns the member by value
			
 
				+	{
			
 
				+		return m_cachedSeparatingDistance;
			
 
				+	}
			
 
				+
			
 
				+	void	setPenetrationDepthSolver(b3GjkEpaSolver2*	penetrationDepthSolver) // only stores the pointer; the previous one is simply overwritten
			
 
				+	{
			
 
				+		m_penetrationDepthSolver = penetrationDepthSolver;
			
 
				+	}
			
 
				+
			
 
				+	///don't use setIgnoreMargin, it's for Bullet's internal use
			
 
				+	void	setIgnoreMargin(bool ignoreMargin)
			
 
				+	{
			
 
				+		m_ignoreMargin = ignoreMargin;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+bool getClosestPoints(b3GjkPairDetector* gjkDetector, const b3Transform&	transA, const b3Transform&	transB, // free function (not a member); declared here, defined out of line
			
 
				+	const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, 
			
 
				+	const b3AlignedObjectArray<b3Vector3>& verticesA,
			
 
				+	const b3AlignedObjectArray<b3Vector3>& verticesB,
			
 
				+	b3Scalar maximumDistanceSquared,
			
 
				+	b3Vector3& resultSepNormal, // non-const reference: output
			
 
				+	float& resultSepDistance, // non-const reference: output. NOTE(review): typed float while the distance limit above is b3Scalar -- confirm this is intended if b3Scalar can be double
			
 
				+	b3Vector3& resultPointOnB); // non-const reference: output; meaning of the bool return value is not stated in this header
			
 
				+
			
 
				+#endif //B3_GJK_PAIR_DETECTOR_H
			
--- a/include/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h
+++ b/include/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h
@@ -0,0 +1,65 @@
 
				+/*
			
 
				+Bullet Continuous Collision Detection and Physics Library
			
 
				+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
			
 
				+
			
 
				+This software is provided 'as-is', without any express or implied warranty.
			
 
				+In no event will the authors be held liable for any damages arising from the use of this software.
			
 
				+Permission is granted to anyone to use this software for any purpose, 
			
 
				+including commercial applications, and to alter it and redistribute it freely, 
			
 
				+subject to the following restrictions:
			
 
				+
			
 
				+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
			
 
				+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
			
 
				+3. This notice may not be removed or altered from any source distribution.
			
 
				+*/
			
 
				+
			
 
				+///Contains contributions from Disney Studio's
			
 
				+
			
 
				+#ifndef B3_OPTIMIZED_BVH_H
			
 
				+#define B3_OPTIMIZED_BVH_H
			
 
				+
			
 
				+#include "b3QuantizedBvh.h"
			
 
				+
			
 
				+class b3StridingMeshInterface;
			
 
				+
			
 
				+
			
 
				+///The b3OptimizedBvh extends the b3QuantizedBvh to create an AABB tree for triangle meshes, through the b3StridingMeshInterface.
			
 
				+B3_ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh
			
 
				+{
			
 
				+	
			
 
				+public:
			
 
				+	B3_DECLARE_ALIGNED_ALLOCATOR();
			
 
				+
			
 
				+protected: // this section is empty: nothing is declared between here and the next 'public:'
			
 
				+
			
 
				+public:
			
 
				+
			
 
				+	b3OptimizedBvh();
			
 
				+
			
 
				+	virtual ~b3OptimizedBvh();
			
 
				+
			
 
				+	void	build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); // declared here, defined out of line
			
 
				+
			
 
				+	void	refit(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin,const b3Vector3& aabbMax); // declared here, defined out of line
			
 
				+
			
 
				+	void	refitPartial(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin, const b3Vector3& aabbMax); // same parameter list as refit; how the two differ is not visible in this header
			
 
				+
			
 
				+	void	updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index); // NOTE(review): whether endNode is inclusive is not stated here -- confirm in the implementation
			
 
				+
			
 
				+	/// Data buffer MUST be 16 byte aligned
			
 
				+	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const // size parameter is spelled 'unsigned' here and 'unsigned int' in deSerializeInPlace (the same type)
			
 
				+	{
			
 
				+		return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian); // explicitly qualified call to the base-class serialize; all three arguments are forwarded unchanged
			
 
				+
			
 
				+	}
			
 
				+
			
 
				+	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
			
 
				+	static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); // parameter name says the buffer must be aligned; presumably 16 bytes, as for serializeInPlace -- TODO confirm
			
 
				+
			
 
				+
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //B3_OPTIMIZED_BVH_H
			
 
				+
			
 
				+