Browse Source

Initial refactoring of Direct3D9 shaders. ShaderCompiler removed. Bytecode not cached into files yet.

Lasse Öörni 12 years ago
parent
commit
caf080ecbb
64 changed files with 8033 additions and 1398 deletions
  1. 0 2
      Bin/CompileAllShaders.bat
  2. 0 16
      Bin/CoreData/Shaders/HLSL/Basic.xml
  3. 0 3
      Bin/CoreData/Shaders/HLSL/CopyFrameBuffer.xml
  4. 0 17
      Bin/CoreData/Shaders/HLSL/DeferredLight.xml
  5. 0 10
      Bin/CoreData/Shaders/HLSL/Depth.xml
  6. 3 1
      Bin/CoreData/Shaders/HLSL/Fog.hlsl
  7. 52 39
      Bin/CoreData/Shaders/HLSL/Lighting.hlsl
  8. 0 84
      Bin/CoreData/Shaders/HLSL/LitSolid.xml
  9. 0 17
      Bin/CoreData/Shaders/HLSL/PrepassLight.xml
  10. 2 0
      Bin/CoreData/Shaders/HLSL/Samplers.hlsl
  11. 2 0
      Bin/CoreData/Shaders/HLSL/ScreenPos.hlsl
  12. 0 10
      Bin/CoreData/Shaders/HLSL/Shadow.xml
  13. 0 3
      Bin/CoreData/Shaders/HLSL/Stencil.xml
  14. 2 0
      Bin/CoreData/Shaders/HLSL/Transform.hlsl
  15. 4 0
      Bin/CoreData/Shaders/HLSL/Uniforms.hlsl
  16. 0 16
      Bin/CoreData/Shaders/HLSL/Unlit.xml
  17. 2 4
      Docs/GettingStarted.dox
  18. 0 25
      Docs/Reference.dox
  19. 3 6
      Readme.txt
  20. 1 1
      Source/CMake/Modules/FindBCM_VC.cmake
  21. 1 1
      Source/CMake/Modules/FindDirect3D.cmake
  22. 1 1
      Source/CMake/Modules/FindUrho3D.cmake
  23. 3 3
      Source/CMake/Modules/Urho3D-CMake-magic.cmake
  24. 1 1
      Source/CMake/Toolchains/raspberrypi.toolchain.cmake
  25. 2 0
      Source/CMakeLists.txt
  26. 3 0
      Source/Engine/Graphics/CMakeLists.txt
  27. 12 12
      Source/Engine/Graphics/Direct3D9/D3D9Graphics.cpp
  28. 0 330
      Source/Engine/Graphics/Direct3D9/D3D9Shader.cpp
  29. 0 76
      Source/Engine/Graphics/Direct3D9/D3D9Shader.h
  30. 179 37
      Source/Engine/Graphics/Direct3D9/D3D9ShaderVariation.cpp
  31. 25 26
      Source/Engine/Graphics/Direct3D9/D3D9ShaderVariation.h
  32. 5 5
      Source/Engine/Graphics/OpenGL/OGLShaderVariation.cpp
  33. 2 2
      Source/Engine/Graphics/OpenGL/OGLShaderVariation.h
  34. 2 2
      Source/Engine/Graphics/Shader.cpp
  35. 1009 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBody.cpp
  36. 466 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBody.h
  37. 527 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp
  38. 166 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraint.h
  39. 795 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp
  40. 85 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.h
  41. 578 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp
  42. 56 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h
  43. 133 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp
  44. 44 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h
  45. 89 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp
  46. 47 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h
  47. 110 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyLink.h
  48. 92 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h
  49. 143 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp
  50. 60 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyPoint2Point.h
  51. 82 0
      Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodySolverConstraint.h
  52. 2079 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigLCP.cpp
  53. 77 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigLCP.h
  54. 112 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigSolver.h
  55. 626 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolver.cpp
  56. 81 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolver.h
  57. 33 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolverInterface.h
  58. 151 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btPATHSolver.h
  59. 80 0
      Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h
  60. 0 1
      Source/ThirdParty/MojoShader/CMakeLists.txt
  61. 5 0
      Source/ThirdParty/MojoShader/mojoshader.h
  62. 0 5
      Source/Tools/CMakeLists.txt
  63. 0 36
      Source/Tools/ShaderCompiler/CMakeLists.txt
  64. 0 606
      Source/Tools/ShaderCompiler/ShaderCompiler.cpp

+ 0 - 2
Bin/CompileAllShaders.bat

@@ -1,2 +0,0 @@
-shadercompiler CoreData/Shaders/HLSL/*.xml CoreData/Shaders/HLSL/SM3 -d SM3
-shadercompiler CoreData/Shaders/HLSL/*.xml CoreData/Shaders/HLSL/SM2

+ 0 - 16
Bin/CoreData/Shaders/HLSL/Basic.xml

@@ -1,16 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <option name="Diff" define="DIFFMAP" />
-        <option name="VCol" define="VERTEXCOLOR" />
-        <variation name="" />
-        <variation name="Skinned" define="SKINNED" />
-        <variation name="Instanced" define="INSTANCED" require="SM3" />
-        <variation name="Billboard" define="BILLBOARD" />
-    </shader>
-    <shader type="ps">
-        <option name="Diff" define="DIFFMAP" exclude="Alpha" />
-        <option name="Alpha" define="ALPHAMAP" exclude="Diff" />
-        <option name="AlphaMask" define="ALPHAMASK" require="DIFFMAP" />
-        <option name="VCol" define="VERTEXCOLOR" />
-    </shader>
-</shaders>

+ 0 - 3
Bin/CoreData/Shaders/HLSL/CopyFrameBuffer.xml

@@ -1,3 +0,0 @@
-<shaders>
-    <shader />
-</shaders>

+ 0 - 17
Bin/CoreData/Shaders/HLSL/DeferredLight.xml

@@ -1,17 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <option name="Ortho" define="ORTHO" />
-        <option name="Dir" define="DIRLIGHT" />
-    </shader>
-    <shader type="ps">
-        <option name="Ortho" define="ORTHO" />
-        <variation name="Dir" define="DIRLIGHT" />
-        <variation name="Spot" define="SPOTLIGHT" />
-        <variation name="Point" define="POINTLIGHT" />
-        <option name="Mask" define="CUBEMASK" require="POINTLIGHT" />
-        <option name="Spec" define="SPECULAR" />
-        <option name="Shadow" define="SHADOW" />
-        <option name="LQ" define="LQSHADOW" require="HWSHADOW" />
-        <option name="HW" define="HWSHADOW" require="SHADOW" />
-    </shader>
-</shaders>

+ 0 - 10
Bin/CoreData/Shaders/HLSL/Depth.xml

@@ -1,10 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <variation name="" />
-        <variation name="Skinned" define="SKINNED" />
-        <variation name="Instanced" define="INSTANCED" require="SM3" />
-    </shader>
-    <shader type="ps">
-        <option name="AlphaMask" define="ALPHAMASK" />
-    </shader>
-</shaders>

+ 3 - 1
Bin/CoreData/Shaders/HLSL/Fog.hlsl

@@ -1,3 +1,4 @@
+#ifdef COMPILEPS
 float3 GetFog(float3 color, float fogFactor)
 {
     return lerp(cFogColor, color, fogFactor);
@@ -18,5 +19,6 @@ float GetHeightFogFactor(float depth, float height)
     float fogFactor = GetFogFactor(depth);
     float heightFogFactor = (height - cFogParams.z) * cFogParams.w;
     heightFogFactor = 1.0 - saturate(exp(-(heightFogFactor * heightFogFactor)));
-	return min(heightFogFactor, fogFactor);
+    return min(heightFogFactor, fogFactor);
 }
+#endif

+ 52 - 39
Bin/CoreData/Shaders/HLSL/Lighting.hlsl

@@ -1,49 +1,11 @@
 #pragma warning(disable:3571)
 
-float GetDiffuse(float3 normal, float3 lightVec, out float3 lightDir)
-{
-    #ifdef DIRLIGHT
-        #ifdef NORMALMAP
-            // In normal mapped forward lighting, the tangent space light vector needs renormalization
-            lightDir = normalize(lightVec);
-        #else
-            lightDir = lightVec;
-        #endif
-
-        return saturate(dot(normal, lightDir));
-    #else
-        float lightDist = length(lightVec);
-        lightDir = lightVec / lightDist;
-        return saturate(dot(normal, lightDir)) * tex1D(sLightRampMap, lightDist).r;
-    #endif
-}
-
-float GetDiffuseVolumetric(float3 lightVec)
-{
-    #ifdef DIRLIGHT
-        return 1.0;
-    #else
-        float lightDist = length(lightVec);
-        return tex1D(sLightRampMap, lightDist).r;
-    #endif
-}
-
-float GetSpecular(float3 normal, float3 eyeVec, float3 lightDir, float specularPower)
-{
-    float3 halfVec = normalize(normalize(eyeVec) + lightDir);
-    return pow(dot(normal, halfVec), specularPower);
-}
-
+#ifdef COMPILEVS
 float3 GetAmbient(float zonePos)
 {
     return cAmbientStartColor + zonePos * cAmbientEndColor;
 }
 
-float GetIntensity(float3 color)
-{
-    return dot(color, float3(0.333, 0.333, 0.333));
-}
-
 #ifdef NUMVERTEXLIGHTS
 float GetVertexLight(int index, float3 worldPos, float3 normal)
 {
@@ -122,6 +84,56 @@ void GetShadowPos(float4 projWorldPos, out float4 shadowPos[NUMCASCADES])
         shadowPos[0] = float4(projWorldPos.xyz - cLightPos.xyz, 0.0);
     #endif
 }
+#endif
+#endif
+
+#ifdef COMPILEPS
+float GetDiffuse(float3 normal, float3 lightVec, out float3 lightDir)
+{
+    #ifdef DIRLIGHT
+        #ifdef NORMALMAP
+            // In normal mapped forward lighting, the tangent space light vector needs renormalization
+            lightDir = normalize(lightVec);
+        #else
+            lightDir = lightVec;
+        #endif
+
+        return saturate(dot(normal, lightDir));
+    #else
+        float lightDist = length(lightVec);
+        lightDir = lightVec / lightDist;
+        return saturate(dot(normal, lightDir)) * tex1D(sLightRampMap, lightDist).r;
+    #endif
+}
+
+float GetDiffuseVolumetric(float3 lightVec)
+{
+    #ifdef DIRLIGHT
+        return 1.0;
+    #else
+        float lightDist = length(lightVec);
+        return tex1D(sLightRampMap, lightDist).r;
+    #endif
+}
+
+float GetSpecular(float3 normal, float3 eyeVec, float3 lightDir, float specularPower)
+{
+    float3 halfVec = normalize(normalize(eyeVec) + lightDir);
+    return pow(dot(normal, halfVec), specularPower);
+}
+
+float GetIntensity(float3 color)
+{
+    return dot(color, float3(0.333, 0.333, 0.333));
+}
+
+#ifdef SHADOW
+
+#ifdef DIRLIGHT
+    #define NUMCASCADES 4
+#else
+    #define NUMCASCADES 1
+#endif
 
 float GetShadow(float4 shadowPos)
 {
@@ -256,4 +268,5 @@ float GetShadowDeferred(float4 projWorldPos, float depth)
         return GetPointShadow(shadowPos);
     #endif
 }
+#endif
 #endif

+ 0 - 84
Bin/CoreData/Shaders/HLSL/LitSolid.xml

@@ -1,84 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <option name="Normal" define="NORMALMAP" />
-        <option name="EnvCube" define="ENVCUBEMAP" />
-        <option name="LightMap" define="LIGHTMAP" exclude="AO" />
-        <option name="AO" define="AO" exclude="LightMap" />
-        <option name="HeightFog" define="HEIGHTFOG" require="SM3" />
-        <variation name="" define="AMBIENT" />
-        <variation name="1VL" define="NUMVERTEXLIGHTS=1" />
-        <variation name="2VL" define="NUMVERTEXLIGHTS=2" />
-        <variation name="3VL" define="NUMVERTEXLIGHTS=3" />
-        <variation name="4VL" define="NUMVERTEXLIGHTS=4" />
-        <variation name="Dir" exclude="EnvCube">
-            <define name="DIRLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <variation name="Spot" exclude="EnvCube">
-            <define name="SPOTLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <variation name="Point" exclude="EnvCube">
-            <define name="POINTLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <option name="Spec" define="SPECULAR" require="PERPIXEL" />
-        <option name="Shadow" define="SHADOW" require="PERPIXEL" />
-        <variation name="" />
-        <variation name="Skinned" define="SKINNED" />
-        <variation name="Instanced" define="INSTANCED" require="SM3" />
-        <variation name="Billboard" define="BILLBOARD" />
-    </shader>
-    <shader type="ps">
-        <option name="Diff" define="DIFFMAP" />
-        <option name="Normal" define="NORMALMAP" />
-        <option name="Packed" define="PACKEDNORMAL" require="NORMALMAP" />
-        <option name="SpecMap" define="SPECMAP" require="DIFFMAP" />
-        <option name="EnvCube" define="ENVCUBEMAP" />
-        <variation name="" />
-        <variation name="LightMap" define="LIGHTMAP" />
-        <variation name="AO" define="AO" />
-        <variation name="Emissive" define="EMISSIVEMAP" />
-        <option name="AlphaMask" define="ALPHAMASK" require="DIFFMAP" />
-        <option name="Ambient" define="AMBIENT" require="PERPIXEL" />
-        <option name="HeightFog" define="HEIGHTFOG" require="SM3" />
-        <variation name="" define="AMBIENT" />
-        <variation name="Dir">
-            <exclude name="EnvCube" />
-            <exclude name="LightMap" />
-            <exclude name="AO" />
-            <exclude name="Emissive" />
-            <define name="DIRLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <variation name="Spot">
-            <exclude name="EnvCube" />
-            <exclude name="LightMap" />
-            <exclude name="AO" />
-            <exclude name="Emissive" />
-            <define name="SPOTLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <variation name="Point">
-            <exclude name="EnvCube" />
-            <exclude name="LightMap" />
-            <exclude name="AO" />
-            <exclude name="Emissive" />
-            <define name="POINTLIGHT" />
-            <define name="PERPIXEL" />
-        </variation>
-        <variation name="Prepass" define="PREPASS">
-            <exclude name="EnvCube" />
-            <exclude name="LightMap" />
-            <exclude name="AO" />
-            <exclude name="Emissive" />
-        </variation>
-        <variation name="Material" define="MATERIAL" />
-        <variation name="Deferred" define="DEFERRED" />
-        <option name="Mask" define="CUBEMASK" require="POINTLIGHT" />
-        <option name="Spec" define="SPECULAR" require="PERPIXEL" />
-        <option name="Shadow" define="SHADOW" require="PERPIXEL" />
-        <option name="LQ" define="LQSHADOW" require="HWSHADOW" />
-        <option name="HW" define="HWSHADOW" require="SHADOW" />
-    </shader>
-</shaders>

+ 0 - 17
Bin/CoreData/Shaders/HLSL/PrepassLight.xml

@@ -1,17 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <option name="Ortho" define="ORTHO" />
-        <option name="Dir" define="DIRLIGHT" />
-    </shader>
-    <shader type="ps">
-        <option name="Ortho" define="ORTHO" />
-        <variation name="Dir" define="DIRLIGHT" />
-        <variation name="Spot" define="SPOTLIGHT" />
-        <variation name="Point" define="POINTLIGHT" />
-        <option name="Mask" define="CUBEMASK" require="POINTLIGHT" />
-        <option name="Spec" define="SPECULAR" />
-        <option name="Shadow" define="SHADOW" />
-        <option name="LQ" define="LQSHADOW" require="HWSHADOW" />
-        <option name="HW" define="HWSHADOW" require="SHADOW" />
-    </shader>
-</shaders>

+ 2 - 0
Bin/CoreData/Shaders/HLSL/Samplers.hlsl

@@ -1,3 +1,4 @@
+#ifdef COMPILEPS
 sampler2D sDiffMap : register(S0);
 samplerCUBE sDiffCubeMap : register(S0);
 sampler2D sAlbedoBuffer : register(S0);
@@ -38,3 +39,4 @@ float3 DecodeNormal(float4 normalInput)
         return normalize(normalInput.rgb * 2.0 - 1.0);
     #endif
 }
+#endif

+ 2 - 0
Bin/CoreData/Shaders/HLSL/ScreenPos.hlsl

@@ -1,3 +1,4 @@
+#ifdef COMPILEVS
 float4 GetScreenPos(float4 clipPos)
 {
     return float4(
@@ -47,3 +48,4 @@ float3 GetNearRay(float4 clipPos)
 
     return mul(viewRay, cCameraRot) * cDepthMode.z;
 }
+#endif

+ 0 - 10
Bin/CoreData/Shaders/HLSL/Shadow.xml

@@ -1,10 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <variation name="" />
-        <variation name="Skinned" define="SKINNED" />
-        <variation name="Instanced" define="INSTANCED" require="SM3" />
-    </shader>
-    <shader type="ps">
-        <option name="AlphaMask" define="ALPHAMASK" />
-    </shader>
-</shaders>

+ 0 - 3
Bin/CoreData/Shaders/HLSL/Stencil.xml

@@ -1,3 +0,0 @@
-<shaders>
-    <shader />
-</shaders>

+ 2 - 0
Bin/CoreData/Shaders/HLSL/Transform.hlsl

@@ -1,3 +1,4 @@
+#ifdef COMPILEVS
 #ifdef SKINNED
 float4x3 GetSkinMatrix(float4 blendWeights, int4 blendIndices)
 {
@@ -59,3 +60,4 @@ float3 GetBillboardNormal()
 #endif
 
 #define GetWorldTangent(modelMatrix) normalize(mul(iTangent.xyz, (float3x3)modelMatrix))
+#endif

+ 4 - 0
Bin/CoreData/Shaders/HLSL/Uniforms.hlsl

@@ -1,3 +1,4 @@
+#ifdef COMPILEVS
 // Vertex shader uniforms
 uniform float3 cAmbientStartColor;
 uniform float3 cAmbientEndColor;
@@ -25,7 +26,9 @@ uniform float4x4 cLightMatrices[4];
 #ifdef NUMVERTEXLIGHTS
     uniform float4 cVertexLights[4*3];
 #endif
+#endif
 
+#ifdef COMPILEPS
 // Pixel shader uniforms
 uniform float3 cAmbientColor;
 uniform float cDeltaTimePS;
@@ -52,3 +55,4 @@ uniform float4 cShadowSplits;
 #else
     uniform float4x4 cLightMatricesPS[3];
 #endif
+#endif

+ 0 - 16
Bin/CoreData/Shaders/HLSL/Unlit.xml

@@ -1,16 +0,0 @@
-<shaders>
-    <shader type="vs">
-        <option name="VCol" define="VERTEXCOLOR" />
-        <option name="HeightFog" define="HEIGHTFOG" require="SM3" />
-        <variation name="" />
-        <variation name="Skinned" define="SKINNED" />
-        <variation name="Instanced" define="INSTANCED" require="SM3" />
-        <variation name="Billboard" define="BILLBOARD" />
-    </shader>
-    <shader type="ps">
-        <option name="Diff" define="DIFFMAP" />
-        <option name="VCol" define="VERTEXCOLOR" />
-        <option name="AlphaMask" define="ALPHAMASK" require="DIFFMAP" />
-        <option name="HeightFog" define="HEIGHTFOG" require="SM3" />
-    </shader>
-</shaders>

+ 2 - 4
Docs/GettingStarted.dox

@@ -115,11 +115,9 @@ Currently CMake build configuration has been set to compile Urho3D as 32bit by d
 
 \section Building_Shaders Compiling Direct3D shaders
 
-When building with the Windows 8 SDK, copy d3dcompiler_46.dll from C:/Program Files (x86)/Windows Kits/8.0/bin/x86 to Urho3D Bin directory so that the ShaderCompiler program will run correctly.
+When building with the Windows 8 SDK, copy d3dcompiler_46.dll from C:/Program Files (x86)/Windows Kits/8.0/bin/x86 to Urho3D Bin directory so that Urho3D executables will run correctly.
 
-To make the Urho3D examples start faster on Windows & Direct3D9 mode, run CompileAllShaders.bat from the Bin directory first.
-
-Note that you can also force an OpenGL mode build on Windows by using the CMake option in the table below; OpenGL does not need a separate shader compilation step or utility.
+Note that you can also force an OpenGL mode build on Windows by using the CMake option in the table below; OpenGL does not depend on a separate shader compiler DLL.
 
 \section Building_Docs Documentation build
 

+ 0 - 25
Docs/Reference.dox

@@ -1831,31 +1831,6 @@ The output files are saved with the extension .asc (compiled AngelScript.) Binar
 
 The script API dump mode can be used to replace the 'ScriptAPI.dox' file in the 'Docs' directory. If the output file name is not provided then the script API would be dumped to standard output (console) instead.
 
-\section Tools_ShaderCompiler ShaderCompiler
-
-Compiles HLSL shaders using an XML definition file that describes the shader permutations, and their associated HLSL preprocessor defines.
-
-The output consists of shader bytecode for each permutation, as well as information of the constant parameters and texture units used. See \ref FileFormats_Shader "Binary shader format" for details.
-
-Usage:
-
-\verbatim
-ShaderCompiler <definitionfile> <outputpath> [options]
-
-Options:
--t <VS|PS>  Compile only vertex or pixel shaders, by default compile both
--v <name>   Compile only the shader variation with name
--d <define> Add a define. Add SM3 to compile for Shader Model 3
-
-If output path is not specified, shader binaries will be output into the same
-directory as the definition file. Specify a wildcard to compile multiple
-shaders.
-\endverbatim
-
-The D3DX library from the DirectX runtime or SDK needs to be installed. Note that when running in Direct3D9 mode, the engine will automatically invoke ShaderCompiler if it can not find a shader in binary form. Depending on shader complexity this can take a substantial amount of time. To avoid this, execute the file CompileAllShaders.bat in the Bin directory to precompile all shader permutations. It is also highly recommended to not ship ShaderCompiler with Urho3D applications, but to instead precompile all shaders. After precompiling you can delete all .hlsl files from the CoreData directory of the shipping build.
-
-Note that GLSL shaders in the Bin/CoreData/Shaders/GLSL directory also use similar XML definition files, but no precompiling tool exists for them; they are always compiled during runtime.
-
 \page Unicode Unicode support
 
 The String class supports UTF-8 encoding. However, by default strings are treated as a sequence of bytes without regard to the encoding. There is a separate

+ 3 - 6
Readme.txt

@@ -363,14 +363,11 @@ Compiling Direct3D shaders
 
 When building with the Windows 8 SDK, copy d3dcompiler_46.dll from
 C:/Program Files (x86)/Windows Kits/8.0/bin/x86 to Urho3D Bin directory so that
-the ShaderCompiler program will run correctly.
-
-To make the Urho3D examples start faster on Windows & Direct3D9 mode, run
-CompileAllShaders.bat from the Bin directory first.
+Urho3D executables will run correctly.
 
 Note that you can also force an OpenGL mode build on Windows by using the CMake
-option in the table below; OpenGL does not need a separate shader compilation 
-step or utility.
+option in the table below; OpenGL does not depend on a separate shader compiler
+DLL.
 
 
 Documentation build

+ 1 - 1
Source/CMake/Modules/FindBCM_VC.cmake

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2008-2013 the Urho3D project.
+# Copyright (c) 2008-2014 the Urho3D project.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal

+ 1 - 1
Source/CMake/Modules/FindDirect3D.cmake

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2008-2013 the Urho3D project.
+# Copyright (c) 2008-2014 the Urho3D project.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal

+ 1 - 1
Source/CMake/Modules/FindUrho3D.cmake

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2008-2013 the Urho3D project.
+# Copyright (c) 2008-2014 the Urho3D project.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal

+ 3 - 3
Source/CMake/Modules/Urho3D-CMake-magic.cmake

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2008-2013 the Urho3D project.
+# Copyright (c) 2008-2014 the Urho3D project.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -600,9 +600,9 @@ macro (define_dependency_libs TARGET)
             endif ()
         else ()
             if (IS_ABSOLUTE ${DIRECT3D_LIBRARIES})
-                list (APPEND ABSOLUTE_PATH_LIBS ${DIRECT3D_LIBRARIES})
+                list (APPEND ABSOLUTE_PATH_LIBS ${DIRECT3D_LIBRARIES} ${DIRECT3D_COMPILER_LIBRARIES})
             else ()
-                list (APPEND LINK_LIBS_ONLY ${DIRECT3D_LIBRARIES})
+                list (APPEND LINK_LIBS_ONLY ${DIRECT3D_LIBRARIES} ${DIRECT3D_COMPILER_LIBRARIES})
             endif ()
         endif ()
 

+ 1 - 1
Source/CMake/Toolchains/raspberrypi.toolchain.cmake

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2008-2013 the Urho3D project.
+# Copyright (c) 2008-2014 the Urho3D project.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal

+ 2 - 0
Source/CMakeLists.txt

@@ -92,6 +92,8 @@ endif ()
 if (NOT IOS AND NOT ANDROID AND NOT RASPI)
     if (USE_OPENGL)
         add_subdirectory (ThirdParty/GLEW)
+    else ()
+        add_subdirectory (ThirdParty/MojoShader)
     endif ()
     add_subdirectory (ThirdParty/LibCpuId)
 endif ()

+ 3 - 0
Source/Engine/Graphics/CMakeLists.txt

@@ -32,3 +32,6 @@ define_source_files (GLOB_CPP_PATTERNS *.cpp ${GRAPHICS_SYS_DIR}/*.cpp GLOB_H_PA
 if (USE_OPENGL AND NOT IOS AND NOT ANDROID AND NOT RASPI)
     set (ENGINE_INCLUDE_DIRS_ONLY ${ENGINE_INCLUDE_DIRS_ONLY} ../ThirdParty/GLEW PARENT_SCOPE)
 endif ()
+if (NOT USE_OPENGL)
+    set (ENGINE_INCLUDE_DIRS_ONLY ${ENGINE_INCLUDE_DIRS_ONLY} ../ThirdParty/MojoShader PARENT_SCOPE)
+endif ()

+ 12 - 12
Source/Engine/Graphics/Direct3D9/D3D9Graphics.cpp

@@ -1026,18 +1026,18 @@ void Graphics::SetShaders(ShaderVariation* vs, ShaderVariation* ps)
         }
         
         // Create the shader now if not yet created. If already attempted, do not retry
-        if (vs && !vs->IsCreated())
+        if (vs && !vs->IsCompiled())
         {
-            if (!vs->IsFailed())
+            if (vs->GetCompilerOutput().Empty())
             {
-                PROFILE(CreateVertexShader);
-                
+                PROFILE(CompileVertexShader);
+
                 bool success = vs->Create();
                 if (success)
-                    LOGDEBUG("Created vertex shader " + vs->GetName());
+                    LOGDEBUG("Compiled vertex shader " + vs->GetName());
                 else
                 {
-                    LOGERROR("Failed to create vertex shader " + vs->GetName());
+                    LOGERROR("Failed to compile vertex shader " + vs->GetName() + ":\n" + vs->GetCompilerOutput());
                     vs = 0;
                 }
             }
@@ -1071,18 +1071,18 @@ void Graphics::SetShaders(ShaderVariation* vs, ShaderVariation* ps)
                 i->second_.register_ = M_MAX_UNSIGNED;
         }
         
-        if (ps && !ps->IsCreated())
+        if (ps && !ps->IsCompiled())
         {
-            if (!ps->IsFailed())
+            if (ps->GetCompilerOutput().Empty())
             {
-                PROFILE(CreatePixelShader);
-                
+                PROFILE(CompilePixelShader);
+
                 bool success = ps->Create();
                 if (success)
-                    LOGDEBUG("Created pixel shader " + ps->GetName());
+                    LOGDEBUG("Compiled pixel shader " + ps->GetName());
                 else
                 {
-                    LOGERROR("Failed to create pixel shader " + ps->GetName());
+                    LOGERROR("Failed to compile pixel shader " + ps->GetName() + ":\n" + ps->GetCompilerOutput());
                     ps = 0;
                 }
             }

+ 0 - 330
Source/Engine/Graphics/Direct3D9/D3D9Shader.cpp

@@ -1,330 +0,0 @@
-//
-// Copyright (c) 2008-2014 the Urho3D project.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-
-#include "Precompiled.h"
-#include "Context.h"
-#include "Deserializer.h"
-#include "File.h"
-#include "FileSystem.h"
-#include "Graphics.h"
-#include "GraphicsImpl.h"
-#include "Log.h"
-#include "Profiler.h"
-#include "ResourceCache.h"
-#include "Shader.h"
-#include "ShaderVariation.h"
-#include "XMLFile.h"
-
-#include "DebugNew.h"
-
-namespace Urho3D
-{
-
-Shader::Shader(Context* context) :
-    Resource(context),
-    sourceModifiedTime_(0)
-{
-}
-
-Shader::~Shader()
-{
-    ResourceCache* cache = GetSubsystem<ResourceCache>();
-    if (cache)
-        cache->ResetDependencies(this);
-}
-
-void Shader::RegisterObject(Context* context)
-{
-    context->RegisterFactory<Shader>();
-}
-
-bool Shader::Load(Deserializer& source)
-{
-    PROFILE(LoadShader);
-    
-    ResourceCache* cache = GetSubsystem<ResourceCache>();
-    cache->ResetDependencies(this);
-    
-    Graphics* graphics = GetSubsystem<Graphics>();
-    if (!graphics)
-        return false;
-    
-    if (graphics->GetSM3Support())
-    {
-        vsExtension_ = ".vs3";
-        psExtension_ = ".ps3";
-        subDir_ = "SM3/";
-    }
-    else
-    {
-        vsExtension_ = ".vs2";
-        psExtension_ = ".ps2";
-        subDir_ = "SM2/";
-    }
-    
-    // Get absolute file name of the shader in case we need to invoke ShaderCompiler. This only works if the shader was not
-    // loaded from a package file
-    fullFileName_.Clear();
-    sourceModifiedTime_ = 0;
-    
-    File* sourceFile = dynamic_cast<File*>(&source);
-    if (sourceFile && !sourceFile->IsPackaged())
-    {
-        PROFILE(CheckTimestamps);
-        
-        FileSystem* fileSystem = GetSubsystem<FileSystem>();
-        
-        if (fileSystem && !fileSystem->HasRegisteredPaths())
-        {
-            fullFileName_ = cache->GetResourceFileName(GetName());
-            if (!fullFileName_.Empty())
-            {
-                // Get last modified time of the main HLSL source file
-                String path, fileName, extension;
-                SplitPath(fullFileName_, path, fileName, extension);
-                String hlslFileName = path + fileName + ".hlsl";
-                sourceModifiedTime_ = fileSystem->GetLastModifiedTime(hlslFileName);
-                
-                String resourcePath = GetPath(GetName());
-                cache->StoreResourceDependency(this, resourcePath + fileName + ".hlsl");
-                
-                // Check also timestamps of any included files and the shader description file
-                if (sourceModifiedTime_)
-                {
-                    SharedPtr<File> file(new File(context_, hlslFileName));
-                    while (file->IsOpen() && !file->IsEof())
-                    {
-                        String line = file->ReadLine();
-                        if (line.StartsWith("#include"))
-                        {
-                            String includeName = line.Substring(9).Replaced("\"", "").Trimmed();
-                            String includeFullName = path + includeName;
-                            cache->StoreResourceDependency(this, resourcePath + includeName);
-                            
-                            unsigned includeFileTime = fileSystem->GetLastModifiedTime(includeFullName);
-                            if (includeFileTime > sourceModifiedTime_)
-                                sourceModifiedTime_ = includeFileTime;
-                        }
-                    }
-                    
-                    unsigned descriptionFileTime = fileSystem->GetLastModifiedTime(fullFileName_);
-                    if (descriptionFileTime > sourceModifiedTime_)
-                        sourceModifiedTime_ = descriptionFileTime;
-                }
-                else
-                {
-                    // If the HLSL file was not found, do not attempt to compile shaders
-                    fullFileName_.Clear();
-                }
-            }
-        }
-    }
-    
-    SharedPtr<XMLFile> xml(new XMLFile(context_));
-    if (!xml->Load(source))
-        return false;
-    
-    XMLElement shaders = xml->GetRoot("shaders");
-    if (!shaders)
-    {
-        LOGERROR("No shaders element in " + source.GetName());
-        return false;
-    }
-    
-    Vector<String> globalDefines;
-    Vector<String> globalDefineValues;
-    
-    if (graphics->GetSM3Support())
-    {
-        globalDefines.Push("SM3");
-        globalDefineValues.Push("1");
-    }
-    
-    {
-        PROFILE(ParseShaderDefinition);
-        
-        if (!vsParser_.Parse(VS, shaders, false, globalDefines, globalDefineValues))
-        {
-            LOGERROR("VS: " + vsParser_.GetErrorMessage());
-            return false;
-        }
-        if (!psParser_.Parse(PS, shaders, false, globalDefines, globalDefineValues))
-        {
-            LOGERROR("PS: " + psParser_.GetErrorMessage());
-            return false;
-        }
-    }
-    
-    // If variations had already been created, clear their bytecode
-    for (HashMap<StringHash, SharedPtr<ShaderVariation> >::Iterator i = vsVariations_.Begin(); i != vsVariations_.End(); ++i)
-    {
-        i->second_->Release();
-        i->second_->SetByteCode(SharedArrayPtr<unsigned char>());
-    }
-    for (HashMap<StringHash, SharedPtr<ShaderVariation> >::Iterator i = psVariations_.Begin(); i != psVariations_.End(); ++i)
-    {
-        i->second_->Release();
-        i->second_->SetByteCode(SharedArrayPtr<unsigned char>());
-    }
-    
-    SetMemoryUse(sizeof(Shader) + 2 * sizeof(ShaderParser) + (vsVariations_.Size() + psVariations_.Size()) *
-        sizeof(ShaderVariation));
-    return true;
-}
-
-ShaderVariation* Shader::GetVariation(ShaderType type, const String& name)
-{
-    Graphics* graphics = GetSubsystem<Graphics>();
-    if (!graphics)
-        return 0;
-    
-    StringHash nameHash(name);
-    ShaderParser& parser = type == VS ? vsParser_ : psParser_;
-    HashMap<StringHash, SharedPtr<ShaderVariation> >& variations = type == VS ? vsVariations_ : psVariations_;
-    
-    if (parser.HasCombination(name))
-    {
-        HashMap<StringHash, SharedPtr<ShaderVariation> >::Iterator i = variations.Find(nameHash);
-        // Create the shader variation now if not created yet
-        if (i == variations.End())
-        {
-            String path, fileName, extension;
-            SplitPath(GetName(), path, fileName, extension);
-            
-            String compiledShaderName = path + subDir_ + fileName;
-            if (!name.Empty())
-                compiledShaderName += "_" + name;
-            compiledShaderName += type == VS ? vsExtension_ : psExtension_;
-            
-            i = variations.Insert(MakePair(nameHash, SharedPtr<ShaderVariation>(new ShaderVariation(this, type))));
-            i->second_->SetName(compiledShaderName);
-            
-            SetMemoryUse(GetMemoryUse() + sizeof(ShaderVariation));
-        }
-        
-        return i->second_;
-    }
-    else
-        return 0;
-}
-
-bool Shader::PrepareVariation(ShaderVariation* variation)
-{
-    if (!variation)
-        return false;
-    
-    FileSystem* fileSystem = GetSubsystem<FileSystem>();
-    Graphics* graphics = GetSubsystem<Graphics>();
-    ResourceCache* cache = GetSubsystem<ResourceCache>();
-    if (!fileSystem || !graphics || !cache)
-        return false;
-    
-    String shaderName = variation->GetName();
-    String variationName = GetFileName(shaderName);
-    unsigned pos = variationName.Find('_');
-    if (pos != String::NPOS)
-        variationName = variationName.Substring(pos + 1);
-    else
-        variationName.Clear();
-    
-    if (!fullFileName_.Empty())
-    {
-        if (fileSystem->GetLastModifiedTime(cache->GetResourceFileName(shaderName)) < sourceModifiedTime_)
-        {
-            PROFILE(CompileShader);
-            
-            LOGINFO("Compiling shader " + shaderName);
-            
-            Vector<String> arguments;
-            arguments.Push("\"" + fullFileName_ + "\"");
-            arguments.Push("\"" + GetPath(fullFileName_) + subDir_ + "\"");
-            arguments.Push(variation->GetShaderType() == VS ? "-t VS" : "-t PS");
-            arguments.Push("-v \"" + variationName + "\"");
-            if (graphics->GetSM3Support())
-                arguments.Push("-d SM3");
-            
-            if (fileSystem->SystemRun(fileSystem->GetProgramDir() + "ShaderCompiler", arguments) != 0)
-            {
-                LOGERROR("Failed to compile shader " + shaderName);
-                return false;
-            }
-        }
-    }
-    
-    SharedPtr<File> file(cache->GetFile(shaderName));
-    if (!file)
-        return false;
-    
-    if (file->ReadFileID() != "USHD")
-    {
-        LOGERROR(shaderName + " is not a valid shader bytecode file");
-        return false;
-    }
-    /// \todo Check that shader type and model match
-    unsigned short shaderType = file->ReadUShort();
-    unsigned short shaderModel = file->ReadUShort();
-    
-    unsigned numParameters = file->ReadUInt();
-    for (unsigned i = 0; i < numParameters; ++i)
-    {
-        String paramName = file->ReadString();
-        unsigned reg = file->ReadUByte();
-        unsigned regCount = file->ReadUByte();
-        
-        StringHash nameHash(paramName);
-        ShaderParameter parameter(variation->GetShaderType(), reg, regCount);
-        variation->AddParameter(nameHash, parameter);
-        
-        // Register the parameter globally
-        graphics->RegisterShaderParameter(nameHash, parameter);
-    }
-    
-    unsigned numTextureUnits = file->ReadUInt();
-    for (unsigned i = 0; i < numTextureUnits; ++i)
-    {
-        String unitName = file->ReadString();
-        unsigned sampler = file->ReadUByte();
-        
-        TextureUnit tuIndex = graphics->GetTextureUnit(unitName);
-        if (tuIndex < MAX_TEXTURE_UNITS)
-            variation->AddTextureUnit(tuIndex);
-        else if (sampler < MAX_TEXTURE_UNITS)
-            variation->AddTextureUnit((TextureUnit)sampler);
-    }
-    
-    unsigned byteCodeSize = file->ReadUInt();
-    if (byteCodeSize)
-    {
-        SharedArrayPtr<unsigned char> byteCode(new unsigned char[byteCodeSize]);
-        file->Read(byteCode.Get(), byteCodeSize);
-        variation->SetByteCode(byteCode);
-        SetMemoryUse(GetMemoryUse() + byteCodeSize);
-        return true;
-    }
-    else
-    {
-        LOGERROR("Shader " + shaderName + " has zero length bytecode");
-        return false;
-    }
-}
-
-}

+ 0 - 76
Source/Engine/Graphics/Direct3D9/D3D9Shader.h

@@ -1,76 +0,0 @@
-//
-// Copyright (c) 2008-2014 the Urho3D project.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-
-#pragma once
-
-#include "HashSet.h"
-#include "Resource.h"
-#include "ShaderParser.h"
-
-namespace Urho3D
-{
-
-class ShaderVariation;
-
-/// %Shader resource consisting of several shader variations.
-class URHO3D_API Shader : public Resource
-{
-    OBJECT(Shader);
-    
-public:
-    /// Construct.
-    Shader(Context* context);
-    /// Destruct.
-    virtual ~Shader();
-    /// Register object factory.
-    static void RegisterObject(Context* context);
-    
-    /// Load resource. Return true if successful.
-    virtual bool Load(Deserializer& source);
-    
-    /// Return a named variation. Return null if not found.
-    ShaderVariation* GetVariation(ShaderType type, const String& name);
-    /// Prepare a shader variation by either compiling it or loading the already compiled bytecode.
-    bool PrepareVariation(ShaderVariation* variation);
-    
-private:
-    /// Vertex shader variation parser.
-    ShaderParser vsParser_;
-    /// Pixel shader variation parser.
-    ShaderParser psParser_;
-    /// Vertex shader variations.
-    HashMap<StringHash, SharedPtr<ShaderVariation> > vsVariations_;
-    /// Pixel shader variations.
-    HashMap<StringHash, SharedPtr<ShaderVariation> > psVariations_;
-    /// Absolute filename of the shader description file.
-    String fullFileName_;
-    /// Compiled vertex shader file extension.
-    String vsExtension_;
-    /// Compiled vertex shader file extension.
-    String psExtension_;
-    /// Subdirectory for compiled shaders.
-    String subDir_;
-    /// Shader source last modified time.
-    unsigned sourceModifiedTime_;
-};
-
-}

+ 179 - 37
Source/Engine/Graphics/Direct3D9/D3D9ShaderVariation.cpp

@@ -26,6 +26,10 @@
 #include "Shader.h"
 #include "ShaderVariation.h"
 
+#include <windows.h>
+#include <d3dcompiler.h>
+#include <mojoshader.h>
+
 #include "DebugNew.h"
 
 namespace Urho3D
@@ -34,10 +38,11 @@ namespace Urho3D
 ShaderVariation::ShaderVariation(Shader* owner, ShaderType type) :
     GPUObject(owner->GetSubsystem<Graphics>()),
     owner_(owner),
-    shaderType_(type),
-    failed_(false)
+    type_(type),
+    compiled_(false)
 {
-    ClearParameters();
+    for (unsigned i = 0; i < MAX_TEXTURE_UNITS; ++i)
+        useTextureUnit_[i] = false;
 }
 
 ShaderVariation::~ShaderVariation()
@@ -52,27 +57,119 @@ bool ShaderVariation::Create()
     if (!graphics_)
         return false;
     
-    // Load bytecode and/or compile shader if necessary
-    if (!byteCode_ && owner_)
-        owner_->PrepareVariation(this);
+    // Compile shader if don't have loadable bytecode
+    PODVector<unsigned> byteCode;
+    
+    if (byteCode.Empty())
+    {
+        Vector<String> defines = defines_.Split(' ');
+        
+        // Set the entrypoint, profile and flags according to the shader being compiled
+        const char* entryPoint = 0;
+        const char* profile = 0;
+        unsigned flags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
+        bool useSM3 = graphics_->GetSM3Support();
+        
+        if (type_ == VS)
+        {
+            entryPoint = "VS";
+            defines.Push("COMPILEVS");
+            if (!useSM3)
+                profile = "vs_2_0";
+            else
+                profile = "vs_3_0";
+        }
+        else
+        {
+            entryPoint = "PS";
+            defines.Push("COMPILEPS");
+            if (!useSM3)
+                profile = "ps_2_0";
+            else
+            {
+                profile = "ps_3_0";
+                flags |= D3DCOMPILE_PREFER_FLOW_CONTROL;
+            }
+        }
+        
+        if (useSM3)
+            defines.Push("SM3");
+        
+        // Collect defines into macros
+        Vector<String> defineValues;
+        PODVector<D3D_SHADER_MACRO> macros;
+        
+        for (unsigned i = 0; i < defines.Size(); ++i)
+        {
+            unsigned equalsPos = defines[i].Find('=');
+            if (equalsPos != String::NPOS)
+            {
+                defineValues.Push(defines[i].Substring(equalsPos + 1));
+                defines[i].Resize(equalsPos);
+            }
+            else
+                defineValues.Push("1");
+        }
+        for (unsigned i = 0; i < defines.Size(); ++i)
+        {
+            D3D_SHADER_MACRO macro;
+            macro.Name = defines[i].CString();
+            macro.Definition = defineValues[i].CString();
+            macros.Push(macro);
+        }
+        
+        D3D_SHADER_MACRO endMacro;
+        endMacro.Name = 0;
+        endMacro.Definition = 0;
+        macros.Push(endMacro);
+        
+        // Compile using D3DCompile
+        const String& sourceCode = owner_->GetSourceCode(type_);
+        LPD3DBLOB shaderCode = 0;
+        LPD3DBLOB errorMsgs = 0;
+        
+        if (FAILED(D3DCompile(sourceCode.CString(), sourceCode.Length(), owner_->GetName().CString(), &macros.Front(), 0,
+            entryPoint, profile, flags, 0, &shaderCode, &errorMsgs)))
+            compilerOutput_ = String((const char*)errorMsgs->GetBufferPointer(), errorMsgs->GetBufferSize());
+        else
+        {
+            // Inspect the produced bytecode using MojoShader, then strip and store it
+            unsigned char* bufData = (unsigned char*)shaderCode->GetBufferPointer();
+            unsigned bufSize = shaderCode->GetBufferSize();
+            ParseParameters(bufData, bufSize);
+            CopyStrippedCode(byteCode, bufData, bufSize);
+        }
+
+        if (shaderCode)
+            shaderCode->Release();
+        if (errorMsgs)
+            errorMsgs->Release();
+        if (byteCode.Empty())
+            return false;
+    }
     
+    // Then create shader from the bytecode
     IDirect3DDevice9* device = graphics_->GetImpl()->GetDevice();
-    if (shaderType_ == VS)
+    if (type_ == VS)
     {
-        if (!byteCode_ || !device || FAILED(device->CreateVertexShader(
-            (const DWORD*)byteCode_.Get(),
+        if (!device || FAILED(device->CreateVertexShader(
+            (const DWORD*)&byteCode[0],
             (IDirect3DVertexShader9**)&object_)))
-            failed_ = true;
+            compilerOutput_ = "Could not create vertex shader";
+        else
+            compiled_ = true;
     }
     else
     {
-        if (!byteCode_ || !device || FAILED(device->CreatePixelShader(
-            (const DWORD*)byteCode_.Get(),
+        if (!device || FAILED(device->CreatePixelShader(
+            (const DWORD*)&byteCode[0],
             (IDirect3DPixelShader9**)&object_)))
-            failed_ = true;
+            compilerOutput_ = "Could not create pixel shader";
+        else
+            compiled_ = true;
     }
     
-    return !failed_;
+    return compiled_;
 }
 
 void ShaderVariation::Release()
@@ -82,7 +179,7 @@ void ShaderVariation::Release()
         if (!graphics_)
             return;
         
-        if (shaderType_ == VS)
+        if (type_ == VS)
         {
             if (graphics_->GetVertexShader() == this)
                 graphics_->SetShaders(0, 0);
@@ -98,46 +195,91 @@ void ShaderVariation::Release()
         }
         
         object_ = 0;
+        compiled_ = false;
+        compilerOutput_.Clear();
+        
+        for (unsigned i = 0; i < MAX_TEXTURE_UNITS; ++i)
+            useTextureUnit_[i] = false;
+        parameters_.Clear();
     }
-    
-    failed_ = false;
 }
 
 void ShaderVariation::SetName(const String& name)
 {
-    name_ = name;
+    name_ = name.Trimmed().Replaced(' ', '_');
 }
 
-void ShaderVariation::SetByteCode(const SharedArrayPtr<unsigned char>& byteCode)
+void ShaderVariation::SetDefines(const String& defines)
 {
-    byteCode_ = byteCode;
+    defines_ = defines;
 }
 
-void ShaderVariation::AddParameter(StringHash param, const ShaderParameter& definition)
+void ShaderVariation::ParseParameters(unsigned char* bufData, unsigned bufSize)
 {
-    parameters_[param] = definition;
-}
+    MOJOSHADER_parseData const *parseData = MOJOSHADER_parse("bytecode", bufData, bufSize, 0, 0, 0, 0, 0, 0, 0);
 
-void ShaderVariation::AddTextureUnit(TextureUnit unit)
-{
-    useTextureUnit_[unit] = true;
-}
+    for (int i = 0; i < parseData->symbol_count; i++)
+    {
+        MOJOSHADER_symbol const& symbol = parseData->symbols[i];
 
-void ShaderVariation::ClearParameters()
-{
-    parameters_.Clear();
-    for (unsigned i = 0; i < MAX_TEXTURE_UNITS; ++i)
-        useTextureUnit_[i] = false;
-}
+        String name(symbol.name);
+        unsigned reg = symbol.register_index;
+        unsigned regCount = symbol.register_count;
 
-void ShaderVariation::OptimizeParameters()
-{
+        // Check if the parameter is a constant or a texture sampler
+        bool isSampler = (name[0] == 's');
+        name = name.Substring(1);
+        
+        if (isSampler)
+        {
+
+            // Skip if it's a G-buffer sampler, which are aliases for the standard texture units
+            if (reg < MAX_TEXTURE_UNITS)
+            {
+                if (name != "AlbedoBuffer" && name != "NormalBuffer" && name != "DepthBuffer" && name != "LightBuffer")
+                    useTextureUnit_[reg] = true;
+            }
+        }
+        else
+        {
+            ShaderParameter newParam(type_, reg, regCount);
+            HashMap<StringHash, ShaderParameter>::Iterator i = parameters_.Insert(MakePair(StringHash(name), newParam));
+            graphics_->RegisterShaderParameter(i->first_, i->second_);
+        }
+    }
+    
+    MOJOSHADER_freeParseData(parseData);
+    
+    // Optimize shader parameter lookup by rehashing to next power of two
     parameters_.Rehash(NextPowerOfTwo(parameters_.Size()));
 }
 
-bool ShaderVariation::IsCreated() const
+void ShaderVariation::CopyStrippedCode(PODVector<unsigned>& dest, unsigned char* bufData, unsigned bufSize)
 {
-    return object_ != 0;
+    unsigned const D3DSIO_COMMENT = 0xFFFE;
+    unsigned* srcWords = (unsigned*)bufData;
+    unsigned srcWordSize = bufSize >> 2;
+    
+    dest.Clear();
+    
+    for (unsigned i = 0; i < srcWordSize; ++i)
+    {
+        unsigned opcode = srcWords[i] & 0xffff;
+        unsigned paramLength = (srcWords[i] & 0x0f000000) >> 24;
+        unsigned commentLength = srcWords[i] >> 16;
+        
+        // For now, skip comment only at fixed position to prevent false positives
+        if (i == 1 && opcode == D3DSIO_COMMENT)
+        {
+            // Skip the comment
+            i += commentLength;
+        }
+        else
+        {
+            // Not a comment, copy the data
+            dest.Push(srcWords[i]);
+        }
+    }
 }
 
 }

+ 25 - 26
Source/Engine/Graphics/Direct3D9/D3D9ShaderVariation.h

@@ -69,34 +69,26 @@ public:
     /// Destruct.
     virtual ~ShaderVariation();
     
-    /// Create the shader program. Return true if successful.
-    bool Create();
-    /// Release shader.
+    /// Release the shader.
     virtual void Release();
     
+    /// Compile the shader. Return true if successful.
+    bool Create();
     /// Set name.
     void SetName(const String& name);
-    /// Set bytecode.
-    void SetByteCode(const SharedArrayPtr<unsigned char>& byteCode);
-    /// Add a parameter.
-    void AddParameter(StringHash param, const ShaderParameter& definition);
-    /// Add a texture unit.
-    void AddTextureUnit(TextureUnit unit);
-    /// Clear parameters and texture unit use flags.
-    void ClearParameters();
-    /// Optimize the parameter map for optimal query speed
-    void OptimizeParameters();
+    /// Set defines.
+    void SetDefines(const String& defines);
     
     /// Return shader type.
-    ShaderType GetShaderType() const { return shaderType_; }
+    ShaderType GetShaderType() const { return type_; }
     /// Return full shader name.
     const String& GetName() const { return name_; }
-    /// Return parent shader resource.
-    Shader* GetOwner() const;
-    /// Return whether created successfully.
-    bool IsCreated() const;
-    /// Return whether compile failed.
-    bool IsFailed() const { return failed_; }
+    /// Return defines.
+    const String& GetDefines() const { return defines_; }
+    /// Return whether successfully compiled.
+    bool IsCompiled() const { return compiled_; }
+    /// Return compile error/warning string.
+    const String& GetCompilerOutput() const { return compilerOutput_; }
     /// Return whether uses a parameter.
     bool HasParameter(StringHash param) const { return parameters_.Contains(param); }
     /// Return whether uses a texture unit (only for pixel shaders.)
@@ -105,16 +97,23 @@ public:
     const HashMap<StringHash, ShaderParameter>& GetParameters() const { return parameters_; }
     
 private:
-    /// Parent shader resource.
+    /// Inspect the constant parameters of the shader bytecode using MojoShader.
+    void ParseParameters(unsigned char* bufData, unsigned bufSize);
+    /// Strip comments from shader bytecode and store it.
+    void CopyStrippedCode(PODVector<unsigned>& dest, unsigned char* bufData, unsigned bufSize);
+    
+    /// Shader this variation belongs to.
     WeakPtr<Shader> owner_;
     /// Shader type.
-    ShaderType shaderType_;
+    ShaderType type_;
     /// Full shader name.
     String name_;
-    /// Shader bytecode.
-    SharedArrayPtr<unsigned char> byteCode_;
-    /// Compile failed flag.
-    bool failed_;
+    /// Defines to use in compiling.
+    String defines_;
+    /// Shader compile error string.
+    String compilerOutput_;
+    /// Compiled flag.
+    bool compiled_;
     /// Shader parameters.
     HashMap<StringHash, ShaderParameter> parameters_;
     /// Texture unit use flags.

+ 5 - 5
Source/Engine/Graphics/OpenGL/OGLShaderVariation.cpp

@@ -36,7 +36,7 @@ namespace Urho3D
 ShaderVariation::ShaderVariation(Shader* owner, ShaderType type) :
     GPUObject(owner->GetSubsystem<Graphics>()),
     owner_(owner),
-    shaderType_(type),
+    type_(type),
     compiled_(false)
 {
 }
@@ -66,7 +66,7 @@ void ShaderVariation::Release()
         
         if (!graphics_->IsDeviceLost())
         {
-            if (shaderType_ == VS)
+            if (type_ == VS)
             {
                 if (graphics_->GetVertexShader() == this)
                     graphics_->SetShaders(0, 0);
@@ -98,17 +98,17 @@ bool ShaderVariation::Create()
         return false;
     }
     
-    object_ = glCreateShader(shaderType_ == VS ? GL_VERTEX_SHADER : GL_FRAGMENT_SHADER);
+    object_ = glCreateShader(type_ == VS ? GL_VERTEX_SHADER : GL_FRAGMENT_SHADER);
     if (!object_)
     {
         compilerOutput_ = "Could not create shader object";
         return false;
     }
     
-    const String& originalShaderCode = owner_->GetSourceCode(shaderType_);
+    const String& originalShaderCode = owner_->GetSourceCode(type_);
     String shaderCode;
     // Distinguish between VS and PS compile in case the shader code wants to include/omit different things
-    shaderCode += shaderType_ == VS ? "#define COMPILEVS\n" : "#define COMPILEPS\n";
+    shaderCode += type_ == VS ? "#define COMPILEVS\n" : "#define COMPILEPS\n";
     
     // Prepend the defines to the shader code
     Vector<String> defineVec = defines_.Split(' ');

+ 2 - 2
Source/Engine/Graphics/OpenGL/OGLShaderVariation.h

@@ -55,7 +55,7 @@ public:
     void SetDefines(const String& defines);
     
     /// Return shader type.
-    ShaderType GetShaderType() const { return shaderType_; }
+    ShaderType GetShaderType() const { return type_; }
     /// Return full shader name.
     const String& GetName() const { return name_; }
     /// Return defines.
@@ -69,7 +69,7 @@ private:
     /// Shader this variation belongs to.
     WeakPtr<Shader> owner_;
     /// Shader type.
-    ShaderType shaderType_;
+    ShaderType type_;
     /// Full shader name.
     String name_;
     /// Defines to use in compiling.

+ 2 - 2
Source/Engine/Graphics/Shader.cpp

@@ -117,7 +117,7 @@ ShaderVariation* Shader::GetVariation(ShaderType type, const char* defines)
         if (i == vsVariations_.End())
         {
             i = vsVariations_.Insert(MakePair(definesHash, SharedPtr<ShaderVariation>(new ShaderVariation(this, VS))));
-            i->second_->SetName(GetFileName(GetName()) + "_" + defines);
+            i->second_->SetName(GetFileName(GetName()) + " " + defines);
             i->second_->SetDefines(defines);
             
             SetMemoryUse(GetMemoryUse() + sizeof(ShaderVariation));
@@ -132,7 +132,7 @@ ShaderVariation* Shader::GetVariation(ShaderType type, const char* defines)
         if (i == psVariations_.End())
         {
             i = psVariations_.Insert(MakePair(definesHash, SharedPtr<ShaderVariation>(new ShaderVariation(this, PS))));
-            i->second_->SetName(GetFileName(GetName()) + "_" + defines);
+            i->second_->SetName(GetFileName(GetName()) + " " + defines);
             i->second_->SetDefines(defines);
             
             SetMemoryUse(GetMemoryUse() + sizeof(ShaderVariation));

+ 1009 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBody.cpp

@@ -0,0 +1,1009 @@
+/*
+ * PURPOSE:
+ *   Class representing an articulated rigid body. Stores the body's
+ *   current state, allows forces and torques to be set, handles
+ *   timestepping and implements Featherstone's algorithm.
+ *   
+ * COPYRIGHT:
+ *   Copyright (C) Stephen Thompson, <[email protected]>, 2011-2013
+ *   Portions written By Erwin Coumans: replacing Eigen math library by Bullet LinearMath and a dedicated 6x6 matrix inverse (solveImatrix)
+
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it freely,
+ subject to the following restrictions:
+ 
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ 
+ */
+
+
+#include "btMultiBody.h"
+#include "btMultiBodyLink.h"
+#include "btMultiBodyLinkCollider.h"
+
+// #define INCLUDE_GYRO_TERM 
+
+namespace {
+    const btScalar SLEEP_EPSILON = btScalar(0.05);  // this is a squared velocity (m^2 s^-2)
+    const btScalar SLEEP_TIMEOUT = btScalar(2);     // in seconds
+}
+
+
+
+
+//
+// Various spatial helper functions
+//
+
+namespace {
+    void SpatialTransform(const btMatrix3x3 &rotation_matrix,  // rotates vectors in 'from' frame to vectors in 'to' frame
+                          const btVector3 &displacement,     // vector from origin of 'from' frame to origin of 'to' frame, in 'to' coordinates
+                          const btVector3 &top_in,       // top part of input vector
+                          const btVector3 &bottom_in,    // bottom part of input vector
+                          btVector3 &top_out,         // top part of output vector
+                          btVector3 &bottom_out)      // bottom part of output vector
+    {
+        top_out = rotation_matrix * top_in;
+        bottom_out = -displacement.cross(top_out) + rotation_matrix * bottom_in;
+    }
+
+    void InverseSpatialTransform(const btMatrix3x3 &rotation_matrix,
+                                 const btVector3 &displacement,
+                                 const btVector3 &top_in,
+                                 const btVector3 &bottom_in,
+                                 btVector3 &top_out,
+                                 btVector3 &bottom_out)
+    {
+        top_out = rotation_matrix.transpose() * top_in;
+        bottom_out = rotation_matrix.transpose() * (bottom_in + displacement.cross(top_in));
+    }
+
+    btScalar SpatialDotProduct(const btVector3 &a_top,
+                            const btVector3 &a_bottom,
+                            const btVector3 &b_top,
+                            const btVector3 &b_bottom)
+    {
+        return a_bottom.dot(b_top) + a_top.dot(b_bottom);
+    }
+}
+
+
+//
+// Implementation of class btMultiBody
+//
+
+btMultiBody::btMultiBody(int n_links,
+                     btScalar mass,
+                     const btVector3 &inertia,
+                     bool fixed_base_,
+                     bool can_sleep_)
+    : base_quat(0, 0, 0, 1),
+      base_mass(mass),
+      base_inertia(inertia),
+    
+		fixed_base(fixed_base_),
+		awake(true),
+		can_sleep(can_sleep_),
+		sleep_timer(0),
+		m_baseCollider(0),
+		m_linearDamping(0.04f),
+		m_angularDamping(0.04f),
+		m_useGyroTerm(true),
+		m_maxAppliedImpulse(1000.f),
+		m_hasSelfCollision(true)
+{
+	 links.resize(n_links);
+
+	vector_buf.resize(2*n_links);
+    matrix_buf.resize(n_links + 1);
+	m_real_buf.resize(6 + 2*n_links);
+    base_pos.setValue(0, 0, 0);
+    base_force.setValue(0, 0, 0);
+    base_torque.setValue(0, 0, 0);
+}
+
+btMultiBody::~btMultiBody()
+{
+}
+
+void btMultiBody::setupPrismatic(int i,
+                               btScalar mass,
+                               const btVector3 &inertia,
+                               int parent,
+                               const btQuaternion &rot_parent_to_this,
+                               const btVector3 &joint_axis,
+                               const btVector3 &r_vector_when_q_zero,
+							   bool disableParentCollision)
+{
+    links[i].mass = mass;
+    links[i].inertia = inertia;
+    links[i].parent = parent;
+    links[i].zero_rot_parent_to_this = rot_parent_to_this;
+    links[i].axis_top.setValue(0,0,0);
+    links[i].axis_bottom = joint_axis;
+    links[i].e_vector = r_vector_when_q_zero;
+    links[i].is_revolute = false;
+    links[i].cached_rot_parent_to_this = rot_parent_to_this;
+	if (disableParentCollision)
+		links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+
+    links[i].updateCache();
+}
+
+void btMultiBody::setupRevolute(int i,
+                              btScalar mass,
+                              const btVector3 &inertia,
+                              int parent,
+                              const btQuaternion &zero_rot_parent_to_this,
+                              const btVector3 &joint_axis,
+                              const btVector3 &parent_axis_position,
+                              const btVector3 &my_axis_position,
+							  bool disableParentCollision)
+{
+    links[i].mass = mass;
+    links[i].inertia = inertia;
+    links[i].parent = parent;
+    links[i].zero_rot_parent_to_this = zero_rot_parent_to_this;
+    links[i].axis_top = joint_axis;
+    links[i].axis_bottom = joint_axis.cross(my_axis_position);
+    links[i].d_vector = my_axis_position;
+    links[i].e_vector = parent_axis_position;
+    links[i].is_revolute = true;
+	if (disableParentCollision)
+		links[i].m_flags |=BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION;
+    links[i].updateCache();
+}
+
+
+
+
+	
+int btMultiBody::getParent(int i) const
+{
+    return links[i].parent;
+}
+
+btScalar btMultiBody::getLinkMass(int i) const
+{
+    return links[i].mass;
+}
+
+const btVector3 & btMultiBody::getLinkInertia(int i) const
+{
+    return links[i].inertia;
+}
+
+btScalar btMultiBody::getJointPos(int i) const
+{
+    return links[i].joint_pos;
+}
+
+btScalar btMultiBody::getJointVel(int i) const
+{
+    return m_real_buf[6 + i];
+}
+
+void btMultiBody::setJointPos(int i, btScalar q)
+{
+    links[i].joint_pos = q;
+    links[i].updateCache();
+}
+
+void btMultiBody::setJointVel(int i, btScalar qdot)
+{
+    m_real_buf[6 + i] = qdot;
+}
+
+const btVector3 & btMultiBody::getRVector(int i) const
+{
+    return links[i].cached_r_vector;
+}
+
+const btQuaternion & btMultiBody::getParentToLocalRot(int i) const
+{
+    return links[i].cached_rot_parent_to_this;
+}
+
+btVector3 btMultiBody::localPosToWorld(int i, const btVector3 &local_pos) const
+{
+    btVector3 result = local_pos;
+    while (i != -1) {
+        // 'result' is in frame i. transform it to frame parent(i)
+        result += getRVector(i);
+        result = quatRotate(getParentToLocalRot(i).inverse(),result);
+        i = getParent(i);
+    }
+
+    // 'result' is now in the base frame. transform it to world frame
+    result = quatRotate(getWorldToBaseRot().inverse() ,result);
+    result += getBasePos();
+
+    return result;
+}
+
+btVector3 btMultiBody::worldPosToLocal(int i, const btVector3 &world_pos) const
+{
+    if (i == -1) {
+        // world to base
+        return quatRotate(getWorldToBaseRot(),(world_pos - getBasePos()));
+    } else {
+        // find position in parent frame, then transform to current frame
+        return quatRotate(getParentToLocalRot(i),worldPosToLocal(getParent(i), world_pos)) - getRVector(i);
+    }
+}
+
+btVector3 btMultiBody::localDirToWorld(int i, const btVector3 &local_dir) const
+{
+    btVector3 result = local_dir;
+    while (i != -1) {
+        result = quatRotate(getParentToLocalRot(i).inverse() , result);
+        i = getParent(i);
+    }
+    result = quatRotate(getWorldToBaseRot().inverse() , result);
+    return result;
+}
+
+btVector3 btMultiBody::worldDirToLocal(int i, const btVector3 &world_dir) const
+{
+    if (i == -1) {
+        return quatRotate(getWorldToBaseRot(), world_dir);
+    } else {
+        return quatRotate(getParentToLocalRot(i) ,worldDirToLocal(getParent(i), world_dir));
+    }
+}
+
+void btMultiBody::compTreeLinkVelocities(btVector3 *omega, btVector3 *vel) const
+{
+	int num_links = getNumLinks();
+    // Calculates the velocities of each link (and the base) in its local frame
+    omega[0] = quatRotate(base_quat ,getBaseOmega());
+    vel[0] = quatRotate(base_quat ,getBaseVel());
+    
+    for (int i = 0; i < num_links; ++i) {
+        const int parent = links[i].parent;
+
+        // transform parent vel into this frame, store in omega[i+1], vel[i+1]
+        SpatialTransform(btMatrix3x3(links[i].cached_rot_parent_to_this), links[i].cached_r_vector,
+                         omega[parent+1], vel[parent+1],
+                         omega[i+1], vel[i+1]);
+
+        // now add qidot * shat_i
+        omega[i+1] += getJointVel(i) * links[i].axis_top;
+        vel[i+1] += getJointVel(i) * links[i].axis_bottom;
+    }
+}
+
+btScalar btMultiBody::getKineticEnergy() const
+{
+	int num_links = getNumLinks();
+    // TODO: would be better not to allocate memory here
+    btAlignedObjectArray<btVector3> omega;omega.resize(num_links+1);
+	btAlignedObjectArray<btVector3> vel;vel.resize(num_links+1);
+    compTreeLinkVelocities(&omega[0], &vel[0]);
+
+    // we will do the factor of 0.5 at the end
+    btScalar result = base_mass * vel[0].dot(vel[0]);
+    result += omega[0].dot(base_inertia * omega[0]);
+    
+    for (int i = 0; i < num_links; ++i) {
+        result += links[i].mass * vel[i+1].dot(vel[i+1]);
+        result += omega[i+1].dot(links[i].inertia * omega[i+1]);
+    }
+
+    return 0.5f * result;
+}
+
+btVector3 btMultiBody::getAngularMomentum() const
+{
+	int num_links = getNumLinks();
+    // TODO: would be better not to allocate memory here
+    btAlignedObjectArray<btVector3> omega;omega.resize(num_links+1);
+	btAlignedObjectArray<btVector3> vel;vel.resize(num_links+1);
+    btAlignedObjectArray<btQuaternion> rot_from_world;rot_from_world.resize(num_links+1);
+    compTreeLinkVelocities(&omega[0], &vel[0]);
+
+    rot_from_world[0] = base_quat;
+    btVector3 result = quatRotate(rot_from_world[0].inverse() , (base_inertia * omega[0]));
+
+    for (int i = 0; i < num_links; ++i) {
+        rot_from_world[i+1] = links[i].cached_rot_parent_to_this * rot_from_world[links[i].parent+1];
+        result += (quatRotate(rot_from_world[i+1].inverse() , (links[i].inertia * omega[i+1])));
+    }
+
+    return result;
+}
+
+
+void btMultiBody::clearForcesAndTorques()
+{
+    base_force.setValue(0, 0, 0);
+    base_torque.setValue(0, 0, 0);
+
+    for (int i = 0; i < getNumLinks(); ++i) {
+        links[i].applied_force.setValue(0, 0, 0);
+        links[i].applied_torque.setValue(0, 0, 0);
+        links[i].joint_torque = 0;
+    }
+}
+
+void btMultiBody::clearVelocities()
+{
+	for (int i = 0; i < 6 + getNumLinks(); ++i) 
+	{
+		m_real_buf[i] = 0.f;
+	}
+}
+void btMultiBody::addLinkForce(int i, const btVector3 &f)
+{
+    links[i].applied_force += f;
+}
+
+void btMultiBody::addLinkTorque(int i, const btVector3 &t)
+{
+    links[i].applied_torque += t;
+}
+
+void btMultiBody::addJointTorque(int i, btScalar Q)
+{
+    links[i].joint_torque += Q;
+}
+
+const btVector3 & btMultiBody::getLinkForce(int i) const
+{
+    return links[i].applied_force;
+}
+
+const btVector3 & btMultiBody::getLinkTorque(int i) const
+{
+    return links[i].applied_torque;
+}
+
+btScalar btMultiBody::getJointTorque(int i) const
+{
+    return links[i].joint_torque;
+}
+
+
+inline btMatrix3x3 vecMulVecTranspose(const btVector3& v0, const btVector3& v1Transposed)
+{
+		btVector3 row0 = btVector3( 
+			v0.x() * v1Transposed.x(),
+			v0.x() * v1Transposed.y(),
+			v0.x() * v1Transposed.z());
+		btVector3 row1 = btVector3( 
+			v0.y() * v1Transposed.x(),
+			v0.y() * v1Transposed.y(),
+			v0.y() * v1Transposed.z());
+		btVector3 row2 = btVector3( 
+			v0.z() * v1Transposed.x(),
+			v0.z() * v1Transposed.y(),
+			v0.z() * v1Transposed.z());
+
+        btMatrix3x3 m(row0[0],row0[1],row0[2],
+						row1[0],row1[1],row1[2],
+						row2[0],row2[1],row2[2]);
+		return m;
+}
+
+
+void btMultiBody::stepVelocities(btScalar dt,
+                               btAlignedObjectArray<btScalar> &scratch_r,
+                               btAlignedObjectArray<btVector3> &scratch_v,
+                               btAlignedObjectArray<btMatrix3x3> &scratch_m)
+{
+    // Implement Featherstone's algorithm to calculate joint accelerations (q_double_dot)
+    // and the base linear & angular accelerations.
+
+    // We apply damping forces in this routine as well as any external forces specified by the 
+    // caller (via addBaseForce etc).
+
+    // output should point to an array of 6 + num_links reals.
+    // Format is: 3 angular accelerations (in world frame), 3 linear accelerations (in world frame),
+    // num_links joint acceleration values.
+    
+	int num_links = getNumLinks();
+
+    const btScalar DAMPING_K1_LINEAR = m_linearDamping;
+	const btScalar DAMPING_K2_LINEAR = m_linearDamping;
+
+	const btScalar DAMPING_K1_ANGULAR = m_angularDamping;
+	const btScalar DAMPING_K2_ANGULAR= m_angularDamping;
+
+    btVector3 base_vel = getBaseVel();
+    btVector3 base_omega = getBaseOmega();
+
+    // Temporary matrices/vectors -- use scratch space from caller
+    // so that we don't have to keep reallocating every frame
+
+    scratch_r.resize(2*num_links + 6);
+    scratch_v.resize(8*num_links + 6);
+    scratch_m.resize(4*num_links + 4);
+
+    btScalar * r_ptr = &scratch_r[0];
+    btScalar * output = &scratch_r[num_links];  // "output" holds the q_double_dot results
+    btVector3 * v_ptr = &scratch_v[0];
+    
+    // vhat_i  (top = angular, bottom = linear part)
+    btVector3 * vel_top_angular = v_ptr; v_ptr += num_links + 1;
+    btVector3 * vel_bottom_linear = v_ptr; v_ptr += num_links + 1;
+
+    // zhat_i^A
+    btVector3 * zero_acc_top_angular = v_ptr; v_ptr += num_links + 1;
+    btVector3 * zero_acc_bottom_linear = v_ptr; v_ptr += num_links + 1;
+
+    // chat_i  (note NOT defined for the base)
+    btVector3 * coriolis_top_angular = v_ptr; v_ptr += num_links;
+    btVector3 * coriolis_bottom_linear = v_ptr; v_ptr += num_links;
+
+    // top left, top right and bottom left blocks of Ihat_i^A.
+    // bottom right block = transpose of top left block and is not stored.
+    // Note: the top right and bottom left blocks are always symmetric matrices, but we don't make use of this fact currently.
+    btMatrix3x3 * inertia_top_left = &scratch_m[num_links + 1];
+    btMatrix3x3 * inertia_top_right = &scratch_m[2*num_links + 2];
+    btMatrix3x3 * inertia_bottom_left = &scratch_m[3*num_links + 3];
+
+    // Cached 3x3 rotation matrices from parent frame to this frame.
+    btMatrix3x3 * rot_from_parent = &matrix_buf[0];
+    btMatrix3x3 * rot_from_world = &scratch_m[0];
+
+    // hhat_i, ahat_i
+    // hhat is NOT stored for the base (but ahat is)
+    btVector3 * h_top = num_links > 0 ? &vector_buf[0] : 0;
+    btVector3 * h_bottom = num_links > 0 ? &vector_buf[num_links] : 0;
+    btVector3 * accel_top = v_ptr; v_ptr += num_links + 1;
+    btVector3 * accel_bottom = v_ptr; v_ptr += num_links + 1;
+
+    // Y_i, D_i
+    btScalar * Y = r_ptr; r_ptr += num_links;
+    btScalar * D = num_links > 0 ? &m_real_buf[6 + num_links] : 0;
+
+    // ptr to the joint accel part of the output
+    btScalar * joint_accel = output + 6;
+
+
+    // Start of the algorithm proper.
+    
+    // First 'upward' loop.
+    // Combines CompTreeLinkVelocities and InitTreeLinks from Mirtich.
+
+    rot_from_parent[0] = btMatrix3x3(base_quat);
+
+    vel_top_angular[0] = rot_from_parent[0] * base_omega;
+    vel_bottom_linear[0] = rot_from_parent[0] * base_vel;
+
+    if (fixed_base) {
+        zero_acc_top_angular[0] = zero_acc_bottom_linear[0] = btVector3(0,0,0);
+    } else {
+        zero_acc_top_angular[0] = - (rot_from_parent[0] * (base_force 
+                                                   - base_mass*(DAMPING_K1_LINEAR+DAMPING_K2_LINEAR*base_vel.norm())*base_vel));
+		
+        zero_acc_bottom_linear[0] =
+            - (rot_from_parent[0] * base_torque);
+
+		if (m_useGyroTerm)
+			zero_acc_bottom_linear[0]+=vel_top_angular[0].cross( base_inertia * vel_top_angular[0] );
+
+        zero_acc_bottom_linear[0] += base_inertia * vel_top_angular[0] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[0].norm());
+
+    }
+
+
+
+    inertia_top_left[0] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
+	
+	
+    inertia_top_right[0].setValue(base_mass, 0, 0,
+                            0, base_mass, 0,
+                            0, 0, base_mass);
+    inertia_bottom_left[0].setValue(base_inertia[0], 0, 0,
+                              0, base_inertia[1], 0,
+                              0, 0, base_inertia[2]);
+
+    rot_from_world[0] = rot_from_parent[0];
+
+    for (int i = 0; i < num_links; ++i) {
+        const int parent = links[i].parent;
+        rot_from_parent[i+1] = btMatrix3x3(links[i].cached_rot_parent_to_this);
+		
+
+        rot_from_world[i+1] = rot_from_parent[i+1] * rot_from_world[parent+1];
+        
+        // vhat_i = i_xhat_p(i) * vhat_p(i)
+        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
+                         vel_top_angular[parent+1], vel_bottom_linear[parent+1],
+                         vel_top_angular[i+1], vel_bottom_linear[i+1]);
+
+        // we can now calculate chat_i
+        // remember vhat_i is really vhat_p(i) (but in current frame) at this point
+        coriolis_bottom_linear[i] = vel_top_angular[i+1].cross(vel_top_angular[i+1].cross(links[i].cached_r_vector))
+            + 2 * vel_top_angular[i+1].cross(links[i].axis_bottom) * getJointVel(i);
+        if (links[i].is_revolute) {
+            coriolis_top_angular[i] = vel_top_angular[i+1].cross(links[i].axis_top) * getJointVel(i);
+            coriolis_bottom_linear[i] += (getJointVel(i) * getJointVel(i)) * links[i].axis_top.cross(links[i].axis_bottom);
+        } else {
+            coriolis_top_angular[i] = btVector3(0,0,0);
+        }
+        
+        // now set vhat_i to its true value by doing
+        // vhat_i += qidot * shat_i
+        vel_top_angular[i+1] += getJointVel(i) * links[i].axis_top;
+        vel_bottom_linear[i+1] += getJointVel(i) * links[i].axis_bottom;
+
+        // calculate zhat_i^A
+        zero_acc_top_angular[i+1] = - (rot_from_world[i+1] * (links[i].applied_force));
+        zero_acc_top_angular[i+1] += links[i].mass * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR*vel_bottom_linear[i+1].norm()) * vel_bottom_linear[i+1];
+
+        zero_acc_bottom_linear[i+1] =
+            - (rot_from_world[i+1] * links[i].applied_torque);
+		if (m_useGyroTerm)
+		{
+			zero_acc_bottom_linear[i+1] += vel_top_angular[i+1].cross( links[i].inertia * vel_top_angular[i+1] );
+		}
+
+        zero_acc_bottom_linear[i+1] += links[i].inertia * vel_top_angular[i+1] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[i+1].norm());
+
+        // calculate Ihat_i^A
+        inertia_top_left[i+1] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
+        inertia_top_right[i+1].setValue(links[i].mass, 0, 0,
+                                  0, links[i].mass, 0,
+                                  0, 0, links[i].mass);
+        inertia_bottom_left[i+1].setValue(links[i].inertia[0], 0, 0,
+                                    0, links[i].inertia[1], 0,
+                                    0, 0, links[i].inertia[2]);
+    }
+
+
+    // 'Downward' loop.
+    // (part of TreeForwardDynamics in Mirtich.)
+    for (int i = num_links - 1; i >= 0; --i) {
+
+        h_top[i] = inertia_top_left[i+1] * links[i].axis_top + inertia_top_right[i+1] * links[i].axis_bottom;
+        h_bottom[i] = inertia_bottom_left[i+1] * links[i].axis_top + inertia_top_left[i+1].transpose() * links[i].axis_bottom;
+		btScalar val = SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, h_top[i], h_bottom[i]);
+        D[i] = val;
+		Y[i] = links[i].joint_torque
+            - SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, zero_acc_top_angular[i+1], zero_acc_bottom_linear[i+1])
+            - SpatialDotProduct(h_top[i], h_bottom[i], coriolis_top_angular[i], coriolis_bottom_linear[i]);
+
+        const int parent = links[i].parent;
+
+        
+        // Ip += pXi * (Ii - hi hi' / Di) * iXp
+        const btScalar one_over_di = 1.0f / D[i];
+
+		
+
+
+		const btMatrix3x3 TL = inertia_top_left[i+1]   - vecMulVecTranspose(one_over_di * h_top[i] , h_bottom[i]);
+        const btMatrix3x3 TR = inertia_top_right[i+1]  - vecMulVecTranspose(one_over_di * h_top[i] , h_top[i]);
+        const btMatrix3x3 BL = inertia_bottom_left[i+1]- vecMulVecTranspose(one_over_di * h_bottom[i] , h_bottom[i]);
+
+
+        btMatrix3x3 r_cross;
+        r_cross.setValue(
+            0, -links[i].cached_r_vector[2], links[i].cached_r_vector[1],
+            links[i].cached_r_vector[2], 0, -links[i].cached_r_vector[0],
+            -links[i].cached_r_vector[1], links[i].cached_r_vector[0], 0);
+        
+        inertia_top_left[parent+1] += rot_from_parent[i+1].transpose() * ( TL - TR * r_cross ) * rot_from_parent[i+1];
+        inertia_top_right[parent+1] += rot_from_parent[i+1].transpose() * TR * rot_from_parent[i+1];
+        inertia_bottom_left[parent+1] += rot_from_parent[i+1].transpose() *
+            (r_cross * (TL - TR * r_cross) + BL - TL.transpose() * r_cross) * rot_from_parent[i+1];
+        
+        
+        // Zp += pXi * (Zi + Ii*ci + hi*Yi/Di)
+        btVector3 in_top, in_bottom, out_top, out_bottom;
+        const btScalar Y_over_D = Y[i] * one_over_di;
+        in_top = zero_acc_top_angular[i+1]
+            + inertia_top_left[i+1] * coriolis_top_angular[i]
+            + inertia_top_right[i+1] * coriolis_bottom_linear[i]
+            + Y_over_D * h_top[i];
+        in_bottom = zero_acc_bottom_linear[i+1]
+            + inertia_bottom_left[i+1] * coriolis_top_angular[i]
+            + inertia_top_left[i+1].transpose() * coriolis_bottom_linear[i]
+            + Y_over_D * h_bottom[i];
+        InverseSpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
+                                in_top, in_bottom, out_top, out_bottom);
+        zero_acc_top_angular[parent+1] += out_top;
+        zero_acc_bottom_linear[parent+1] += out_bottom;
+    }
+
+
+    // Second 'upward' loop
+    // (part of TreeForwardDynamics in Mirtich)
+
+    if (fixed_base) 
+	{
+        accel_top[0] = accel_bottom[0] = btVector3(0,0,0);
+    } 
+	else 
+	{
+        if (num_links > 0) 
+		{
+            //Matrix<btScalar, 6, 6> Imatrix;
+            //Imatrix.block<3,3>(0,0) = inertia_top_left[0];
+            //Imatrix.block<3,3>(3,0) = inertia_bottom_left[0];
+            //Imatrix.block<3,3>(0,3) = inertia_top_right[0];
+            //Imatrix.block<3,3>(3,3) = inertia_top_left[0].transpose();
+            //cached_imatrix_lu.reset(new Eigen::LU<Matrix<btScalar, 6, 6> >(Imatrix));  // TODO: Avoid memory allocation here?
+
+			cached_inertia_top_left = inertia_top_left[0];
+			cached_inertia_top_right = inertia_top_right[0];
+			cached_inertia_lower_left = inertia_bottom_left[0];
+			cached_inertia_lower_right= inertia_top_left[0].transpose();
+
+        }
+		btVector3 rhs_top (zero_acc_top_angular[0][0], zero_acc_top_angular[0][1], zero_acc_top_angular[0][2]);
+		btVector3 rhs_bot (zero_acc_bottom_linear[0][0], zero_acc_bottom_linear[0][1], zero_acc_bottom_linear[0][2]);
+        float result[6];
+
+		solveImatrix(rhs_top, rhs_bot, result);
+//		printf("result=%f,%f,%f,%f,%f,%f\n",result[0],result[0],result[0],result[0],result[0],result[0]);
+        for (int i = 0; i < 3; ++i) {
+            accel_top[0][i] = -result[i];
+            accel_bottom[0][i] = -result[i+3];
+        }
+
+    }
+
+    // now do the loop over the links
+    for (int i = 0; i < num_links; ++i) {
+        const int parent = links[i].parent;
+        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
+                         accel_top[parent+1], accel_bottom[parent+1],
+                         accel_top[i+1], accel_bottom[i+1]);
+        joint_accel[i] = (Y[i] - SpatialDotProduct(h_top[i], h_bottom[i], accel_top[i+1], accel_bottom[i+1])) / D[i];
+        accel_top[i+1] += coriolis_top_angular[i] + joint_accel[i] * links[i].axis_top;
+        accel_bottom[i+1] += coriolis_bottom_linear[i] + joint_accel[i] * links[i].axis_bottom;
+    }
+
+    // transform base accelerations back to the world frame.
+    btVector3 omegadot_out = rot_from_parent[0].transpose() * accel_top[0];
+	output[0] = omegadot_out[0];
+	output[1] = omegadot_out[1];
+	output[2] = omegadot_out[2];
+
+    btVector3 vdot_out = rot_from_parent[0].transpose() * accel_bottom[0];
+	output[3] = vdot_out[0];
+	output[4] = vdot_out[1];
+	output[5] = vdot_out[2];
+    // Final step: add the accelerations (times dt) to the velocities.
+    applyDeltaVee(output, dt);
+
+	
+}
+
+
+
+void btMultiBody::solveImatrix(const btVector3& rhs_top, const btVector3& rhs_bot, float result[6]) const
+{
+	int num_links = getNumLinks();
+	///solve I * x = rhs, so the result = invI * rhs
+    if (num_links == 0) 
+	{
+		// in the case of 0 links (i.e. a plain rigid body, not a multibody) rhs * invI is easier
+        result[0] = rhs_bot[0] / base_inertia[0];
+        result[1] = rhs_bot[1] / base_inertia[1];
+        result[2] = rhs_bot[2] / base_inertia[2];
+        result[3] = rhs_top[0] / base_mass;
+        result[4] = rhs_top[1] / base_mass;
+        result[5] = rhs_top[2] / base_mass;
+    } else 
+	{
+		/// Special routine for calculating the inverse of a spatial inertia matrix
+		///the 6x6 matrix is stored as 4 blocks of 3x3 matrices
+		btMatrix3x3 Binv = cached_inertia_top_right.inverse()*-1.f;
+		btMatrix3x3 tmp = cached_inertia_lower_right * Binv;
+		btMatrix3x3 invIupper_right = (tmp * cached_inertia_top_left + cached_inertia_lower_left).inverse();
+		tmp = invIupper_right * cached_inertia_lower_right;
+		btMatrix3x3 invI_upper_left = (tmp * Binv);
+		btMatrix3x3 invI_lower_right = (invI_upper_left).transpose();
+		tmp = cached_inertia_top_left  * invI_upper_left;
+		tmp[0][0]-= 1.0;
+		tmp[1][1]-= 1.0;
+		tmp[2][2]-= 1.0;
+		btMatrix3x3 invI_lower_left = (Binv * tmp);
+
+		//multiply result = invI * rhs
+		{
+		  btVector3 vtop = invI_upper_left*rhs_top;
+		  btVector3 tmp;
+		  tmp = invIupper_right * rhs_bot;
+		  vtop += tmp;
+		  btVector3 vbot = invI_lower_left*rhs_top;
+		  tmp = invI_lower_right * rhs_bot;
+		  vbot += tmp;
+		  result[0] = vtop[0];
+		  result[1] = vtop[1];
+		  result[2] = vtop[2];
+		  result[3] = vbot[0];
+		  result[4] = vbot[1];
+		  result[5] = vbot[2];
+		}
+		
+    }
+}
+
+
+void btMultiBody::calcAccelerationDeltas(const btScalar *force, btScalar *output,
+                                       btAlignedObjectArray<btScalar> &scratch_r, btAlignedObjectArray<btVector3> &scratch_v) const
+{
+    // Temporary matrices/vectors -- use scratch space from caller
+    // so that we don't have to keep reallocating every frame
+	int num_links = getNumLinks();
+    scratch_r.resize(num_links);
+    scratch_v.resize(4*num_links + 4);
+
+    btScalar * r_ptr = num_links == 0 ? 0 : &scratch_r[0];
+    btVector3 * v_ptr = &scratch_v[0];
+    
+    // zhat_i^A (scratch space)
+    btVector3 * zero_acc_top_angular = v_ptr; v_ptr += num_links + 1;
+    btVector3 * zero_acc_bottom_linear = v_ptr; v_ptr += num_links + 1;
+
+    // rot_from_parent (cached from calcAccelerations)
+    const btMatrix3x3 * rot_from_parent = &matrix_buf[0];
+
+    // hhat (cached), accel (scratch)
+    const btVector3 * h_top = num_links > 0 ? &vector_buf[0] : 0;
+    const btVector3 * h_bottom = num_links > 0 ? &vector_buf[num_links] : 0;
+    btVector3 * accel_top = v_ptr; v_ptr += num_links + 1;
+    btVector3 * accel_bottom = v_ptr; v_ptr += num_links + 1;
+
+    // Y_i (scratch), D_i (cached)
+    btScalar * Y = r_ptr; r_ptr += num_links;
+    const btScalar * D = num_links > 0 ? &m_real_buf[6 + num_links] : 0;
+
+    btAssert(num_links == 0 || r_ptr - &scratch_r[0] == scratch_r.size());
+    btAssert(v_ptr - &scratch_v[0] == scratch_v.size());
+
+
+    
+    // First 'upward' loop.
+    // Combines CompTreeLinkVelocities and InitTreeLinks from Mirtich.
+
+    btVector3 input_force(force[3],force[4],force[5]);
+    btVector3 input_torque(force[0],force[1],force[2]);
+    
+    // Fill in zero_acc
+    // -- set to force/torque on the base, zero otherwise
+    if (fixed_base) 
+	{
+        zero_acc_top_angular[0] = zero_acc_bottom_linear[0] = btVector3(0,0,0);
+    } else 
+	{
+        zero_acc_top_angular[0] = - (rot_from_parent[0] * input_force);
+        zero_acc_bottom_linear[0] =  - (rot_from_parent[0] * input_torque);
+    }
+    for (int i = 0; i < num_links; ++i) 
+	{
+        zero_acc_top_angular[i+1] = zero_acc_bottom_linear[i+1] = btVector3(0,0,0);
+    }
+
+    // 'Downward' loop.
+    for (int i = num_links - 1; i >= 0; --i) 
+	{
+
+        Y[i] = - SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, zero_acc_top_angular[i+1], zero_acc_bottom_linear[i+1]);
+        Y[i] += force[6 + i];  // add joint torque
+        
+        const int parent = links[i].parent;
+        
+        // Zp += pXi * (Zi + hi*Yi/Di)
+        btVector3 in_top, in_bottom, out_top, out_bottom;
+        const btScalar Y_over_D = Y[i] / D[i];
+        in_top = zero_acc_top_angular[i+1] + Y_over_D * h_top[i];
+        in_bottom = zero_acc_bottom_linear[i+1] + Y_over_D * h_bottom[i];
+        InverseSpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
+                                in_top, in_bottom, out_top, out_bottom);
+        zero_acc_top_angular[parent+1] += out_top;
+        zero_acc_bottom_linear[parent+1] += out_bottom;
+    }
+
+    // ptr to the joint accel part of the output
+    btScalar * joint_accel = output + 6;
+
+    // Second 'upward' loop
+    if (fixed_base) 
+	{
+        accel_top[0] = accel_bottom[0] = btVector3(0,0,0);
+    } else 
+	{
+		btVector3 rhs_top (zero_acc_top_angular[0][0], zero_acc_top_angular[0][1], zero_acc_top_angular[0][2]);
+		btVector3 rhs_bot (zero_acc_bottom_linear[0][0], zero_acc_bottom_linear[0][1], zero_acc_bottom_linear[0][2]);
+		
+		float result[6];
+        solveImatrix(rhs_top,rhs_bot, result);
+	//	printf("result=%f,%f,%f,%f,%f,%f\n",result[0],result[0],result[0],result[0],result[0],result[0]);
+
+        for (int i = 0; i < 3; ++i) {
+            accel_top[0][i] = -result[i];
+            accel_bottom[0][i] = -result[i+3];
+        }
+
+    }
+    
+    // now do the loop over the links
+    for (int i = 0; i < num_links; ++i) {
+        const int parent = links[i].parent;
+        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
+                         accel_top[parent+1], accel_bottom[parent+1],
+                         accel_top[i+1], accel_bottom[i+1]);
+        joint_accel[i] = (Y[i] - SpatialDotProduct(h_top[i], h_bottom[i], accel_top[i+1], accel_bottom[i+1])) / D[i];
+        accel_top[i+1] += joint_accel[i] * links[i].axis_top;
+        accel_bottom[i+1] += joint_accel[i] * links[i].axis_bottom;
+    }
+
+    // transform base accelerations back to the world frame.
+    btVector3 omegadot_out;
+    omegadot_out = rot_from_parent[0].transpose() * accel_top[0];
+	output[0] = omegadot_out[0];
+	output[1] = omegadot_out[1];
+	output[2] = omegadot_out[2];
+
+    btVector3 vdot_out;
+    vdot_out = rot_from_parent[0].transpose() * accel_bottom[0];
+
+	output[3] = vdot_out[0];
+	output[4] = vdot_out[1];
+	output[5] = vdot_out[2];
+}
+
+void btMultiBody::stepPositions(btScalar dt)
+{
+	int num_links = getNumLinks();
+    // step position by adding dt * velocity
+	btVector3 v = getBaseVel();
+    base_pos += dt * v;
+
+    // "exponential map" method for the rotation
+    btVector3 base_omega = getBaseOmega();
+    const btScalar omega_norm = base_omega.norm();
+    const btScalar omega_times_dt = omega_norm * dt;
+    const btScalar SMALL_ROTATION_ANGLE = 0.02f; // Theoretically this should be ~ pow(FLT_EPSILON,0.25) which is ~ 0.0156
+    if (fabs(omega_times_dt) < SMALL_ROTATION_ANGLE) 
+	{
+        const btScalar xsq = omega_times_dt * omega_times_dt;     // |omega|^2 * dt^2
+        const btScalar sin_term = dt * (xsq / 48.0f - 0.5f);      // -sin(0.5*dt*|omega|) / |omega|
+        const btScalar cos_term = 1.0f - xsq / 8.0f;              // cos(0.5*dt*|omega|)
+        base_quat = base_quat * btQuaternion(sin_term * base_omega[0],sin_term * base_omega[1],sin_term * base_omega[2],cos_term);
+    } else 
+	{
+        base_quat = base_quat * btQuaternion(base_omega / omega_norm,-omega_times_dt);
+    }
+
+    // Make sure the quaternion represents a valid rotation.
+    // (Not strictly necessary, but helps prevent any round-off errors from building up.)
+    base_quat.normalize();
+
+    // Finally we can update joint_pos for each of the links
+    for (int i = 0; i < num_links; ++i) 
+	{
+		float jointVel = getJointVel(i);
+        links[i].joint_pos += dt * jointVel;
+        links[i].updateCache();
+    }
+}
+
+void btMultiBody::fillContactJacobian(int link,
+                                    const btVector3 &contact_point,
+                                    const btVector3 &normal,
+                                    btScalar *jac,
+                                    btAlignedObjectArray<btScalar> &scratch_r,
+                                    btAlignedObjectArray<btVector3> &scratch_v,
+                                    btAlignedObjectArray<btMatrix3x3> &scratch_m) const
+{
+    // temporary space
+	int num_links = getNumLinks();
+    scratch_v.resize(2*num_links + 2);
+    scratch_m.resize(num_links + 1);
+
+    btVector3 * v_ptr = &scratch_v[0];
+    btVector3 * p_minus_com = v_ptr; v_ptr += num_links + 1;
+    btVector3 * n_local = v_ptr; v_ptr += num_links + 1;
+    btAssert(v_ptr - &scratch_v[0] == scratch_v.size());
+
+    scratch_r.resize(num_links);
+    btScalar * results = num_links > 0 ? &scratch_r[0] : 0;
+
+    btMatrix3x3 * rot_from_world = &scratch_m[0];
+
+    const btVector3 p_minus_com_world = contact_point - base_pos;
+
+    rot_from_world[0] = btMatrix3x3(base_quat);
+
+    p_minus_com[0] = rot_from_world[0] * p_minus_com_world;
+    n_local[0] = rot_from_world[0] * normal;
+    
+    // omega coeffients first.
+    btVector3 omega_coeffs;
+    omega_coeffs = p_minus_com_world.cross(normal);
+	jac[0] = omega_coeffs[0];
+	jac[1] = omega_coeffs[1];
+	jac[2] = omega_coeffs[2];
+    // then v coefficients
+    jac[3] = normal[0];
+    jac[4] = normal[1];
+    jac[5] = normal[2];
+
+    // Set remaining jac values to zero for now.
+    for (int i = 6; i < 6 + num_links; ++i) {
+        jac[i] = 0;
+    }
+
+    // Qdot coefficients, if necessary.
+    if (num_links > 0 && link > -1) {
+
+        // TODO: speed this up -- don't calculate for links we don't need.
+        // (Also, we are making 3 separate calls to this function, for the normal & the 2 friction directions,
+        // which is resulting in repeated work being done...)
+
+        // calculate required normals & positions in the local frames.
+        for (int i = 0; i < num_links; ++i) {
+
+            // transform to local frame
+            const int parent = links[i].parent;
+            const btMatrix3x3 mtx(links[i].cached_rot_parent_to_this);
+            rot_from_world[i+1] = mtx * rot_from_world[parent+1];
+
+            n_local[i+1] = mtx * n_local[parent+1];
+            p_minus_com[i+1] = mtx * p_minus_com[parent+1] - links[i].cached_r_vector;
+
+            // calculate the jacobian entry
+            if (links[i].is_revolute) {
+                results[i] = n_local[i+1].dot( links[i].axis_top.cross(p_minus_com[i+1]) + links[i].axis_bottom );
+            } else {
+                results[i] = n_local[i+1].dot( links[i].axis_bottom );
+            }
+        }
+
+        // Now copy through to output.
+        while (link != -1) {
+            jac[6 + link] = results[link];
+            link = links[link].parent;
+        }
+    }
+}
+
+void btMultiBody::wakeUp()
+{
+    awake = true;
+}
+
+void btMultiBody::goToSleep()
+{
+    awake = false;
+}
+
+void btMultiBody::checkMotionAndSleepIfRequired(btScalar timestep)
+{
+	int num_links = getNumLinks();
+	extern bool gDisableDeactivation;
+    if (!can_sleep || gDisableDeactivation) 
+	{
+		awake = true;
+		sleep_timer = 0;
+		return;
+	}
+
+    // motion is computed as omega^2 + v^2 + (sum of squares of joint velocities)
+    btScalar motion = 0;
+    for (int i = 0; i < 6 + num_links; ++i) {
+        motion += m_real_buf[i] * m_real_buf[i];
+    }
+
+    if (motion < SLEEP_EPSILON) {
+        sleep_timer += timestep;
+        if (sleep_timer > SLEEP_TIMEOUT) {
+            goToSleep();
+        }
+    } else {
+        sleep_timer = 0;
+		if (!awake)
+			wakeUp();
+    }
+}

+ 466 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBody.h

@@ -0,0 +1,466 @@
+/*
+ * PURPOSE:
+ *   Class representing an articulated rigid body. Stores the body's
+ *   current state, allows forces and torques to be set, handles
+ *   timestepping and implements Featherstone's algorithm.
+ *   
+ * COPYRIGHT:
+ *   Copyright (C) Stephen Thompson, <[email protected]>, 2011-2013
+ *   Portions written By Erwin Coumans: replacing Eigen math library by Bullet LinearMath and a dedicated 6x6 matrix inverse (solveImatrix)
+
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it freely,
+ subject to the following restrictions:
+ 
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ 
+ */
+
+
+#ifndef BT_MULTIBODY_H
+#define BT_MULTIBODY_H
+
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+#include "btMultiBodyLink.h"
+class btMultiBodyLinkCollider;
+
+class btMultiBody 
+{
+public:
+
+    
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+    //
+    // initialization
+    //
+    
+    btMultiBody(int n_links,                // NOT including the base
+              btScalar mass,                // mass of base
+              const btVector3 &inertia,    // inertia of base, in base frame; assumed diagonal
+              bool fixed_base_,           // whether the base is fixed (true) or can move (false)
+              bool can_sleep_);
+
+    ~btMultiBody();
+    
+    void setupPrismatic(int i,             // 0 to num_links-1
+                        btScalar mass,
+                        const btVector3 &inertia,       // in my frame; assumed diagonal
+                        int parent,
+                        const btQuaternion &rot_parent_to_this,  // rotate points in parent frame to my frame.
+                        const btVector3 &joint_axis,             // in my frame
+                        const btVector3 &r_vector_when_q_zero,  // vector from parent COM to my COM, in my frame, when q = 0.
+						bool disableParentCollision=false
+						);
+
+    void setupRevolute(int i,            // 0 to num_links-1
+                       btScalar mass,
+                       const btVector3 &inertia,
+                       int parent,
+                       const btQuaternion &zero_rot_parent_to_this,  // rotate points in parent frame to this frame, when q = 0
+                       const btVector3 &joint_axis,    // in my frame
+                       const btVector3 &parent_axis_position,    // vector from parent COM to joint axis, in PARENT frame
+                       const btVector3 &my_axis_position,       // vector from joint axis to my COM, in MY frame
+					   bool disableParentCollision=false);
+	
+	const btMultibodyLink& getLink(int index) const
+	{
+		return links[index];
+	}
+
+	btMultibodyLink& getLink(int index)
+	{
+		return links[index];
+	}
+
+
+	void setBaseCollider(btMultiBodyLinkCollider* collider)//collider can be NULL to disable collision for the base
+	{
+		m_baseCollider = collider;
+	}
+	const btMultiBodyLinkCollider* getBaseCollider() const
+	{
+		return m_baseCollider;
+	}
+	btMultiBodyLinkCollider* getBaseCollider()
+	{
+		return m_baseCollider;
+	}
+
+    //
+    // get parent
+    // input: link num from 0 to num_links-1
+    // output: link num from 0 to num_links-1, OR -1 to mean the base.
+    //
+    int getParent(int link_num) const;
+    
+    
+    //
+    // get number of links, masses, moments of inertia
+    //
+
+    int getNumLinks() const { return links.size(); }
+    btScalar getBaseMass() const { return base_mass; }
+    const btVector3 & getBaseInertia() const { return base_inertia; }
+    btScalar getLinkMass(int i) const;
+    const btVector3 & getLinkInertia(int i) const;
+    
+
+    //
+    // change mass (incomplete: can only change base mass and inertia at present)
+    //
+
+    void setBaseMass(btScalar mass) { base_mass = mass; }
+    void setBaseInertia(const btVector3 &inertia) { base_inertia = inertia; }
+
+
+    //
+    // get/set pos/vel/rot/omega for the base link
+    //
+
+    const btVector3 & getBasePos() const { return base_pos; }    // in world frame
+    const btVector3 getBaseVel() const 
+	{ 
+		return btVector3(m_real_buf[3],m_real_buf[4],m_real_buf[5]); 
+	}     // in world frame
+    const btQuaternion & getWorldToBaseRot() const 
+	{ 
+		return base_quat; 
+	}     // rotates world vectors into base frame
+    btVector3 getBaseOmega() const { return btVector3(m_real_buf[0],m_real_buf[1],m_real_buf[2]); }   // in world frame
+
+    void setBasePos(const btVector3 &pos) 
+	{ 
+		base_pos = pos; 
+	}
+    void setBaseVel(const btVector3 &vel) 
+	{ 
+
+		m_real_buf[3]=vel[0]; m_real_buf[4]=vel[1]; m_real_buf[5]=vel[2]; 
+	}
+    void setWorldToBaseRot(const btQuaternion &rot) 
+	{ 
+		base_quat = rot; 
+	}
+    void setBaseOmega(const btVector3 &omega) 
+	{ 
+		m_real_buf[0]=omega[0]; 
+		m_real_buf[1]=omega[1]; 
+		m_real_buf[2]=omega[2]; 
+	}
+
+
+    //
+    // get/set pos/vel for child links (i = 0 to num_links-1)
+    //
+
+    btScalar getJointPos(int i) const;
+    btScalar getJointVel(int i) const;
+
+    void setJointPos(int i, btScalar q);
+    void setJointVel(int i, btScalar qdot);
+
+    //
+    // direct access to velocities as a vector of 6 + num_links elements.
+    // (omega first, then v, then joint velocities.)
+    //
+    const btScalar * getVelocityVector() const 
+	{ 
+		return &m_real_buf[0]; 
+	}
+/*    btScalar * getVelocityVector() 
+	{ 
+		return &real_buf[0]; 
+	}
+  */  
+
+    //
+    // get the frames of reference (positions and orientations) of the child links
+    // (i = 0 to num_links-1)
+    //
+
+    const btVector3 & getRVector(int i) const;   // vector from COM(parent(i)) to COM(i), in frame i's coords
+    const btQuaternion & getParentToLocalRot(int i) const;   // rotates vectors in frame parent(i) to vectors in frame i.
+
+
+    //
+    // transform vectors in local frame of link i to world frame (or vice versa)
+    //
+    btVector3 localPosToWorld(int i, const btVector3 &vec) const;
+    btVector3 localDirToWorld(int i, const btVector3 &vec) const;
+    btVector3 worldPosToLocal(int i, const btVector3 &vec) const;
+    btVector3 worldDirToLocal(int i, const btVector3 &vec) const;
+    
+
+    //
+    // calculate kinetic energy and angular momentum
+    // useful for debugging.
+    //
+
+    btScalar getKineticEnergy() const;
+    btVector3 getAngularMomentum() const;
+    
+
+    //
+    // set external forces and torques. Note all external forces/torques are given in the WORLD frame.
+    //
+
+    void clearForcesAndTorques();
+	void clearVelocities();
+
+    void addBaseForce(const btVector3 &f) 
+	{ 
+		base_force += f; 
+	}
+    void addBaseTorque(const btVector3 &t) { base_torque += t; }
+    void addLinkForce(int i, const btVector3 &f);
+    void addLinkTorque(int i, const btVector3 &t);
+    void addJointTorque(int i, btScalar Q);
+
+    const btVector3 & getBaseForce() const { return base_force; }
+    const btVector3 & getBaseTorque() const { return base_torque; }
+    const btVector3 & getLinkForce(int i) const;
+    const btVector3 & getLinkTorque(int i) const;
+    btScalar getJointTorque(int i) const;
+
+
+    //
+    // dynamics routines.
+    //
+
+    // timestep the velocities (given the external forces/torques set using addBaseForce etc).
+    // also sets up caches for calcAccelerationDeltas.
+    //
+    // Note: the caller must provide three vectors which are used as
+    // temporary scratch space. The idea here is to reduce dynamic
+    // memory allocation: the same scratch vectors can be re-used
+    // again and again for different Multibodies, instead of each
+    // btMultiBody allocating (and then deallocating) their own
+    // individual scratch buffers. This gives a considerable speed
+    // improvement, at least on Windows (where dynamic memory
+    // allocation appears to be fairly slow).
+    //
+    void stepVelocities(btScalar dt,
+                        btAlignedObjectArray<btScalar> &scratch_r,
+                        btAlignedObjectArray<btVector3> &scratch_v,
+                        btAlignedObjectArray<btMatrix3x3> &scratch_m);
+
+    // calcAccelerationDeltas
+    // input: force vector (in same format as jacobian, i.e.:
+    //                      3 torque values, 3 force values, num_links joint torque values)
+    // output: 3 omegadot values, 3 vdot values, num_links q_double_dot values
+    // (existing contents of output array are replaced)
+    // stepVelocities must have been called first.
+    void calcAccelerationDeltas(const btScalar *force, btScalar *output,
+                                btAlignedObjectArray<btScalar> &scratch_r,
+                                btAlignedObjectArray<btVector3> &scratch_v) const;
+
+    // apply a delta-vee directly. used in sequential impulses code.
+    void applyDeltaVee(const btScalar * delta_vee) 
+	{
+
+        for (int i = 0; i < 6 + getNumLinks(); ++i) 
+		{
+			m_real_buf[i] += delta_vee[i];
+		}
+		
+    }
+    void applyDeltaVee(const btScalar * delta_vee, btScalar multiplier) 
+	{
+		btScalar sum = 0;
+        for (int i = 0; i < 6 + getNumLinks(); ++i)
+		{
+			sum += delta_vee[i]*multiplier*delta_vee[i]*multiplier;
+		}
+		btScalar l = btSqrt(sum);
+		/*
+		static btScalar maxl = -1e30f;
+		if (l>maxl)
+		{
+			maxl=l;
+	//		printf("maxl=%f\n",maxl);
+		}
+		*/
+		if (l>m_maxAppliedImpulse)
+		{
+//			printf("exceeds 100: l=%f\n",maxl);
+			multiplier *= m_maxAppliedImpulse/l;
+		}
+
+        for (int i = 0; i < 6 + getNumLinks(); ++i)
+		{
+			sum += delta_vee[i]*multiplier*delta_vee[i]*multiplier;
+			m_real_buf[i] += delta_vee[i] * multiplier;
+		}
+    }
+
+    // timestep the positions (given current velocities).
+    void stepPositions(btScalar dt);
+
+
+    //
+    // contacts
+    //
+
+    // This routine fills out a contact constraint jacobian for this body.
+    // the 'normal' supplied must be -n for body1 or +n for body2 of the contact.
+    // 'normal' & 'contact_point' are both given in world coordinates.
+    void fillContactJacobian(int link,
+                             const btVector3 &contact_point,
+                             const btVector3 &normal,
+                             btScalar *jac,
+                             btAlignedObjectArray<btScalar> &scratch_r,
+                             btAlignedObjectArray<btVector3> &scratch_v,
+                             btAlignedObjectArray<btMatrix3x3> &scratch_m) const;
+
+
+    //
+    // sleeping
+    //
+	void	setCanSleep(bool canSleep)
+	{
+		can_sleep = canSleep;
+	}
+
+    bool isAwake() const { return awake; }
+    void wakeUp();
+    void goToSleep();
+    void checkMotionAndSleepIfRequired(btScalar timestep);
+    
+	bool hasFixedBase() const
+	{
+		    return fixed_base;
+	}
+
+	int getCompanionId() const
+	{
+		return m_companionId;
+	}
+	void setCompanionId(int id)
+	{
+		//printf("for %p setCompanionId(%d)\n",this, id);
+		m_companionId = id;
+	}
+
+	void setNumLinks(int numLinks)//careful: when changing the number of links, make sure to re-initialize or update existing links
+	{
+		links.resize(numLinks);
+	}
+
+	btScalar getLinearDamping() const
+	{
+			return m_linearDamping;
+	}
+	void setLinearDamping( btScalar damp)
+	{
+		m_linearDamping = damp;
+	}
+	btScalar getAngularDamping() const
+	{
+		return m_angularDamping;
+	}
+		
+	bool getUseGyroTerm() const
+	{
+		return m_useGyroTerm;
+	}
+	void setUseGyroTerm(bool useGyro)
+	{
+		m_useGyroTerm = useGyro;
+	}
+	btScalar	getMaxAppliedImpulse() const
+	{
+		return m_maxAppliedImpulse;
+	}
+	void	setMaxAppliedImpulse(btScalar maxImp)
+	{
+		m_maxAppliedImpulse = maxImp;
+	}
+
+	void	setHasSelfCollision(bool hasSelfCollision)
+	{
+		m_hasSelfCollision = hasSelfCollision;
+	}
+	bool hasSelfCollision() const
+	{
+		return m_hasSelfCollision;
+	}
+
+private:
+    btMultiBody(const btMultiBody &);  // not implemented
+    void operator=(const btMultiBody &);  // not implemented
+
+    void compTreeLinkVelocities(btVector3 *omega, btVector3 *vel) const;
+
+	void solveImatrix(const btVector3& rhs_top, const btVector3& rhs_bot, float result[6]) const;
+    
+	
+private:
+
+	btMultiBodyLinkCollider* m_baseCollider;//can be NULL
+
+    btVector3 base_pos;       // position of COM of base (world frame)
+    btQuaternion base_quat;   // rotates world points into base frame
+
+    btScalar base_mass;         // mass of the base
+    btVector3 base_inertia;   // inertia of the base (in local frame; diagonal)
+
+    btVector3 base_force;     // external force applied to base. World frame.
+    btVector3 base_torque;    // external torque applied to base. World frame.
+    
+    btAlignedObjectArray<btMultibodyLink> links;    // array of links, excluding the base. index from 0 to num_links-1.
+	btAlignedObjectArray<btMultiBodyLinkCollider*> m_colliders;
+    
+    //
+    // real_buf:
+    //  offset         size            array
+    //   0              6 + num_links   v (base_omega; base_vel; joint_vels)
+    //   6+num_links    num_links       D
+    //
+    // vector_buf:
+    //  offset         size         array
+    //   0              num_links    h_top
+    //   num_links      num_links    h_bottom
+    //
+    // matrix_buf:
+    //  offset         size         array
+    //   0              num_links+1  rot_from_parent
+    //
+    
+    btAlignedObjectArray<btScalar> m_real_buf;
+    btAlignedObjectArray<btVector3> vector_buf;
+    btAlignedObjectArray<btMatrix3x3> matrix_buf;
+
+    //std::auto_ptr<Eigen::LU<Eigen::Matrix<btScalar, 6, 6> > > cached_imatrix_lu;
+
+	btMatrix3x3 cached_inertia_top_left;
+	btMatrix3x3 cached_inertia_top_right;
+	btMatrix3x3 cached_inertia_lower_left;
+	btMatrix3x3 cached_inertia_lower_right;
+
+    bool fixed_base;
+
+    // Sleep parameters.
+    bool awake;
+    bool can_sleep;
+    btScalar sleep_timer;
+
+	int	m_companionId;
+	btScalar	m_linearDamping;
+	btScalar	m_angularDamping;
+	bool	m_useGyroTerm;
+	btScalar	m_maxAppliedImpulse;
+	bool		m_hasSelfCollision;
+};
+
+#endif

+ 527 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp

@@ -0,0 +1,527 @@
+#include "btMultiBodyConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+btMultiBodyConstraint::btMultiBodyConstraint(btMultiBody* bodyA,btMultiBody* bodyB,int linkA, int linkB, int numRows, bool isUnilateral)
+	:m_bodyA(bodyA),
+	m_bodyB(bodyB),
+	m_linkA(linkA),
+	m_linkB(linkB),
+	m_num_rows(numRows),
+	m_isUnilateral(isUnilateral),
+	m_maxAppliedImpulse(100)
+{
+	m_jac_size_A = (6 + bodyA->getNumLinks());
+	m_jac_size_both = (m_jac_size_A + (bodyB ? 6 + bodyB->getNumLinks() : 0));
+	m_pos_offset = ((1 + m_jac_size_both)*m_num_rows);
+	m_data.resize((2 + m_jac_size_both) * m_num_rows);
+}
+
+btMultiBodyConstraint::~btMultiBodyConstraint()
+{
+}
+
+
+
+btScalar btMultiBodyConstraint::fillConstraintRowMultiBodyMultiBody(btMultiBodySolverConstraint& constraintRow,
+														btMultiBodyJacobianData& data,
+														btScalar* jacOrgA,btScalar* jacOrgB,
+														const btContactSolverInfo& infoGlobal,
+														btScalar desiredVelocity,
+														btScalar lowerLimit,
+														btScalar upperLimit)
+{
+			
+	
+	
+	constraintRow.m_multiBodyA = m_bodyA;
+	constraintRow.m_multiBodyB = m_bodyB;
+
+	btMultiBody* multiBodyA = constraintRow.m_multiBodyA;
+	btMultiBody* multiBodyB = constraintRow.m_multiBodyB;
+
+	if (multiBodyA)
+	{
+		
+		const int ndofA  = multiBodyA->getNumLinks() + 6;
+
+		constraintRow.m_deltaVelAindex = multiBodyA->getCompanionId();
+
+		if (constraintRow.m_deltaVelAindex <0)
+		{
+			constraintRow.m_deltaVelAindex = data.m_deltaVelocities.size();
+			multiBodyA->setCompanionId(constraintRow.m_deltaVelAindex);
+			data.m_deltaVelocities.resize(data.m_deltaVelocities.size()+ndofA);
+		} else
+		{
+			btAssert(data.m_deltaVelocities.size() >= constraintRow.m_deltaVelAindex+ndofA);
+		}
+
+		constraintRow.m_jacAindex = data.m_jacobians.size();
+		data.m_jacobians.resize(data.m_jacobians.size()+ndofA);
+		data.m_deltaVelocitiesUnitImpulse.resize(data.m_deltaVelocitiesUnitImpulse.size()+ndofA);
+		btAssert(data.m_jacobians.size() == data.m_deltaVelocitiesUnitImpulse.size());
+		for (int i=0;i<ndofA;i++)
+			data.m_jacobians[constraintRow.m_jacAindex+i] = jacOrgA[i];
+		
+		btScalar* delta = &data.m_deltaVelocitiesUnitImpulse[constraintRow.m_jacAindex];
+		multiBodyA->calcAccelerationDeltas(&data.m_jacobians[constraintRow.m_jacAindex],delta,data.scratch_r, data.scratch_v);
+	} 
+
+	if (multiBodyB)
+	{
+		const int ndofB  = multiBodyB->getNumLinks() + 6;
+
+		constraintRow.m_deltaVelBindex = multiBodyB->getCompanionId();
+		if (constraintRow.m_deltaVelBindex <0)
+		{
+			constraintRow.m_deltaVelBindex = data.m_deltaVelocities.size();
+			multiBodyB->setCompanionId(constraintRow.m_deltaVelBindex);
+			data.m_deltaVelocities.resize(data.m_deltaVelocities.size()+ndofB);
+		}
+
+		constraintRow.m_jacBindex = data.m_jacobians.size();
+		data.m_jacobians.resize(data.m_jacobians.size()+ndofB);
+
+		for (int i=0;i<ndofB;i++)
+			data.m_jacobians[constraintRow.m_jacBindex+i] = jacOrgB[i];
+
+		data.m_deltaVelocitiesUnitImpulse.resize(data.m_deltaVelocitiesUnitImpulse.size()+ndofB);
+		btAssert(data.m_jacobians.size() == data.m_deltaVelocitiesUnitImpulse.size());
+		multiBodyB->calcAccelerationDeltas(&data.m_jacobians[constraintRow.m_jacBindex],&data.m_deltaVelocitiesUnitImpulse[constraintRow.m_jacBindex],data.scratch_r, data.scratch_v);
+	} 
+	{
+						
+		btVector3 vec;
+		btScalar denom0 = 0.f;
+		btScalar denom1 = 0.f;
+		btScalar* jacB = 0;
+		btScalar* jacA = 0;
+		btScalar* lambdaA =0;
+		btScalar* lambdaB =0;
+		int ndofA  = 0;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			jacA = &data.m_jacobians[constraintRow.m_jacAindex];
+			lambdaA = &data.m_deltaVelocitiesUnitImpulse[constraintRow.m_jacAindex];
+			for (int i = 0; i < ndofA; ++i)
+			{
+				btScalar j = jacA[i] ;
+				btScalar l =lambdaA[i];
+				denom0 += j*l;
+			}
+		} 
+		if (multiBodyB)
+		{
+			const int ndofB  = multiBodyB->getNumLinks() + 6;
+			jacB = &data.m_jacobians[constraintRow.m_jacBindex];
+			lambdaB = &data.m_deltaVelocitiesUnitImpulse[constraintRow.m_jacBindex];
+			for (int i = 0; i < ndofB; ++i)
+			{
+				btScalar j = jacB[i] ;
+				btScalar l =lambdaB[i];
+				denom1 += j*l;
+			}
+
+		} 
+
+		 if (multiBodyA && (multiBodyA==multiBodyB))
+		 {
+            // ndof1 == ndof2 in this case
+            for (int i = 0; i < ndofA; ++i) 
+			{
+                denom1 += jacB[i] * lambdaA[i];
+                denom1 += jacA[i] * lambdaB[i];
+            }
+        }
+
+		 btScalar d = denom0+denom1;
+		 if (btFabs(d)>SIMD_EPSILON)
+		 {
+			 
+			 constraintRow.m_jacDiagABInv = 1.f/(d);
+		 } else
+		 {
+			constraintRow.m_jacDiagABInv  = 1.f;
+		 }
+		
+	}
+
+	
+	//compute rhs and remaining constraintRow fields
+
+	
+
+
+	btScalar rel_vel = 0.f;
+	int ndofA  = 0;
+	int ndofB  = 0;
+	{
+
+		btVector3 vel1,vel2;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			btScalar* jacA = &data.m_jacobians[constraintRow.m_jacAindex];
+			for (int i = 0; i < ndofA ; ++i) 
+				rel_vel += multiBodyA->getVelocityVector()[i] * jacA[i];
+		} 
+		if (multiBodyB)
+		{
+			ndofB  = multiBodyB->getNumLinks() + 6;
+			btScalar* jacB = &data.m_jacobians[constraintRow.m_jacBindex];
+			for (int i = 0; i < ndofB ; ++i) 
+				rel_vel += multiBodyB->getVelocityVector()[i] * jacB[i];
+
+		}
+
+		constraintRow.m_friction = 0.f;
+
+		constraintRow.m_appliedImpulse = 0.f;
+		constraintRow.m_appliedPushImpulse = 0.f;
+		
+		btScalar	velocityError =  desiredVelocity - rel_vel;// * damping;
+
+		btScalar erp = infoGlobal.m_erp2;
+
+		btScalar velocityImpulse = velocityError *constraintRow.m_jacDiagABInv;
+
+		if (!infoGlobal.m_splitImpulse)
+		{
+			//combine position and velocity into rhs
+			constraintRow.m_rhs = velocityImpulse;
+			constraintRow.m_rhsPenetration = 0.f;
+
+		} else
+		{
+			//split position and velocity into rhs and m_rhsPenetration
+			constraintRow.m_rhs = velocityImpulse;
+			constraintRow.m_rhsPenetration = 0.f;
+		}
+
+
+		constraintRow.m_cfm = 0.f;
+		constraintRow.m_lowerLimit = lowerLimit;
+		constraintRow.m_upperLimit = upperLimit;
+
+	}
+	return rel_vel;
+}
+
+
+void	btMultiBodyConstraint::applyDeltaVee(btMultiBodyJacobianData& data, btScalar* delta_vee, btScalar impulse, int velocityIndex, int ndof)
+{
+	for (int i = 0; i < ndof; ++i) 
+		data.m_deltaVelocities[velocityIndex+i] += delta_vee[i] * impulse;
+}
+
+
+void btMultiBodyConstraint::fillMultiBodyConstraintMixed(btMultiBodySolverConstraint& solverConstraint, 
+																	btMultiBodyJacobianData& data,
+																 const btVector3& contactNormalOnB,
+																 const btVector3& posAworld, const btVector3& posBworld, 
+																 btScalar position,
+																 const btContactSolverInfo& infoGlobal,
+																 btScalar& relaxation,
+																 bool isFriction, btScalar desiredVelocity, btScalar cfmSlip)
+{
+			
+	
+	btVector3 rel_pos1 = posAworld;
+	btVector3 rel_pos2 = posBworld;
+
+	solverConstraint.m_multiBodyA = m_bodyA;
+	solverConstraint.m_multiBodyB = m_bodyB;
+	solverConstraint.m_linkA = m_linkA;
+	solverConstraint.m_linkB = m_linkB;
+	
+
+	btMultiBody* multiBodyA = solverConstraint.m_multiBodyA;
+	btMultiBody* multiBodyB = solverConstraint.m_multiBodyB;
+
+	const btVector3& pos1 = posAworld;
+	const btVector3& pos2 = posBworld;
+
+	btSolverBody* bodyA = multiBodyA ? 0 : &data.m_solverBodyPool->at(solverConstraint.m_solverBodyIdA);
+	btSolverBody* bodyB = multiBodyB ? 0 : &data.m_solverBodyPool->at(solverConstraint.m_solverBodyIdB);
+
+	btRigidBody* rb0 = multiBodyA ? 0 : bodyA->m_originalBody;
+	btRigidBody* rb1 = multiBodyB ? 0 : bodyB->m_originalBody;
+
+	if (bodyA)
+		rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); 
+	if (bodyB)
+		rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin();
+
+	relaxation = 1.f;
+
+	if (multiBodyA)
+	{
+		const int ndofA  = multiBodyA->getNumLinks() + 6;
+
+		solverConstraint.m_deltaVelAindex = multiBodyA->getCompanionId();
+
+		if (solverConstraint.m_deltaVelAindex <0)
+		{
+			solverConstraint.m_deltaVelAindex = data.m_deltaVelocities.size();
+			multiBodyA->setCompanionId(solverConstraint.m_deltaVelAindex);
+			data.m_deltaVelocities.resize(data.m_deltaVelocities.size()+ndofA);
+		} else
+		{
+			btAssert(data.m_deltaVelocities.size() >= solverConstraint.m_deltaVelAindex+ndofA);
+		}
+
+		solverConstraint.m_jacAindex = data.m_jacobians.size();
+		data.m_jacobians.resize(data.m_jacobians.size()+ndofA);
+		data.m_deltaVelocitiesUnitImpulse.resize(data.m_deltaVelocitiesUnitImpulse.size()+ndofA);
+		btAssert(data.m_jacobians.size() == data.m_deltaVelocitiesUnitImpulse.size());
+
+		btScalar* jac1=&data.m_jacobians[solverConstraint.m_jacAindex];
+		multiBodyA->fillContactJacobian(solverConstraint.m_linkA, posAworld, contactNormalOnB, jac1, data.scratch_r, data.scratch_v, data.scratch_m);
+		btScalar* delta = &data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+		multiBodyA->calcAccelerationDeltas(&data.m_jacobians[solverConstraint.m_jacAindex],delta,data.scratch_r, data.scratch_v);
+	} else
+	{
+		btVector3 torqueAxis0 = rel_pos1.cross(contactNormalOnB);
+		solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
+		solverConstraint.m_relpos1CrossNormal = torqueAxis0;
+		solverConstraint.m_contactNormal1 = contactNormalOnB;
+	}
+
+	if (multiBodyB)
+	{
+		const int ndofB  = multiBodyB->getNumLinks() + 6;
+
+		solverConstraint.m_deltaVelBindex = multiBodyB->getCompanionId();
+		if (solverConstraint.m_deltaVelBindex <0)
+		{
+			solverConstraint.m_deltaVelBindex = data.m_deltaVelocities.size();
+			multiBodyB->setCompanionId(solverConstraint.m_deltaVelBindex);
+			data.m_deltaVelocities.resize(data.m_deltaVelocities.size()+ndofB);
+		}
+
+		solverConstraint.m_jacBindex = data.m_jacobians.size();
+
+		data.m_jacobians.resize(data.m_jacobians.size()+ndofB);
+		data.m_deltaVelocitiesUnitImpulse.resize(data.m_deltaVelocitiesUnitImpulse.size()+ndofB);
+		btAssert(data.m_jacobians.size() == data.m_deltaVelocitiesUnitImpulse.size());
+
+		multiBodyB->fillContactJacobian(solverConstraint.m_linkB, posBworld, -contactNormalOnB, &data.m_jacobians[solverConstraint.m_jacBindex], data.scratch_r, data.scratch_v, data.scratch_m);
+		multiBodyB->calcAccelerationDeltas(&data.m_jacobians[solverConstraint.m_jacBindex],&data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex],data.scratch_r, data.scratch_v);
+	} else
+	{
+		btVector3 torqueAxis1 = rel_pos2.cross(contactNormalOnB);		
+		solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
+		solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
+		solverConstraint.m_contactNormal2 = -contactNormalOnB;
+	}
+
+	{
+						
+		btVector3 vec;
+		btScalar denom0 = 0.f;
+		btScalar denom1 = 0.f;
+		btScalar* jacB = 0;
+		btScalar* jacA = 0;
+		btScalar* lambdaA =0;
+		btScalar* lambdaB =0;
+		int ndofA  = 0;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			jacA = &data.m_jacobians[solverConstraint.m_jacAindex];
+			lambdaA = &data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+			for (int i = 0; i < ndofA; ++i)
+			{
+				btScalar j = jacA[i] ;
+				btScalar l =lambdaA[i];
+				denom0 += j*l;
+			}
+		} else
+		{
+			if (rb0)
+			{
+				vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
+				denom0 = rb0->getInvMass() + contactNormalOnB.dot(vec);
+			}
+		}
+		if (multiBodyB)
+		{
+			const int ndofB  = multiBodyB->getNumLinks() + 6;
+			jacB = &data.m_jacobians[solverConstraint.m_jacBindex];
+			lambdaB = &data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
+			for (int i = 0; i < ndofB; ++i)
+			{
+				btScalar j = jacB[i] ;
+				btScalar l =lambdaB[i];
+				denom1 += j*l;
+			}
+
+		} else
+		{
+			if (rb1)
+			{
+				vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
+				denom1 = rb1->getInvMass() + contactNormalOnB.dot(vec);
+			}
+		}
+
+		 if (multiBodyA && (multiBodyA==multiBodyB))
+		 {
+            // ndof1 == ndof2 in this case
+            for (int i = 0; i < ndofA; ++i) 
+			{
+                denom1 += jacB[i] * lambdaA[i];
+                denom1 += jacA[i] * lambdaB[i];
+            }
+        }
+
+		 btScalar d = denom0+denom1;
+		 if (btFabs(d)>SIMD_EPSILON)
+		 {
+			 
+			 solverConstraint.m_jacDiagABInv = relaxation/(d);
+		 } else
+		 {
+			solverConstraint.m_jacDiagABInv  = 1.f;
+		 }
+		
+	}
+
+	
+	//compute rhs and remaining solverConstraint fields
+
+	
+
+	btScalar restitution = 0.f;
+	btScalar penetration = isFriction? 0 : position+infoGlobal.m_linearSlop;
+
+	btScalar rel_vel = 0.f;
+	int ndofA  = 0;
+	int ndofB  = 0;
+	{
+
+		btVector3 vel1,vel2;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			btScalar* jacA = &data.m_jacobians[solverConstraint.m_jacAindex];
+			for (int i = 0; i < ndofA ; ++i) 
+				rel_vel += multiBodyA->getVelocityVector()[i] * jacA[i];
+		} else
+		{
+			if (rb0)
+			{
+				rel_vel += rb0->getVelocityInLocalPoint(rel_pos1).dot(solverConstraint.m_contactNormal1);
+			}
+		}
+		if (multiBodyB)
+		{
+			ndofB  = multiBodyB->getNumLinks() + 6;
+			btScalar* jacB = &data.m_jacobians[solverConstraint.m_jacBindex];
+			for (int i = 0; i < ndofB ; ++i) 
+				rel_vel += multiBodyB->getVelocityVector()[i] * jacB[i];
+
+		} else
+		{
+			if (rb1)
+			{
+				rel_vel += rb1->getVelocityInLocalPoint(rel_pos2).dot(solverConstraint.m_contactNormal2);
+			}
+		}
+
+		solverConstraint.m_friction = 0.f;//cp.m_combinedFriction;
+
+				
+		restitution =  restitution * -rel_vel;//restitutionCurve(rel_vel, cp.m_combinedRestitution);
+		if (restitution <= btScalar(0.))
+		{
+			restitution = 0.f;
+		};
+	}
+
+
+	///warm starting (or zero if disabled)
+	/*
+	if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+	{
+		solverConstraint.m_appliedImpulse = isFriction ? 0 : cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
+
+		if (solverConstraint.m_appliedImpulse)
+		{
+			if (multiBodyA)
+			{
+				btScalar impulse = solverConstraint.m_appliedImpulse;
+				btScalar* deltaV = &data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+				multiBodyA->applyDeltaVee(deltaV,impulse);
+				applyDeltaVee(data,deltaV,impulse,solverConstraint.m_deltaVelAindex,ndofA);
+			} else
+			{
+				if (rb0)
+					bodyA->internalApplyImpulse(solverConstraint.m_contactNormal1*bodyA->internalGetInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
+			}
+			if (multiBodyB)
+			{
+				btScalar impulse = solverConstraint.m_appliedImpulse;
+				btScalar* deltaV = &data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
+				multiBodyB->applyDeltaVee(deltaV,impulse);
+				applyDeltaVee(data,deltaV,impulse,solverConstraint.m_deltaVelBindex,ndofB);
+			} else
+			{
+				if (rb1)
+					bodyB->internalApplyImpulse(-solverConstraint.m_contactNormal2*bodyB->internalGetInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
+			}
+		}
+	} else
+	*/
+	{
+		solverConstraint.m_appliedImpulse = 0.f;
+	}
+
+	solverConstraint.m_appliedPushImpulse = 0.f;
+
+	{
+		
+
+		btScalar positionalError = 0.f;
+		btScalar	velocityError = restitution - rel_vel;// * damping;
+					
+
+		btScalar erp = infoGlobal.m_erp2;
+		if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+		{
+			erp = infoGlobal.m_erp;
+		}
+
+		if (penetration>0)
+		{
+			positionalError = 0;
+			velocityError = -penetration / infoGlobal.m_timeStep;
+
+		} else
+		{
+			positionalError = -penetration * erp/infoGlobal.m_timeStep;
+		}
+
+		btScalar  penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+		btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+
+		if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+		{
+			//combine position and velocity into rhs
+			solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+			solverConstraint.m_rhsPenetration = 0.f;
+
+		} else
+		{
+			//split position and velocity into rhs and m_rhsPenetration
+			solverConstraint.m_rhs = velocityImpulse;
+			solverConstraint.m_rhsPenetration = penetrationImpulse;
+		}
+
+		solverConstraint.m_cfm = 0.f;
+		solverConstraint.m_lowerLimit = -m_maxAppliedImpulse;
+		solverConstraint.m_upperLimit = m_maxAppliedImpulse;
+	}
+
+}

+ 166 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraint.h

@@ -0,0 +1,166 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_CONSTRAINT_H
+#define BT_MULTIBODY_CONSTRAINT_H
+
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "btMultiBody.h"
+
+class btMultiBody;
+struct btSolverInfo;
+
+#include "btMultiBodySolverConstraint.h"
+
+struct btMultiBodyJacobianData
+{
+	btAlignedObjectArray<btScalar>		m_jacobians;
+	btAlignedObjectArray<btScalar>		m_deltaVelocitiesUnitImpulse;
+	btAlignedObjectArray<btScalar>		m_deltaVelocities;
+	btAlignedObjectArray<btScalar>		scratch_r;
+	btAlignedObjectArray<btVector3>		scratch_v;
+	btAlignedObjectArray<btMatrix3x3>	scratch_m;
+	btAlignedObjectArray<btSolverBody>*	m_solverBodyPool;
+	int									m_fixedBodyId;
+
+};
+
+
+class btMultiBodyConstraint
+{
+protected:
+
+	btMultiBody*	m_bodyA;
+    btMultiBody*	m_bodyB;
+    int				m_linkA;
+    int				m_linkB;
+
+    int				m_num_rows;
+    int				m_jac_size_A;
+    int				m_jac_size_both;
+    int				m_pos_offset;
+
+	bool			m_isUnilateral;
+
+	btScalar		m_maxAppliedImpulse;
+
+
+    // data block laid out as follows:
+    // cached impulses. (one per row.)
+    // jacobians. (interleaved, row1 body1 then row1 body2 then row2 body 1 etc)
+    // positions. (one per row.)
+    btAlignedObjectArray<btScalar> m_data;
+
+	void	applyDeltaVee(btMultiBodyJacobianData& data, btScalar* delta_vee, btScalar impulse, int velocityIndex, int ndof);
+
+	void fillMultiBodyConstraintMixed(btMultiBodySolverConstraint& solverConstraint, 
+																	btMultiBodyJacobianData& data,
+																 const btVector3& contactNormalOnB,
+																 const btVector3& posAworld, const btVector3& posBworld, 
+																 btScalar position,
+																 const btContactSolverInfo& infoGlobal,
+																 btScalar& relaxation,
+																 bool isFriction, btScalar desiredVelocity=0, btScalar cfmSlip=0);
+
+		btScalar fillConstraintRowMultiBodyMultiBody(btMultiBodySolverConstraint& constraintRow,
+														btMultiBodyJacobianData& data,
+														btScalar* jacOrgA,btScalar* jacOrgB,
+														const btContactSolverInfo& infoGlobal,
+														btScalar desiredVelocity,
+														btScalar lowerLimit,
+														btScalar upperLimit);
+
+public:
+
+	btMultiBodyConstraint(btMultiBody* bodyA,btMultiBody* bodyB,int linkA, int linkB, int numRows, bool isUnilateral);
+	virtual ~btMultiBodyConstraint();
+
+
+
+	virtual int getIslandIdA() const =0;
+	virtual int getIslandIdB() const =0;
+	
+	virtual void createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal)=0;
+
+	int	getNumRows() const
+	{
+		return m_num_rows;
+	}
+
+	btMultiBody*	getMultiBodyA()
+	{
+		return m_bodyA;
+	}
+    btMultiBody*	getMultiBodyB()
+	{
+		return m_bodyB;
+	}
+
+	// current constraint position
+    // constraint is pos >= 0 for unilateral, or pos = 0 for bilateral
+    // NOTE: ignored position for friction rows.
+    btScalar getPosition(int row) const 
+	{ 
+		return m_data[m_pos_offset + row]; 
+	}
+
+    void setPosition(int row, btScalar pos) 
+	{ 
+		m_data[m_pos_offset + row] = pos; 
+	}
+
+	
+	bool isUnilateral() const
+	{
+		return m_isUnilateral;
+	}
+
+	// jacobian blocks.
+    // each of size 6 + num_links. (jacobian2 is null if no body2.)
+    // format: 3 'omega' coefficients, 3 'v' coefficients, then the 'qdot' coefficients.
+    btScalar* jacobianA(int row) 
+	{ 
+		return &m_data[m_num_rows + row * m_jac_size_both]; 
+	}
+    const btScalar* jacobianA(int row) const 
+	{ 
+		return &m_data[m_num_rows + (row * m_jac_size_both)]; 
+	}
+    btScalar* jacobianB(int row) 
+	{ 
+		return &m_data[m_num_rows + (row * m_jac_size_both) + m_jac_size_A]; 
+	}
+    const btScalar* jacobianB(int row) const 
+	{ 
+		return &m_data[m_num_rows + (row * m_jac_size_both) + m_jac_size_A]; 
+	}
+
+	btScalar	getMaxAppliedImpulse() const
+	{
+		return m_maxAppliedImpulse;
+	}
+	void	setMaxAppliedImpulse(btScalar maxImp)
+	{
+		m_maxAppliedImpulse = maxImp;
+	}
+	
+
+};
+
+#endif //BT_MULTIBODY_CONSTRAINT_H
+

+ 795 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp

@@ -0,0 +1,795 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btMultiBodyConstraintSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "btMultiBodyLinkCollider.h"
+
+#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
+#include "btMultiBodyConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+#include "LinearMath/btQuickprof.h"
+
+btScalar btMultiBodyConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
+{
+	btScalar val = btSequentialImpulseConstraintSolver::solveSingleIteration(iteration, bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer);
+	
+	//solve featherstone non-contact constraints
+
+	//printf("m_multiBodyNonContactConstraints = %d\n",m_multiBodyNonContactConstraints.size());
+	for (int j=0;j<m_multiBodyNonContactConstraints.size();j++)
+	{
+		btMultiBodySolverConstraint& constraint = m_multiBodyNonContactConstraints[j];
+		//if (iteration < constraint.m_overrideNumSolverIterations)
+			//resolveSingleConstraintRowGenericMultiBody(constraint);
+		resolveSingleConstraintRowGeneric(constraint);
+	}
+
+	//solve featherstone normal contact
+	for (int j=0;j<m_multiBodyNormalContactConstraints.size();j++)
+	{
+		btMultiBodySolverConstraint& constraint = m_multiBodyNormalContactConstraints[j];
+		if (iteration < infoGlobal.m_numIterations)
+			resolveSingleConstraintRowGeneric(constraint);
+	}
+	
+	//solve featherstone frictional contact
+
+	for (int j=0;j<this->m_multiBodyFrictionContactConstraints.size();j++)
+	{
+		if (iteration < infoGlobal.m_numIterations)
+		{
+			btMultiBodySolverConstraint& frictionConstraint = m_multiBodyFrictionContactConstraints[j];
+			btScalar totalImpulse = m_multiBodyNormalContactConstraints[frictionConstraint.m_frictionIndex].m_appliedImpulse;
+			//adjust friction limits here
+			if (totalImpulse>btScalar(0))
+			{
+				frictionConstraint.m_lowerLimit = -(frictionConstraint.m_friction*totalImpulse);
+				frictionConstraint.m_upperLimit = frictionConstraint.m_friction*totalImpulse;
+				resolveSingleConstraintRowGeneric(frictionConstraint);
+			}
+		}
+	}
+	return val;
+}
+
+btScalar btMultiBodyConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
+{
+	m_multiBodyNonContactConstraints.resize(0);
+	m_multiBodyNormalContactConstraints.resize(0);
+	m_multiBodyFrictionContactConstraints.resize(0);
+	m_data.m_jacobians.resize(0);
+	m_data.m_deltaVelocitiesUnitImpulse.resize(0);
+	m_data.m_deltaVelocities.resize(0);
+
+	for (int i=0;i<numBodies;i++)
+	{
+		const btMultiBodyLinkCollider* fcA = btMultiBodyLinkCollider::upcast(bodies[i]);
+		if (fcA)
+		{
+			fcA->m_multiBody->setCompanionId(-1);
+		}
+	}
+
+	btScalar val = btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup( bodies,numBodies,manifoldPtr, numManifolds, constraints,numConstraints,infoGlobal,debugDrawer);
+
+	return val;
+}
+
+void	btMultiBodyConstraintSolver::applyDeltaVee(btScalar* delta_vee, btScalar impulse, int velocityIndex, int ndof)
+{
+    for (int i = 0; i < ndof; ++i) 
+		m_data.m_deltaVelocities[velocityIndex+i] += delta_vee[i] * impulse;
+}
+
+void btMultiBodyConstraintSolver::resolveSingleConstraintRowGeneric(const btMultiBodySolverConstraint& c)
+{
+
+	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	btScalar deltaVelADotn=0;
+	btScalar deltaVelBDotn=0;
+	btSolverBody* bodyA = 0;
+	btSolverBody* bodyB = 0;
+	int ndofA=0;
+	int ndofB=0;
+
+	if (c.m_multiBodyA)
+	{
+		ndofA  = c.m_multiBodyA->getNumLinks() + 6;
+		for (int i = 0; i < ndofA; ++i) 
+			deltaVelADotn += m_data.m_jacobians[c.m_jacAindex+i] * m_data.m_deltaVelocities[c.m_deltaVelAindex+i];
+	} else
+	{
+		bodyA = &m_tmpSolverBodyPool[c.m_solverBodyIdA];
+		deltaVelADotn += c.m_contactNormal1.dot(bodyA->internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(bodyA->internalGetDeltaAngularVelocity());
+	}
+
+	if (c.m_multiBodyB)
+	{
+		ndofB = c.m_multiBodyB->getNumLinks() + 6;
+		for (int i = 0; i < ndofB; ++i) 
+			deltaVelBDotn += m_data.m_jacobians[c.m_jacBindex+i] * m_data.m_deltaVelocities[c.m_deltaVelBindex+i];
+	} else
+	{
+		bodyB = &m_tmpSolverBodyPool[c.m_solverBodyIdB];
+		deltaVelBDotn += c.m_contactNormal2.dot(bodyB->internalGetDeltaLinearVelocity())  + c.m_relpos2CrossNormal.dot(bodyB->internalGetDeltaAngularVelocity());
+	}
+
+	
+	deltaImpulse	-=	deltaVelADotn*c.m_jacDiagABInv;//m_jacDiagABInv = 1./denom
+	deltaImpulse	-=	deltaVelBDotn*c.m_jacDiagABInv;
+	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
+	
+	if (sum < c.m_lowerLimit)
+	{
+		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit;
+	}
+	else if (sum > c.m_upperLimit) 
+	{
+		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_upperLimit;
+	}
+	else
+	{
+		c.m_appliedImpulse = sum;
+	}
+
+	if (c.m_multiBodyA)
+	{
+		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse,c.m_deltaVelAindex,ndofA);
+		c.m_multiBodyA->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
+	} else
+	{
+		bodyA->internalApplyImpulse(c.m_contactNormal1*bodyA->internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+
+	}
+	if (c.m_multiBodyB)
+	{
+		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse,c.m_deltaVelBindex,ndofB);
+		c.m_multiBodyB->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
+	} else
+	{
+		bodyB->internalApplyImpulse(c.m_contactNormal2*bodyB->internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
+	}
+
+}
+
+
+void btMultiBodyConstraintSolver::resolveSingleConstraintRowGenericMultiBody(const btMultiBodySolverConstraint& c)
+{
+
+	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	btScalar deltaVelADotn=0;
+	btScalar deltaVelBDotn=0;
+	int ndofA=0;
+	int ndofB=0;
+
+	if (c.m_multiBodyA)
+	{
+		ndofA  = c.m_multiBodyA->getNumLinks() + 6;
+		for (int i = 0; i < ndofA; ++i) 
+			deltaVelADotn += m_data.m_jacobians[c.m_jacAindex+i] * m_data.m_deltaVelocities[c.m_deltaVelAindex+i];
+	}
+
+	if (c.m_multiBodyB)
+	{
+		ndofB = c.m_multiBodyB->getNumLinks() + 6;
+		for (int i = 0; i < ndofB; ++i) 
+			deltaVelBDotn += m_data.m_jacobians[c.m_jacBindex+i] * m_data.m_deltaVelocities[c.m_deltaVelBindex+i];
+	}
+
+	
+	deltaImpulse	-=	deltaVelADotn*c.m_jacDiagABInv;//m_jacDiagABInv = 1./denom
+	deltaImpulse	-=	deltaVelBDotn*c.m_jacDiagABInv;
+	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
+	
+	if (sum < c.m_lowerLimit)
+	{
+		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit;
+	}
+	else if (sum > c.m_upperLimit) 
+	{
+		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_upperLimit;
+	}
+	else
+	{
+		c.m_appliedImpulse = sum;
+	}
+
+	if (c.m_multiBodyA)
+	{
+		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse,c.m_deltaVelAindex,ndofA);
+		c.m_multiBodyA->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
+	}
+	if (c.m_multiBodyB)
+	{
+		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse,c.m_deltaVelBindex,ndofB);
+		c.m_multiBodyB->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
+	}
+}
+
+
+void btMultiBodyConstraintSolver::setupMultiBodyContactConstraint(btMultiBodySolverConstraint& solverConstraint, 
+																 const btVector3& contactNormal,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal,
+																 btScalar& relaxation,
+																 bool isFriction, btScalar desiredVelocity, btScalar cfmSlip)
+{
+			
+	BT_PROFILE("setupMultiBodyContactConstraint");
+	btVector3 rel_pos1;
+	btVector3 rel_pos2;
+
+	btMultiBody* multiBodyA = solverConstraint.m_multiBodyA;
+	btMultiBody* multiBodyB = solverConstraint.m_multiBodyB;
+
+	const btVector3& pos1 = cp.getPositionWorldOnA();
+	const btVector3& pos2 = cp.getPositionWorldOnB();
+
+	btSolverBody* bodyA = multiBodyA ? 0 : &m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdA];
+	btSolverBody* bodyB = multiBodyB ? 0 : &m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdB];
+
+	btRigidBody* rb0 = multiBodyA ? 0 : bodyA->m_originalBody;
+	btRigidBody* rb1 = multiBodyB ? 0 : bodyB->m_originalBody;
+
+	if (bodyA)
+		rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); 
+	if (bodyB)
+		rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin();
+
+	relaxation = 1.f;
+
+	if (multiBodyA)
+	{
+		const int ndofA  = multiBodyA->getNumLinks() + 6;
+
+		solverConstraint.m_deltaVelAindex = multiBodyA->getCompanionId();
+
+		if (solverConstraint.m_deltaVelAindex <0)
+		{
+			solverConstraint.m_deltaVelAindex = m_data.m_deltaVelocities.size();
+			multiBodyA->setCompanionId(solverConstraint.m_deltaVelAindex);
+			m_data.m_deltaVelocities.resize(m_data.m_deltaVelocities.size()+ndofA);
+		} else
+		{
+			btAssert(m_data.m_deltaVelocities.size() >= solverConstraint.m_deltaVelAindex+ndofA);
+		}
+
+		solverConstraint.m_jacAindex = m_data.m_jacobians.size();
+		m_data.m_jacobians.resize(m_data.m_jacobians.size()+ndofA);
+		m_data.m_deltaVelocitiesUnitImpulse.resize(m_data.m_deltaVelocitiesUnitImpulse.size()+ndofA);
+		btAssert(m_data.m_jacobians.size() == m_data.m_deltaVelocitiesUnitImpulse.size());
+
+		btScalar* jac1=&m_data.m_jacobians[solverConstraint.m_jacAindex];
+		multiBodyA->fillContactJacobian(solverConstraint.m_linkA, cp.getPositionWorldOnA(), contactNormal, jac1, m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
+		btScalar* delta = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+		multiBodyA->calcAccelerationDeltas(&m_data.m_jacobians[solverConstraint.m_jacAindex],delta,m_data.scratch_r, m_data.scratch_v);
+	} else
+	{
+		btVector3 torqueAxis0 = rel_pos1.cross(contactNormal);
+		solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
+		solverConstraint.m_relpos1CrossNormal = torqueAxis0;
+		solverConstraint.m_contactNormal1 = contactNormal;
+	}
+
+	if (multiBodyB)
+	{
+		const int ndofB  = multiBodyB->getNumLinks() + 6;
+
+		solverConstraint.m_deltaVelBindex = multiBodyB->getCompanionId();
+		if (solverConstraint.m_deltaVelBindex <0)
+		{
+			solverConstraint.m_deltaVelBindex = m_data.m_deltaVelocities.size();
+			multiBodyB->setCompanionId(solverConstraint.m_deltaVelBindex);
+			m_data.m_deltaVelocities.resize(m_data.m_deltaVelocities.size()+ndofB);
+		}
+
+		solverConstraint.m_jacBindex = m_data.m_jacobians.size();
+
+		m_data.m_jacobians.resize(m_data.m_jacobians.size()+ndofB);
+		m_data.m_deltaVelocitiesUnitImpulse.resize(m_data.m_deltaVelocitiesUnitImpulse.size()+ndofB);
+		btAssert(m_data.m_jacobians.size() == m_data.m_deltaVelocitiesUnitImpulse.size());
+
+		multiBodyB->fillContactJacobian(solverConstraint.m_linkB, cp.getPositionWorldOnB(), -contactNormal, &m_data.m_jacobians[solverConstraint.m_jacBindex], m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
+		multiBodyB->calcAccelerationDeltas(&m_data.m_jacobians[solverConstraint.m_jacBindex],&m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex],m_data.scratch_r, m_data.scratch_v);
+	} else
+	{
+		btVector3 torqueAxis1 = rel_pos2.cross(contactNormal);		
+		solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
+		solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
+		solverConstraint.m_contactNormal2 = -contactNormal;
+	}
+
+	{
+						
+		btVector3 vec;
+		btScalar denom0 = 0.f;
+		btScalar denom1 = 0.f;
+		btScalar* jacB = 0;
+		btScalar* jacA = 0;
+		btScalar* lambdaA =0;
+		btScalar* lambdaB =0;
+		int ndofA  = 0;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			jacA = &m_data.m_jacobians[solverConstraint.m_jacAindex];
+			lambdaA = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+			for (int i = 0; i < ndofA; ++i)
+			{
+				btScalar j = jacA[i] ;
+				btScalar l =lambdaA[i];
+				denom0 += j*l;
+			}
+		} else
+		{
+			if (rb0)
+			{
+				vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
+				denom0 = rb0->getInvMass() + contactNormal.dot(vec);
+			}
+		}
+		if (multiBodyB)
+		{
+			const int ndofB  = multiBodyB->getNumLinks() + 6;
+			jacB = &m_data.m_jacobians[solverConstraint.m_jacBindex];
+			lambdaB = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
+			for (int i = 0; i < ndofB; ++i)
+			{
+				btScalar j = jacB[i] ;
+				btScalar l =lambdaB[i];
+				denom1 += j*l;
+			}
+
+		} else
+		{
+			if (rb1)
+			{
+				vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
+				denom1 = rb1->getInvMass() + contactNormal.dot(vec);
+			}
+		}
+
+		 if (multiBodyA && (multiBodyA==multiBodyB))
+		 {
+            // ndof1 == ndof2 in this case
+            for (int i = 0; i < ndofA; ++i) 
+			{
+                denom1 += jacB[i] * lambdaA[i];
+                denom1 += jacA[i] * lambdaB[i];
+            }
+        }
+
+		 btScalar d = denom0+denom1;
+		 if (btFabs(d)>SIMD_EPSILON)
+		 {
+			 
+			 solverConstraint.m_jacDiagABInv = relaxation/(d);
+		 } else
+		 {
+			solverConstraint.m_jacDiagABInv  = 1.f;
+		 }
+		
+	}
+
+	
+	//compute rhs and remaining solverConstraint fields
+
+	
+
+	btScalar restitution = 0.f;
+	btScalar penetration = isFriction? 0 : cp.getDistance()+infoGlobal.m_linearSlop;
+
+	btScalar rel_vel = 0.f;
+	int ndofA  = 0;
+	int ndofB  = 0;
+	{
+
+		btVector3 vel1,vel2;
+		if (multiBodyA)
+		{
+			ndofA  = multiBodyA->getNumLinks() + 6;
+			btScalar* jacA = &m_data.m_jacobians[solverConstraint.m_jacAindex];
+			for (int i = 0; i < ndofA ; ++i) 
+				rel_vel += multiBodyA->getVelocityVector()[i] * jacA[i];
+		} else
+		{
+			if (rb0)
+			{
+				rel_vel += rb0->getVelocityInLocalPoint(rel_pos1).dot(solverConstraint.m_contactNormal1);
+			}
+		}
+		if (multiBodyB)
+		{
+			ndofB  = multiBodyB->getNumLinks() + 6;
+			btScalar* jacB = &m_data.m_jacobians[solverConstraint.m_jacBindex];
+			for (int i = 0; i < ndofB ; ++i) 
+				rel_vel += multiBodyB->getVelocityVector()[i] * jacB[i];
+
+		} else
+		{
+			if (rb1)
+			{
+				rel_vel += rb1->getVelocityInLocalPoint(rel_pos2).dot(solverConstraint.m_contactNormal2);
+			}
+		}
+
+		solverConstraint.m_friction = cp.m_combinedFriction;
+
+				
+		restitution =  restitutionCurve(rel_vel, cp.m_combinedRestitution);
+		if (restitution <= btScalar(0.))
+		{
+			restitution = 0.f;
+		};
+	}
+
+
+	///warm starting (or zero if disabled)
+	if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+	{
+		solverConstraint.m_appliedImpulse = isFriction ? 0 : cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
+
+		if (solverConstraint.m_appliedImpulse)
+		{
+			if (multiBodyA)
+			{
+				btScalar impulse = solverConstraint.m_appliedImpulse;
+				btScalar* deltaV = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
+				multiBodyA->applyDeltaVee(deltaV,impulse);
+				applyDeltaVee(deltaV,impulse,solverConstraint.m_deltaVelAindex,ndofA);
+			} else
+			{
+				if (rb0)
+					bodyA->internalApplyImpulse(solverConstraint.m_contactNormal1*bodyA->internalGetInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
+			}
+			if (multiBodyB)
+			{
+				btScalar impulse = solverConstraint.m_appliedImpulse;
+				btScalar* deltaV = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
+				multiBodyB->applyDeltaVee(deltaV,impulse);
+				applyDeltaVee(deltaV,impulse,solverConstraint.m_deltaVelBindex,ndofB);
+			} else
+			{
+				if (rb1)
+					bodyB->internalApplyImpulse(-solverConstraint.m_contactNormal2*bodyB->internalGetInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
+			}
+		}
+	} else
+	{
+		solverConstraint.m_appliedImpulse = 0.f;
+	}
+
+	solverConstraint.m_appliedPushImpulse = 0.f;
+
+	{
+		
+
+		btScalar positionalError = 0.f;
+		btScalar	velocityError = restitution - rel_vel;// * damping;
+					
+
+		btScalar erp = infoGlobal.m_erp2;
+		if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+		{
+			erp = infoGlobal.m_erp;
+		}
+
+		if (penetration>0)
+		{
+			positionalError = 0;
+			velocityError = -penetration / infoGlobal.m_timeStep;
+
+		} else
+		{
+			positionalError = -penetration * erp/infoGlobal.m_timeStep;
+		}
+
+		btScalar  penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+		btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+
+		if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+		{
+			//combine position and velocity into rhs
+			solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+			solverConstraint.m_rhsPenetration = 0.f;
+
+		} else
+		{
+			//split position and velocity into rhs and m_rhsPenetration
+			solverConstraint.m_rhs = velocityImpulse;
+			solverConstraint.m_rhsPenetration = penetrationImpulse;
+		}
+
+		solverConstraint.m_cfm = 0.f;
+		solverConstraint.m_lowerLimit = 0;
+		solverConstraint.m_upperLimit = 1e10f;
+	}
+
+}
+
+
+
+
+btMultiBodySolverConstraint&	btMultiBodyConstraintSolver::addMultiBodyFrictionConstraint(const btVector3& normalAxis,btPersistentManifold* manifold,int frictionIndex,btManifoldPoint& cp,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, const btContactSolverInfo& infoGlobal, btScalar desiredVelocity, btScalar cfmSlip)
+{
+	BT_PROFILE("addMultiBodyFrictionConstraint");
+	btMultiBodySolverConstraint& solverConstraint = m_multiBodyFrictionContactConstraints.expandNonInitializing();
+	solverConstraint.m_frictionIndex = frictionIndex;
+	bool isFriction = true;
+
+	const btMultiBodyLinkCollider* fcA = btMultiBodyLinkCollider::upcast(manifold->getBody0());
+	const btMultiBodyLinkCollider* fcB = btMultiBodyLinkCollider::upcast(manifold->getBody1());
+	
+	btMultiBody* mbA = fcA? fcA->m_multiBody : 0;
+	btMultiBody* mbB = fcB? fcB->m_multiBody : 0;
+
+	int solverBodyIdA = mbA? -1 : getOrInitSolverBody(*colObj0,infoGlobal.m_timeStep);
+	int solverBodyIdB = mbB ? -1 : getOrInitSolverBody(*colObj1,infoGlobal.m_timeStep);
+
+	solverConstraint.m_solverBodyIdA = solverBodyIdA;
+	solverConstraint.m_solverBodyIdB = solverBodyIdB;
+	solverConstraint.m_multiBodyA = mbA;
+	if (mbA)
+		solverConstraint.m_linkA = fcA->m_link;
+
+	solverConstraint.m_multiBodyB = mbB;
+	if (mbB)
+		solverConstraint.m_linkB = fcB->m_link;
+
+	solverConstraint.m_originalContactPoint = &cp;
+
+	setupMultiBodyContactConstraint(solverConstraint, normalAxis, cp, infoGlobal,relaxation,isFriction, desiredVelocity, cfmSlip);
+	return solverConstraint;
+}
+
+void	btMultiBodyConstraintSolver::convertMultiBodyContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal)
+{
+	const btMultiBodyLinkCollider* fcA = btMultiBodyLinkCollider::upcast(manifold->getBody0());
+	const btMultiBodyLinkCollider* fcB = btMultiBodyLinkCollider::upcast(manifold->getBody1());
+	
+	btMultiBody* mbA = fcA? fcA->m_multiBody : 0;
+	btMultiBody* mbB = fcB? fcB->m_multiBody : 0;
+
+	btCollisionObject* colObj0=0,*colObj1=0;
+
+	colObj0 = (btCollisionObject*)manifold->getBody0();
+	colObj1 = (btCollisionObject*)manifold->getBody1();
+
+	int solverBodyIdA = mbA? -1 : getOrInitSolverBody(*colObj0,infoGlobal.m_timeStep);
+	int solverBodyIdB = mbB ? -1 : getOrInitSolverBody(*colObj1,infoGlobal.m_timeStep);
+
+	btSolverBody* solverBodyA = mbA ? 0 : &m_tmpSolverBodyPool[solverBodyIdA];
+	btSolverBody* solverBodyB = mbB ? 0 : &m_tmpSolverBodyPool[solverBodyIdB];
+
+
+	///avoid collision response between two static objects
+//	if (!solverBodyA || (solverBodyA->m_invMass.isZero() && (!solverBodyB || solverBodyB->m_invMass.isZero())))
+	//	return;
+
+	int rollingFriction=1;
+
+	for (int j=0;j<manifold->getNumContacts();j++)
+	{
+
+		btManifoldPoint& cp = manifold->getContactPoint(j);
+
+		if (cp.getDistance() <= manifold->getContactProcessingThreshold())
+		{
+		
+			btScalar relaxation;
+
+			int frictionIndex = m_multiBodyNormalContactConstraints.size();
+
+			btMultiBodySolverConstraint& solverConstraint = m_multiBodyNormalContactConstraints.expandNonInitializing();
+
+			btRigidBody* rb0 = btRigidBody::upcast(colObj0);
+			btRigidBody* rb1 = btRigidBody::upcast(colObj1);
+			solverConstraint.m_solverBodyIdA = solverBodyIdA;
+			solverConstraint.m_solverBodyIdB = solverBodyIdB;
+			solverConstraint.m_multiBodyA = mbA;
+			if (mbA)
+				solverConstraint.m_linkA = fcA->m_link;
+
+			solverConstraint.m_multiBodyB = mbB;
+			if (mbB)
+				solverConstraint.m_linkB = fcB->m_link;
+
+			solverConstraint.m_originalContactPoint = &cp;
+
+			bool isFriction = false;
+			setupMultiBodyContactConstraint(solverConstraint, cp.m_normalWorldOnB,cp, infoGlobal, relaxation, isFriction);
+
+//			const btVector3& pos1 = cp.getPositionWorldOnA();
+//			const btVector3& pos2 = cp.getPositionWorldOnB();
+
+			/////setup the friction constraints
+#define ENABLE_FRICTION
+#ifdef ENABLE_FRICTION
+			solverConstraint.m_frictionIndex = frictionIndex;
+#if ROLLING_FRICTION
+			btVector3 angVelA(0,0,0),angVelB(0,0,0);
+			if (rb0)
+				angVelA = rb0->getAngularVelocity();
+			if (rb1)
+				angVelB = rb1->getAngularVelocity();
+			btVector3 relAngVel = angVelB-angVelA;
+
+			if ((cp.m_combinedRollingFriction>0.f) && (rollingFriction>0))
+			{
+				//only a single rollingFriction per manifold
+				rollingFriction--;
+				if (relAngVel.length()>infoGlobal.m_singleAxisRollingFrictionThreshold)
+				{
+					relAngVel.normalize();
+					applyAnisotropicFriction(colObj0,relAngVel,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,relAngVel,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					if (relAngVel.length()>0.001)
+						addRollingFrictionConstraint(relAngVel,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+				} else
+				{
+					addRollingFrictionConstraint(cp.m_normalWorldOnB,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+					btVector3 axis0,axis1;
+					btPlaneSpace1(cp.m_normalWorldOnB,axis0,axis1);
+					applyAnisotropicFriction(colObj0,axis0,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,axis0,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj0,axis1,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,axis1,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					if (axis0.length()>0.001)
+						addRollingFrictionConstraint(axis0,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+					if (axis1.length()>0.001)
+						addRollingFrictionConstraint(axis1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+		
+				}
+			}
+#endif //ROLLING_FRICTION
+
+			///Bullet has several options to set the friction directions
+			///By default, each contact has only a single friction direction that is recomputed automatically very frame 
+			///based on the relative linear velocity.
+			///If the relative velocity it zero, it will automatically compute a friction direction.
+			
+			///You can also enable two friction directions, using the SOLVER_USE_2_FRICTION_DIRECTIONS.
+			///In that case, the second friction direction will be orthogonal to both contact normal and first friction direction.
+			///
+			///If you choose SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION, then the friction will be independent from the relative projected velocity.
+			///
+			///The user can manually override the friction directions for certain contacts using a contact callback, 
+			///and set the cp.m_lateralFrictionInitialized to true
+			///In that case, you can set the target relative motion in each friction direction (cp.m_contactMotion1 and cp.m_contactMotion2)
+			///this will give a conveyor belt effect
+			///
+			if (!(infoGlobal.m_solverMode & SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
+			{/*
+				cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;
+				btScalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();
+				if (!(infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > SIMD_EPSILON)
+				{
+					cp.m_lateralFrictionDir1 *= 1.f/btSqrt(lat_rel_vel);
+					if((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					{
+						cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
+						cp.m_lateralFrictionDir2.normalize();//??
+						applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+					}
+
+					applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+				} else
+				*/
+				{
+					btPlaneSpace1(cp.m_normalWorldOnB,cp.m_lateralFrictionDir1,cp.m_lateralFrictionDir2);
+
+					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					{
+						applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir2,manifold,frictionIndex,cp,colObj0,colObj1, relaxation,infoGlobal);
+					}
+
+					applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir1,manifold,frictionIndex,cp,colObj0,colObj1, relaxation,infoGlobal);
+
+					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION))
+					{
+						cp.m_lateralFrictionInitialized = true;
+					}
+				}
+
+			} else
+			{
+				addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir1,manifold,frictionIndex,cp,colObj0,colObj1, relaxation,infoGlobal,cp.m_contactMotion1, cp.m_contactCFM1);
+
+				if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					addMultiBodyFrictionConstraint(cp.m_lateralFrictionDir2,manifold,frictionIndex,cp,colObj0,colObj1, relaxation, infoGlobal,cp.m_contactMotion2, cp.m_contactCFM2);
+
+				//setMultiBodyFrictionConstraintImpulse( solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal);
+				//todo:
+				solverConstraint.m_appliedImpulse = 0.f;
+				solverConstraint.m_appliedPushImpulse = 0.f;
+			}
+		
+
+#endif //ENABLE_FRICTION
+
+		}
+	}
+}
+
+void btMultiBodyConstraintSolver::convertContacts(btPersistentManifold** manifoldPtr,int numManifolds, const btContactSolverInfo& infoGlobal)
+{
+	btPersistentManifold* manifold = 0;
+
+	for (int i=0;i<numManifolds;i++)
+	{
+		btPersistentManifold* manifold= manifoldPtr[i];
+		const btMultiBodyLinkCollider* fcA = btMultiBodyLinkCollider::upcast(manifold->getBody0());
+		const btMultiBodyLinkCollider* fcB = btMultiBodyLinkCollider::upcast(manifold->getBody1());
+		if (!fcA && !fcB)
+		{
+			//the contact doesn't involve any Featherstone btMultiBody, so deal with the regular btRigidBody/btCollisionObject case
+			convertContact(manifold,infoGlobal);
+		} else
+		{
+			convertMultiBodyContact(manifold,infoGlobal);
+		}
+	}
+
+	//also convert the multibody constraints, if any
+
+	
+	for (int i=0;i<m_tmpNumMultiBodyConstraints;i++)
+	{
+		btMultiBodyConstraint* c = m_tmpMultiBodyConstraints[i];
+		m_data.m_solverBodyPool = &m_tmpSolverBodyPool;
+		m_data.m_fixedBodyId = m_fixedBodyId;
+		
+		c->createConstraintRows(m_multiBodyNonContactConstraints,m_data,	infoGlobal);
+	}
+
+}
+
+
+
+btScalar btMultiBodyConstraintSolver::solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher)
+{
+	return btSequentialImpulseConstraintSolver::solveGroup(bodies,numBodies,manifold,numManifolds,constraints,numConstraints,info,debugDrawer,dispatcher);
+}
+
+
+void  btMultiBodyConstraintSolver::solveMultiBodyGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher)
+{
+	//printf("solveMultiBodyGroup start\n");
+	m_tmpMultiBodyConstraints = multiBodyConstraints;
+	m_tmpNumMultiBodyConstraints = numMultiBodyConstraints;
+	
+	btSequentialImpulseConstraintSolver::solveGroup(bodies,numBodies,manifold,numManifolds,constraints,numConstraints,info,debugDrawer,dispatcher);
+
+	m_tmpMultiBodyConstraints = 0;
+	m_tmpNumMultiBodyConstraints = 0;
+	
+
+}

+ 85 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.h

@@ -0,0 +1,85 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_CONSTRAINT_SOLVER_H
+#define BT_MULTIBODY_CONSTRAINT_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+#include "btMultiBodySolverConstraint.h"
+
+
+class btMultiBody;
+
+#include "btMultiBodyConstraint.h"
+
+
+
+ATTRIBUTE_ALIGNED16(class) btMultiBodyConstraintSolver : public btSequentialImpulseConstraintSolver
+{
+
+protected:
+
+	btMultiBodyConstraintArray			m_multiBodyNonContactConstraints;
+
+	btMultiBodyConstraintArray			m_multiBodyNormalContactConstraints;
+	btMultiBodyConstraintArray			m_multiBodyFrictionContactConstraints;
+
+	btMultiBodyJacobianData				m_data;
+	
+	//temp storage for multi body constraints for a specific island/group called by 'solveGroup'
+	btMultiBodyConstraint**					m_tmpMultiBodyConstraints;
+	int										m_tmpNumMultiBodyConstraints;
+
+	void resolveSingleConstraintRowGeneric(const btMultiBodySolverConstraint& c);
+	void resolveSingleConstraintRowGenericMultiBody(const btMultiBodySolverConstraint& c);
+
+	void convertContacts(btPersistentManifold** manifoldPtr,int numManifolds, const btContactSolverInfo& infoGlobal);
+	btMultiBodySolverConstraint&	addMultiBodyFrictionConstraint(const btVector3& normalAxis,btPersistentManifold* manifold,int frictionIndex,btManifoldPoint& cp,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, const btContactSolverInfo& infoGlobal, btScalar desiredVelocity=0, btScalar cfmSlip=0);
+
+
+	void setupMultiBodyJointLimitConstraint(btMultiBodySolverConstraint& constraintRow, 
+																 btScalar* jacA,btScalar* jacB,
+																 btScalar penetration,btScalar combinedFrictionCoeff, btScalar combinedRestitutionCoeff,
+																 const btContactSolverInfo& infoGlobal);
+
+	void setupMultiBodyContactConstraint(btMultiBodySolverConstraint& solverConstraint, 
+																 const btVector3& contactNormal,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal,
+																 btScalar& relaxation,
+																 bool isFriction, btScalar desiredVelocity=0, btScalar cfmSlip=0);
+
+	void convertMultiBodyContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
+	virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+//	virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+
+	virtual btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+	void	applyDeltaVee(btScalar* deltaV, btScalar impulse, int velocityIndex, int ndof);
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	///this method should not be called, it was just used during porting/integration of Featherstone btMultiBody, providing backwards compatibility but no support for btMultiBodyConstraint (only contact constraints)
+	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher);
+
+	virtual void solveMultiBodyGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer,btDispatcher* dispatcher);
+};
+
+	
+	
+
+
+#endif //BT_MULTIBODY_CONSTRAINT_SOLVER_H
+

+ 578 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp

@@ -0,0 +1,578 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btMultiBodyDynamicsWorld.h"
+#include "btMultiBodyConstraintSolver.h"
+#include "btMultiBody.h"
+#include "btMultiBodyLinkCollider.h"
+#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
+#include "LinearMath/btQuickprof.h"
+#include "btMultiBodyConstraint.h"
+
+	
+
+
+void	btMultiBodyDynamicsWorld::addMultiBody(btMultiBody* body, short group, short mask)
+{
+	m_multiBodies.push_back(body);
+
+}
+
+void	btMultiBodyDynamicsWorld::removeMultiBody(btMultiBody* body)
+{
+	m_multiBodies.remove(body);
+}
+
+void	btMultiBodyDynamicsWorld::calculateSimulationIslands()
+{
+	BT_PROFILE("calculateSimulationIslands");
+
+	getSimulationIslandManager()->updateActivationState(getCollisionWorld(),getCollisionWorld()->getDispatcher());
+
+    {
+        //merge islands based on speculative contact manifolds too
+        for (int i=0;i<this->m_predictiveManifolds.size();i++)
+        {
+            btPersistentManifold* manifold = m_predictiveManifolds[i];
+            
+            const btCollisionObject* colObj0 = manifold->getBody0();
+            const btCollisionObject* colObj1 = manifold->getBody1();
+            
+            if (((colObj0) && (!(colObj0)->isStaticOrKinematicObject())) &&
+                ((colObj1) && (!(colObj1)->isStaticOrKinematicObject())))
+            {
+				getSimulationIslandManager()->getUnionFind().unite((colObj0)->getIslandTag(),(colObj1)->getIslandTag());
+            }
+        }
+    }
+    
+	{
+		int i;
+		int numConstraints = int(m_constraints.size());
+		for (i=0;i< numConstraints ; i++ )
+		{
+			btTypedConstraint* constraint = m_constraints[i];
+			if (constraint->isEnabled())
+			{
+				const btRigidBody* colObj0 = &constraint->getRigidBodyA();
+				const btRigidBody* colObj1 = &constraint->getRigidBodyB();
+
+				if (((colObj0) && (!(colObj0)->isStaticOrKinematicObject())) &&
+					((colObj1) && (!(colObj1)->isStaticOrKinematicObject())))
+				{
+					getSimulationIslandManager()->getUnionFind().unite((colObj0)->getIslandTag(),(colObj1)->getIslandTag());
+				}
+			}
+		}
+	}
+
+	//merge islands linked by Featherstone link colliders
+	for (int i=0;i<m_multiBodies.size();i++)
+	{
+		btMultiBody* body = m_multiBodies[i];
+		{
+			btMultiBodyLinkCollider* prev = body->getBaseCollider();
+
+			for (int b=0;b<body->getNumLinks();b++)
+			{
+				btMultiBodyLinkCollider* cur = body->getLink(b).m_collider;
+				
+				if (((cur) && (!(cur)->isStaticOrKinematicObject())) &&
+					((prev) && (!(prev)->isStaticOrKinematicObject())))
+				{
+					int tagPrev = prev->getIslandTag();
+					int tagCur = cur->getIslandTag();
+					getSimulationIslandManager()->getUnionFind().unite(tagPrev, tagCur);
+				}
+				if (cur && !cur->isStaticOrKinematicObject())
+					prev = cur;
+				
+			}
+		}
+	}
+
+	//merge islands linked by multibody constraints
+	{
+		for (int i=0;i<this->m_multiBodyConstraints.size();i++)
+		{
+			btMultiBodyConstraint* c = m_multiBodyConstraints[i];
+			int tagA = c->getIslandIdA();
+			int tagB = c->getIslandIdB();
+			if (tagA>=0 && tagB>=0)
+				getSimulationIslandManager()->getUnionFind().unite(tagA, tagB);
+		}
+	}
+
+	//Store the island id in each body
+	getSimulationIslandManager()->storeIslandActivationState(getCollisionWorld());
+
+}
+
+
+void	btMultiBodyDynamicsWorld::updateActivationState(btScalar timeStep)
+{
+	BT_PROFILE("btMultiBodyDynamicsWorld::updateActivationState");
+
+	
+	
+	for ( int i=0;i<m_multiBodies.size();i++)
+	{
+		btMultiBody* body = m_multiBodies[i];
+		if (body)
+		{
+			body->checkMotionAndSleepIfRequired(timeStep);
+			if (!body->isAwake())
+			{
+				btMultiBodyLinkCollider* col = body->getBaseCollider();
+				if (col && col->getActivationState() == ACTIVE_TAG)
+				{
+					col->setActivationState( WANTS_DEACTIVATION);
+					col->setDeactivationTime(0.f);
+				}
+				for (int b=0;b<body->getNumLinks();b++)
+				{
+					btMultiBodyLinkCollider* col = body->getLink(b).m_collider;
+					if (col && col->getActivationState() == ACTIVE_TAG)
+					{
+						col->setActivationState( WANTS_DEACTIVATION);
+						col->setDeactivationTime(0.f);
+					}
+				}
+			} else
+			{
+				btMultiBodyLinkCollider* col = body->getBaseCollider();
+				if (col && col->getActivationState() != DISABLE_DEACTIVATION)
+					col->setActivationState( ACTIVE_TAG );
+
+				for (int b=0;b<body->getNumLinks();b++)
+				{
+					btMultiBodyLinkCollider* col = body->getLink(b).m_collider;
+					if (col && col->getActivationState() != DISABLE_DEACTIVATION)
+						col->setActivationState( ACTIVE_TAG );
+				}
+			}
+
+		}
+	}
+
+	btDiscreteDynamicsWorld::updateActivationState(timeStep);
+}
+
+
+SIMD_FORCE_INLINE	int	btGetConstraintIslandId2(const btTypedConstraint* lhs)
+{
+	int islandId;
+	
+	const btCollisionObject& rcolObj0 = lhs->getRigidBodyA();
+	const btCollisionObject& rcolObj1 = lhs->getRigidBodyB();
+	islandId= rcolObj0.getIslandTag()>=0?rcolObj0.getIslandTag():rcolObj1.getIslandTag();
+	return islandId;
+
+}
+
+
+class btSortConstraintOnIslandPredicate2
+{
+	public:
+
+		bool operator() ( const btTypedConstraint* lhs, const btTypedConstraint* rhs ) const
+		{
+			int rIslandId0,lIslandId0;
+			rIslandId0 = btGetConstraintIslandId2(rhs);
+			lIslandId0 = btGetConstraintIslandId2(lhs);
+			return lIslandId0 < rIslandId0;
+		}
+};
+
+
+
+SIMD_FORCE_INLINE	int	btGetMultiBodyConstraintIslandId(const btMultiBodyConstraint* lhs)
+{
+	int islandId;
+	
+	int islandTagA = lhs->getIslandIdA();
+	int islandTagB = lhs->getIslandIdB();
+	islandId= islandTagA>=0?islandTagA:islandTagB;
+	return islandId;
+
+}
+
+
+class btSortMultiBodyConstraintOnIslandPredicate
+{
+	public:
+
+		bool operator() ( const btMultiBodyConstraint* lhs, const btMultiBodyConstraint* rhs ) const
+		{
+			int rIslandId0,lIslandId0;
+			rIslandId0 = btGetMultiBodyConstraintIslandId(rhs);
+			lIslandId0 = btGetMultiBodyConstraintIslandId(lhs);
+			return lIslandId0 < rIslandId0;
+		}
+};
+
+struct MultiBodyInplaceSolverIslandCallback : public btSimulationIslandManager::IslandCallback
+{
+	btContactSolverInfo*	m_solverInfo;
+	btMultiBodyConstraintSolver*		m_solver;
+	btMultiBodyConstraint**		m_multiBodySortedConstraints;
+	int							m_numMultiBodyConstraints;
+	
+	btTypedConstraint**		m_sortedConstraints;
+	int						m_numConstraints;
+	btIDebugDraw*			m_debugDrawer;
+	btDispatcher*			m_dispatcher;
+	
+	btAlignedObjectArray<btCollisionObject*> m_bodies;
+	btAlignedObjectArray<btPersistentManifold*> m_manifolds;
+	btAlignedObjectArray<btTypedConstraint*> m_constraints;
+	btAlignedObjectArray<btMultiBodyConstraint*> m_multiBodyConstraints;
+
+
+	MultiBodyInplaceSolverIslandCallback(	btMultiBodyConstraintSolver*	solver,
+									btDispatcher* dispatcher)
+		:m_solverInfo(NULL),
+		m_solver(solver),
+		m_multiBodySortedConstraints(NULL),
+		m_numConstraints(0),
+		m_debugDrawer(NULL),
+		m_dispatcher(dispatcher)
+	{
+
+	}
+
+	MultiBodyInplaceSolverIslandCallback& operator=(MultiBodyInplaceSolverIslandCallback& other)
+	{
+		btAssert(0);
+		(void)other;
+		return *this;
+	}
+
+	SIMD_FORCE_INLINE void setup ( btContactSolverInfo* solverInfo, btTypedConstraint** sortedConstraints, int numConstraints, btMultiBodyConstraint** sortedMultiBodyConstraints,	int	numMultiBodyConstraints,	btIDebugDraw* debugDrawer)
+	{
+		btAssert(solverInfo);
+		m_solverInfo = solverInfo;
+
+		m_multiBodySortedConstraints = sortedMultiBodyConstraints;
+		m_numMultiBodyConstraints = numMultiBodyConstraints;
+		m_sortedConstraints = sortedConstraints;
+		m_numConstraints = numConstraints;
+
+		m_debugDrawer = debugDrawer;
+		m_bodies.resize (0);
+		m_manifolds.resize (0);
+		m_constraints.resize (0);
+		m_multiBodyConstraints.resize(0);
+	}
+
+	
+	virtual	void	processIsland(btCollisionObject** bodies,int numBodies,btPersistentManifold**	manifolds,int numManifolds, int islandId)
+	{
+		if (islandId<0)
+		{
+			///we don't split islands, so all constraints/contact manifolds/bodies are passed into the solver regardless the island id
+			m_solver->solveMultiBodyGroup( bodies,numBodies,manifolds, numManifolds,m_sortedConstraints, m_numConstraints, &m_multiBodySortedConstraints[0],m_numConstraints,*m_solverInfo,m_debugDrawer,m_dispatcher);
+		} else
+		{
+				//also add all non-contact constraints/joints for this island
+			btTypedConstraint** startConstraint = 0;
+			btMultiBodyConstraint** startMultiBodyConstraint = 0;
+
+			int numCurConstraints = 0;
+			int numCurMultiBodyConstraints = 0;
+
+			int i;
+			
+			//find the first constraint for this island
+
+			for (i=0;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId2(m_sortedConstraints[i]) == islandId)
+				{
+					startConstraint = &m_sortedConstraints[i];
+					break;
+				}
+			}
+			//count the number of constraints in this island
+			for (;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId2(m_sortedConstraints[i]) == islandId)
+				{
+					numCurConstraints++;
+				}
+			}
+
+			for (i=0;i<m_numMultiBodyConstraints;i++)
+			{
+				if (btGetMultiBodyConstraintIslandId(m_multiBodySortedConstraints[i]) == islandId)
+				{
+					
+					startMultiBodyConstraint = &m_multiBodySortedConstraints[i];
+					break;
+				}
+			}
+			//count the number of multi body constraints in this island
+			for (;i<m_numMultiBodyConstraints;i++)
+			{
+				if (btGetMultiBodyConstraintIslandId(m_multiBodySortedConstraints[i]) == islandId)
+				{
+					numCurMultiBodyConstraints++;
+				}
+			}
+
+			if (m_solverInfo->m_minimumSolverBatchSize<=1)
+			{
+				m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,startConstraint,numCurConstraints,*m_solverInfo,m_debugDrawer,m_dispatcher);
+			} else
+			{
+				
+				for (i=0;i<numBodies;i++)
+					m_bodies.push_back(bodies[i]);
+				for (i=0;i<numManifolds;i++)
+					m_manifolds.push_back(manifolds[i]);
+				for (i=0;i<numCurConstraints;i++)
+					m_constraints.push_back(startConstraint[i]);
+				
+				for (i=0;i<numCurMultiBodyConstraints;i++)
+					m_multiBodyConstraints.push_back(startMultiBodyConstraint[i]);
+				
+				if ((m_constraints.size()+m_manifolds.size())>m_solverInfo->m_minimumSolverBatchSize)
+				{
+					processConstraints();
+				} else
+				{
+					//printf("deferred\n");
+				}
+			}
+		}
+	}
+	void	processConstraints()
+	{
+
+		btCollisionObject** bodies = m_bodies.size()? &m_bodies[0]:0;
+		btPersistentManifold** manifold = m_manifolds.size()?&m_manifolds[0]:0;
+		btTypedConstraint** constraints = m_constraints.size()?&m_constraints[0]:0;
+		btMultiBodyConstraint** multiBodyConstraints = m_multiBodyConstraints.size() ? &m_multiBodyConstraints[0] : 0;
+	
+		m_solver->solveMultiBodyGroup( bodies,m_bodies.size(),manifold, m_manifolds.size(),constraints, m_constraints.size() ,multiBodyConstraints, m_multiBodyConstraints.size(), *m_solverInfo,m_debugDrawer,m_dispatcher);
+		m_bodies.resize(0);
+		m_manifolds.resize(0);
+		m_constraints.resize(0);
+		m_multiBodyConstraints.resize(0);
+	}
+
+};
+
+
+
+btMultiBodyDynamicsWorld::btMultiBodyDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btMultiBodyConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration)
+	:btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration),
+	m_multiBodyConstraintSolver(constraintSolver)
+{
+	//split impulse is not yet supported for Featherstone hierarchies
+	getSolverInfo().m_splitImpulse = false;
+	getSolverInfo().m_solverMode |=SOLVER_USE_2_FRICTION_DIRECTIONS;
+	m_solverMultiBodyIslandCallback = new MultiBodyInplaceSolverIslandCallback(constraintSolver,dispatcher);
+}
+
+btMultiBodyDynamicsWorld::~btMultiBodyDynamicsWorld ()
+{
+	delete m_solverMultiBodyIslandCallback;
+}
+
+
+
+
+void	btMultiBodyDynamicsWorld::solveConstraints(btContactSolverInfo& solverInfo)
+{
+
+	btAlignedObjectArray<btScalar> scratch_r;
+	btAlignedObjectArray<btVector3> scratch_v;
+	btAlignedObjectArray<btMatrix3x3> scratch_m;
+
+
+	BT_PROFILE("solveConstraints");
+	
+	m_sortedConstraints.resize( m_constraints.size());
+	int i; 
+	for (i=0;i<getNumConstraints();i++)
+	{
+		m_sortedConstraints[i] = m_constraints[i];
+	}
+	m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate2());
+	btTypedConstraint** constraintsPtr = getNumConstraints() ? &m_sortedConstraints[0] : 0;
+
+	m_sortedMultiBodyConstraints.resize(m_multiBodyConstraints.size());
+	for (i=0;i<m_multiBodyConstraints.size();i++)
+	{
+		m_sortedMultiBodyConstraints[i] = m_multiBodyConstraints[i];
+	}
+	m_sortedMultiBodyConstraints.quickSort(btSortMultiBodyConstraintOnIslandPredicate());
+
+	btMultiBodyConstraint** sortedMultiBodyConstraints = m_sortedMultiBodyConstraints.size() ?  &m_sortedMultiBodyConstraints[0] : 0;
+	
+
+	m_solverMultiBodyIslandCallback->setup(&solverInfo,constraintsPtr,m_sortedConstraints.size(),sortedMultiBodyConstraints,m_sortedMultiBodyConstraints.size(), getDebugDrawer());
+	m_constraintSolver->prepareSolve(getCollisionWorld()->getNumCollisionObjects(), getCollisionWorld()->getDispatcher()->getNumManifolds());
+	
+	/// solve all the constraints for this island
+	m_islandManager->buildAndProcessIslands(getCollisionWorld()->getDispatcher(),getCollisionWorld(),m_solverMultiBodyIslandCallback);
+
+
+	{
+		BT_PROFILE("btMultiBody addForce and stepVelocities");
+		for (int i=0;i<this->m_multiBodies.size();i++)
+		{
+			btMultiBody* bod = m_multiBodies[i];
+
+			bool isSleeping = false;
+			
+			if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
+			{
+				isSleeping = true;
+			} 
+			for (int b=0;b<bod->getNumLinks();b++)
+			{
+				if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState()==ISLAND_SLEEPING)
+					isSleeping = true;
+			} 
+
+			if (!isSleeping)
+			{
+				scratch_r.resize(bod->getNumLinks()+1);
+				scratch_v.resize(bod->getNumLinks()+1);
+				scratch_m.resize(bod->getNumLinks()+1);
+
+				bod->clearForcesAndTorques();
+				bod->addBaseForce(m_gravity * bod->getBaseMass());
+
+				for (int j = 0; j < bod->getNumLinks(); ++j) 
+				{
+					bod->addLinkForce(j, m_gravity * bod->getLinkMass(j));
+				}
+
+				bod->stepVelocities(solverInfo.m_timeStep, scratch_r, scratch_v, scratch_m);
+			}
+		}
+	}
+
+	m_solverMultiBodyIslandCallback->processConstraints();
+	
+	m_constraintSolver->allSolved(solverInfo, m_debugDrawer);
+
+}
+
+void	btMultiBodyDynamicsWorld::integrateTransforms(btScalar timeStep)
+{
+	btDiscreteDynamicsWorld::integrateTransforms(timeStep);
+
+	{
+		BT_PROFILE("btMultiBody stepPositions");
+		//integrate and update the Featherstone hierarchies
+		btAlignedObjectArray<btQuaternion> world_to_local;
+		btAlignedObjectArray<btVector3> local_origin;
+
+		for (int b=0;b<m_multiBodies.size();b++)
+		{
+			btMultiBody* bod = m_multiBodies[b];
+			bool isSleeping = false;
+			if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING)
+			{
+				isSleeping = true;
+			} 
+			for (int b=0;b<bod->getNumLinks();b++)
+			{
+				if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState()==ISLAND_SLEEPING)
+					isSleeping = true;
+			}
+
+
+			if (!isSleeping)
+			{
+				int nLinks = bod->getNumLinks();
+
+				///base + num links
+				world_to_local.resize(nLinks+1);
+				local_origin.resize(nLinks+1);
+
+				bod->stepPositions(timeStep);
+
+	 
+
+				world_to_local[0] = bod->getWorldToBaseRot();
+				local_origin[0] = bod->getBasePos();
+
+				if (bod->getBaseCollider())
+				{
+					btVector3 posr = local_origin[0];
+					float pos[4]={posr.x(),posr.y(),posr.z(),1};
+					float quat[4]={-world_to_local[0].x(),-world_to_local[0].y(),-world_to_local[0].z(),world_to_local[0].w()};
+					btTransform tr;
+					tr.setIdentity();
+					tr.setOrigin(posr);
+					tr.setRotation(btQuaternion(quat[0],quat[1],quat[2],quat[3]));
+
+					bod->getBaseCollider()->setWorldTransform(tr);
+
+				}
+      
+				for (int k=0;k<bod->getNumLinks();k++)
+				{
+					const int parent = bod->getParent(k);
+					world_to_local[k+1] = bod->getParentToLocalRot(k) * world_to_local[parent+1];
+					local_origin[k+1] = local_origin[parent+1] + (quatRotate(world_to_local[k+1].inverse() , bod->getRVector(k)));
+				}
+
+
+				for (int m=0;m<bod->getNumLinks();m++)
+				{
+					btMultiBodyLinkCollider* col = bod->getLink(m).m_collider;
+					if (col)
+					{
+						int link = col->m_link;
+						btAssert(link == m);
+
+						int index = link+1;
+
+						btVector3 posr = local_origin[index];
+						float pos[4]={posr.x(),posr.y(),posr.z(),1};
+						float quat[4]={-world_to_local[index].x(),-world_to_local[index].y(),-world_to_local[index].z(),world_to_local[index].w()};
+						btTransform tr;
+						tr.setIdentity();
+						tr.setOrigin(posr);
+						tr.setRotation(btQuaternion(quat[0],quat[1],quat[2],quat[3]));
+
+						col->setWorldTransform(tr);
+					}
+				}
+			} else
+			{
+				bod->clearVelocities();
+			}
+		}
+	}
+}
+
+
+
+void	btMultiBodyDynamicsWorld::addMultiBodyConstraint( btMultiBodyConstraint* constraint)
+{
+	m_multiBodyConstraints.push_back(constraint);
+}
+
+void	btMultiBodyDynamicsWorld::removeMultiBodyConstraint( btMultiBodyConstraint* constraint)
+{
+	m_multiBodyConstraints.remove(constraint);
+}

+ 56 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h

@@ -0,0 +1,56 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_DYNAMICS_WORLD_H
+#define BT_MULTIBODY_DYNAMICS_WORLD_H
+
+#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h"
+
+
+class btMultiBody;
+class btMultiBodyConstraint;
+class btMultiBodyConstraintSolver;
+struct MultiBodyInplaceSolverIslandCallback;
+
+///The btMultiBodyDynamicsWorld adds Featherstone multi body dynamics to Bullet
+///This implementation is still preliminary/experimental.
+class btMultiBodyDynamicsWorld : public btDiscreteDynamicsWorld
+{
+protected:
+	btAlignedObjectArray<btMultiBody*> m_multiBodies;
+	btAlignedObjectArray<btMultiBodyConstraint*> m_multiBodyConstraints;
+	btAlignedObjectArray<btMultiBodyConstraint*> m_sortedMultiBodyConstraints;
+	btMultiBodyConstraintSolver*	m_multiBodyConstraintSolver;
+	MultiBodyInplaceSolverIslandCallback*	m_solverMultiBodyIslandCallback;
+
+	virtual void	calculateSimulationIslands();
+	virtual void	updateActivationState(btScalar timeStep);
+	virtual void	solveConstraints(btContactSolverInfo& solverInfo);
+	virtual void	integrateTransforms(btScalar timeStep);
+public:
+
+	btMultiBodyDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btMultiBodyConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
+	
+	virtual ~btMultiBodyDynamicsWorld ();
+
+	virtual void	addMultiBody(btMultiBody* body, short group= btBroadphaseProxy::DefaultFilter, short mask=btBroadphaseProxy::AllFilter);
+
+	virtual void	removeMultiBody(btMultiBody* body);
+
+	virtual void	addMultiBodyConstraint( btMultiBodyConstraint* constraint);
+
+	virtual void	removeMultiBodyConstraint( btMultiBodyConstraint* constraint);
+};
+#endif //BT_MULTIBODY_DYNAMICS_WORLD_H

+ 133 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp

@@ -0,0 +1,133 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was written by Erwin Coumans
+
+#include "btMultiBodyJointLimitConstraint.h"
+#include "btMultiBody.h"
+#include "btMultiBodyLinkCollider.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+
+btMultiBodyJointLimitConstraint::btMultiBodyJointLimitConstraint(btMultiBody* body, int link, btScalar lower, btScalar upper)
+	:btMultiBodyConstraint(body,body,link,link,2,true),
+	m_lowerBound(lower),
+	m_upperBound(upper)
+{
+	// the data.m_jacobians never change, so may as well
+    // initialize them here
+        
+    // note: we rely on the fact that data.m_jacobians are
+    // always initialized to zero by the Constraint ctor
+
+    // row 0: the lower bound
+    jacobianA(0)[6 + link] = 1;
+
+    // row 1: the upper bound
+    jacobianB(1)[6 + link] = -1;
+}
+btMultiBodyJointLimitConstraint::~btMultiBodyJointLimitConstraint()
+{
+}
+
+int btMultiBodyJointLimitConstraint::getIslandIdA() const
+{
+	btMultiBodyLinkCollider* col = m_bodyA->getBaseCollider();
+	if (col)
+		return col->getIslandTag();
+	for (int i=0;i<m_bodyA->getNumLinks();i++)
+	{
+		if (m_bodyA->getLink(i).m_collider)
+			return m_bodyA->getLink(i).m_collider->getIslandTag();
+	}
+	return -1;
+}
+
+int btMultiBodyJointLimitConstraint::getIslandIdB() const
+{
+	btMultiBodyLinkCollider* col = m_bodyB->getBaseCollider();
+	if (col)
+		return col->getIslandTag();
+
+	for (int i=0;i<m_bodyB->getNumLinks();i++)
+	{
+		col = m_bodyB->getLink(i).m_collider;
+		if (col)
+			return col->getIslandTag();
+	}
+	return -1;
+}
+
+
+void btMultiBodyJointLimitConstraint::createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal)
+{
+    // only positions need to be updated -- data.m_jacobians and force
+    // directions were set in the ctor and never change.
+    
+    // row 0: the lower bound
+    setPosition(0, m_bodyA->getJointPos(m_linkA) - m_lowerBound);
+
+    // row 1: the upper bound
+    setPosition(1, m_upperBound - m_bodyA->getJointPos(m_linkA));
+
+	for (int row=0;row<getNumRows();row++)
+	{
+		btMultiBodySolverConstraint& constraintRow = constraintRows.expandNonInitializing();
+		constraintRow.m_multiBodyA = m_bodyA;
+		constraintRow.m_multiBodyB = m_bodyB;
+		
+		btScalar rel_vel = fillConstraintRowMultiBodyMultiBody(constraintRow,data,jacobianA(row),jacobianB(row),infoGlobal,0,-m_maxAppliedImpulse,m_maxAppliedImpulse);
+		{
+			btScalar penetration = getPosition(row);
+			btScalar positionalError = 0.f;
+			btScalar	velocityError =  - rel_vel;// * damping;
+			btScalar erp = infoGlobal.m_erp2;
+			if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+			{
+				erp = infoGlobal.m_erp;
+			}
+			if (penetration>0)
+			{
+				positionalError = 0;
+				velocityError = -penetration / infoGlobal.m_timeStep;
+			} else
+			{
+				positionalError = -penetration * erp/infoGlobal.m_timeStep;
+			}
+
+			btScalar  penetrationImpulse = positionalError*constraintRow.m_jacDiagABInv;
+			btScalar velocityImpulse = velocityError *constraintRow.m_jacDiagABInv;
+			if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+			{
+				//combine position and velocity into rhs
+				constraintRow.m_rhs = penetrationImpulse+velocityImpulse;
+				constraintRow.m_rhsPenetration = 0.f;
+
+			} else
+			{
+				//split position and velocity into rhs and m_rhsPenetration
+				constraintRow.m_rhs = velocityImpulse;
+				constraintRow.m_rhsPenetration = penetrationImpulse;
+			}
+		}
+	}
+
+}
+	
+	
+	
+

+ 44 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h

@@ -0,0 +1,44 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_JOINT_LIMIT_CONSTRAINT_H
+#define BT_MULTIBODY_JOINT_LIMIT_CONSTRAINT_H
+
+#include "btMultiBodyConstraint.h"
+struct btSolverInfo;
+
+class btMultiBodyJointLimitConstraint : public btMultiBodyConstraint
+{
+protected:
+
+	btScalar	m_lowerBound;
+	btScalar	m_upperBound;
+public:
+
+	btMultiBodyJointLimitConstraint(btMultiBody* body, int link, btScalar lower, btScalar upper);
+	virtual ~btMultiBodyJointLimitConstraint();
+
+	virtual int getIslandIdA() const;
+	virtual int getIslandIdB() const;
+
+	virtual void createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal);
+	
+	
+};
+
+#endif //BT_MULTIBODY_JOINT_LIMIT_CONSTRAINT_H
+

+ 89 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp

@@ -0,0 +1,89 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was written by Erwin Coumans
+
+#include "btMultiBodyJointMotor.h"
+#include "btMultiBody.h"
+#include "btMultiBodyLinkCollider.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+
+btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse)
+	:btMultiBodyConstraint(body,body,link,link,1,true),
+	m_desiredVelocity(desiredVelocity)	
+{
+	m_maxAppliedImpulse = maxMotorImpulse;
+	// the data.m_jacobians never change, so may as well
+    // initialize them here
+        
+    // note: we rely on the fact that data.m_jacobians are
+    // always initialized to zero by the Constraint ctor
+
+    // row 0: the lower bound
+    jacobianA(0)[6 + link] = 1;
+}
+btMultiBodyJointMotor::~btMultiBodyJointMotor()
+{
+}
+
+int btMultiBodyJointMotor::getIslandIdA() const
+{
+	btMultiBodyLinkCollider* col = m_bodyA->getBaseCollider();
+	if (col)
+		return col->getIslandTag();
+	for (int i=0;i<m_bodyA->getNumLinks();i++)
+	{
+		if (m_bodyA->getLink(i).m_collider)
+			return m_bodyA->getLink(i).m_collider->getIslandTag();
+	}
+	return -1;
+}
+
+int btMultiBodyJointMotor::getIslandIdB() const
+{
+	btMultiBodyLinkCollider* col = m_bodyB->getBaseCollider();
+	if (col)
+		return col->getIslandTag();
+
+	for (int i=0;i<m_bodyB->getNumLinks();i++)
+	{
+		col = m_bodyB->getLink(i).m_collider;
+		if (col)
+			return col->getIslandTag();
+	}
+	return -1;
+}
+
+
+void btMultiBodyJointMotor::createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal)
+{
+    // only positions need to be updated -- data.m_jacobians and force
+    // directions were set in the ctor and never change.
+    
+  
+
+	for (int row=0;row<getNumRows();row++)
+	{
+		btMultiBodySolverConstraint& constraintRow = constraintRows.expandNonInitializing();
+		
+		btScalar penetration = 0;
+		fillConstraintRowMultiBodyMultiBody(constraintRow,data,jacobianA(row),jacobianB(row),infoGlobal,m_desiredVelocity,-m_maxAppliedImpulse,m_maxAppliedImpulse);
+	}
+
+}
+	

+ 47 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyJointMotor.h

@@ -0,0 +1,47 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was written by Erwin Coumans
+
+#ifndef BT_MULTIBODY_JOINT_MOTOR_H
+#define BT_MULTIBODY_JOINT_MOTOR_H
+
+#include "btMultiBodyConstraint.h"
+struct btSolverInfo;
+
+class btMultiBodyJointMotor : public btMultiBodyConstraint
+{
+protected:
+
+	
+	btScalar	m_desiredVelocity;
+
+public:
+
+	btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse);
+	virtual ~btMultiBodyJointMotor();
+
+	virtual int getIslandIdA() const;
+	virtual int getIslandIdB() const;
+
+	virtual void createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal);
+	
+	
+};
+
+#endif //BT_MULTIBODY_JOINT_MOTOR_H
+

+ 110 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyLink.h

@@ -0,0 +1,110 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_LINK_H
+#define BT_MULTIBODY_LINK_H
+
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btVector3.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+enum	btMultiBodyLinkFlags
+{
+	BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION = 1
+};
+//
+// Link struct
+//
+
+struct btMultibodyLink 
+{
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+    btScalar joint_pos;    // qi
+
+    btScalar mass;         // mass of link
+    btVector3 inertia;   // inertia of link (local frame; diagonal)
+
+    int parent;         // index of the parent link (assumed to be < index of this link), or -1 if parent is the base link.
+
+    btQuaternion zero_rot_parent_to_this;    // rotates vectors in parent-frame to vectors in local-frame (when q=0). constant.
+
+    // "axis" = spatial joint axis (Mirtich Defn 9 p104). (expressed in local frame.) constant.
+    // for prismatic: axis_top = zero;
+    //                axis_bottom = unit vector along the joint axis.
+    // for revolute: axis_top = unit vector along the rotation axis (u);
+    //               axis_bottom = u cross d_vector.
+    btVector3 axis_top;
+    btVector3 axis_bottom;
+
+    btVector3 d_vector;   // vector from the inboard joint pos to this link's COM. (local frame.) constant. set for revolute joints only.
+
+    // e_vector is constant, but depends on the joint type
+    // prismatic: vector from COM of parent to COM of this link, WHEN Q = 0. (local frame.)
+    // revolute: vector from parent's COM to the pivot point, in PARENT's frame.
+    btVector3 e_vector;
+
+    bool is_revolute;   // true = revolute, false = prismatic
+
+    btQuaternion cached_rot_parent_to_this;   // rotates vectors in parent frame to vectors in local frame
+    btVector3 cached_r_vector;                // vector from COM of parent to COM of this link, in local frame.
+
+    btVector3 applied_force;    // In WORLD frame
+    btVector3 applied_torque;   // In WORLD frame
+    btScalar joint_torque;
+
+	class btMultiBodyLinkCollider* m_collider;
+	int m_flags;
+
+    // ctor: set some sensible defaults
+	btMultibodyLink()
+		: joint_pos(0),
+			mass(1),
+			parent(-1),
+			zero_rot_parent_to_this(1, 0, 0, 0),
+			is_revolute(false),
+			cached_rot_parent_to_this(1, 0, 0, 0),
+			joint_torque(0),
+			m_collider(0),
+			m_flags(0)
+	{
+		inertia.setValue(1, 1, 1);
+		axis_top.setValue(0, 0, 0);
+		axis_bottom.setValue(1, 0, 0);
+		d_vector.setValue(0, 0, 0);
+		e_vector.setValue(0, 0, 0);
+		cached_r_vector.setValue(0, 0, 0);
+		applied_force.setValue( 0, 0, 0);
+		applied_torque.setValue(0, 0, 0);
+	}
+
+    // routine to update cached_rot_parent_to_this and cached_r_vector
+    void updateCache()
+	{
+		if (is_revolute) 
+		{
+			cached_rot_parent_to_this = btQuaternion(axis_top,-joint_pos) * zero_rot_parent_to_this;
+			cached_r_vector = d_vector + quatRotate(cached_rot_parent_to_this,e_vector);
+		} else 
+		{
+			// cached_rot_parent_to_this never changes, so no need to update
+			cached_r_vector = e_vector + joint_pos * axis_bottom;
+		}
+	}
+};
+
+
+#endif //BT_MULTIBODY_LINK_H

+ 92 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h

@@ -0,0 +1,92 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_FEATHERSTONE_LINK_COLLIDER_H
+#define BT_FEATHERSTONE_LINK_COLLIDER_H
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+#include "btMultiBody.h"
+
+class btMultiBodyLinkCollider : public btCollisionObject
+{
+//protected:
+public:
+
+	btMultiBody* m_multiBody;
+	int m_link;
+
+
+	btMultiBodyLinkCollider (btMultiBody* multiBody,int link)
+		:m_multiBody(multiBody),
+		m_link(link)
+	{
+		m_checkCollideWith =  true;
+		//we need to remove the 'CF_STATIC_OBJECT' flag, otherwise links/base doesn't merge islands
+		//this means that some constraints might point to bodies that are not in the islands, causing crashes
+		//if (link>=0 || (multiBody && !multiBody->hasFixedBase()))
+		{
+			m_collisionFlags &= (~btCollisionObject::CF_STATIC_OBJECT);
+		}
+		// else
+		//{
+		//	m_collisionFlags |= (btCollisionObject::CF_STATIC_OBJECT);
+		//}
+
+		m_internalType = CO_FEATHERSTONE_LINK;
+	}
+	static btMultiBodyLinkCollider* upcast(btCollisionObject* colObj)
+	{
+		if (colObj->getInternalType()&btCollisionObject::CO_FEATHERSTONE_LINK)
+			return (btMultiBodyLinkCollider*)colObj;
+		return 0;
+	}
+	static const btMultiBodyLinkCollider* upcast(const btCollisionObject* colObj)
+	{
+		if (colObj->getInternalType()&btCollisionObject::CO_FEATHERSTONE_LINK)
+			return (btMultiBodyLinkCollider*)colObj;
+		return 0;
+	}
+
+	virtual bool checkCollideWithOverride(const  btCollisionObject* co) const
+	{
+		const btMultiBodyLinkCollider* other = btMultiBodyLinkCollider::upcast(co);
+		if (!other)
+			return true;
+		if (other->m_multiBody != this->m_multiBody)
+			return true;
+		if (!m_multiBody->hasSelfCollision())
+			return false;
+
+		//check if 'link' has collision disabled
+		if (m_link>=0)
+		{
+			const btMultibodyLink& link = m_multiBody->getLink(this->m_link);
+			if ((link.m_flags&BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION) && link.parent == other->m_link)
+				return false;
+		}
+		
+		if (other->m_link>=0)
+		{
+			const btMultibodyLink& otherLink = other->m_multiBody->getLink(other->m_link);
+			if ((otherLink.m_flags& BT_MULTIBODYLINKFLAGS_DISABLE_PARENT_COLLISION) && otherLink.parent == this->m_link)
+				return false;
+		}
+		return true;
+	}
+};
+
+#endif //BT_FEATHERSTONE_LINK_COLLIDER_H
+

+ 143 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp

@@ -0,0 +1,143 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was written by Erwin Coumans
+
+#include "btMultiBodyPoint2Point.h"
+#include "btMultiBodyLinkCollider.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB)
+	:btMultiBodyConstraint(body,0,link,-1,3,false),
+	m_rigidBodyA(0),
+	m_rigidBodyB(bodyB),
+	m_pivotInA(pivotInA),
+	m_pivotInB(pivotInB)
+{
+}
+
+btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB)
+	:btMultiBodyConstraint(bodyA,bodyB,linkA,linkB,3,false),
+	m_rigidBodyA(0),
+	m_rigidBodyB(0),
+	m_pivotInA(pivotInA),
+	m_pivotInB(pivotInB)
+{
+}
+
+
+btMultiBodyPoint2Point::~btMultiBodyPoint2Point()
+{
+}
+
+
+int btMultiBodyPoint2Point::getIslandIdA() const
+{
+	if (m_rigidBodyA)
+		return m_rigidBodyA->getIslandTag();
+
+	if (m_bodyA)
+	{
+		btMultiBodyLinkCollider* col = m_bodyA->getBaseCollider();
+		if (col)
+			return col->getIslandTag();
+		for (int i=0;i<m_bodyA->getNumLinks();i++)
+		{
+			if (m_bodyA->getLink(i).m_collider)
+				return m_bodyA->getLink(i).m_collider->getIslandTag();
+		}
+	}
+	return -1;
+}
+
+int btMultiBodyPoint2Point::getIslandIdB() const
+{
+	if (m_rigidBodyB)
+		return m_rigidBodyB->getIslandTag();
+	if (m_bodyB)
+	{
+		btMultiBodyLinkCollider* col = m_bodyB->getBaseCollider();
+		if (col)
+			return col->getIslandTag();
+
+		for (int i=0;i<m_bodyB->getNumLinks();i++)
+		{
+			col = m_bodyB->getLink(i).m_collider;
+			if (col)
+				return col->getIslandTag();
+		}
+	}
+	return -1;
+}
+
+
+
+void btMultiBodyPoint2Point::createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal)
+{
+
+//	int i=1;
+	for (int i=0;i<3;i++)
+	{
+
+		btMultiBodySolverConstraint& constraintRow = constraintRows.expandNonInitializing();
+
+		constraintRow.m_solverBodyIdA = data.m_fixedBodyId;
+		constraintRow.m_solverBodyIdB = data.m_fixedBodyId;
+		
+
+		btVector3 contactNormalOnB(0,0,0);
+		contactNormalOnB[i] = -1;
+
+		btScalar penetration = 0;
+
+		 // Convert local points back to world
+		btVector3 pivotAworld = m_pivotInA;
+		if (m_rigidBodyA)
+		{
+			
+			constraintRow.m_solverBodyIdA = m_rigidBodyA->getCompanionId();
+			pivotAworld = m_rigidBodyA->getCenterOfMassTransform()*m_pivotInA;
+		} else
+		{
+			if (m_bodyA)
+				pivotAworld = m_bodyA->localPosToWorld(m_linkA, m_pivotInA);
+		}
+		btVector3 pivotBworld = m_pivotInB;
+		if (m_rigidBodyB)
+		{
+			constraintRow.m_solverBodyIdB = m_rigidBodyB->getCompanionId();
+			pivotBworld = m_rigidBodyB->getCenterOfMassTransform()*m_pivotInB;
+		} else
+		{
+			if (m_bodyB)
+				pivotBworld = m_bodyB->localPosToWorld(m_linkB, m_pivotInB);
+			
+		}
+		btScalar position = (pivotAworld-pivotBworld).dot(contactNormalOnB);
+		btScalar relaxation = 1.f;
+		fillMultiBodyConstraintMixed(constraintRow, data,
+																 contactNormalOnB,
+																 pivotAworld, pivotBworld, 
+																 position,
+																 infoGlobal,
+																 relaxation,
+																 false);
+		constraintRow.m_lowerLimit = -m_maxAppliedImpulse;
+		constraintRow.m_upperLimit = m_maxAppliedImpulse;
+
+	}
+}

+ 60 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodyPoint2Point.h

@@ -0,0 +1,60 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was written by Erwin Coumans
+
+#ifndef BT_MULTIBODY_POINT2POINT_H
+#define BT_MULTIBODY_POINT2POINT_H
+
+#include "btMultiBodyConstraint.h"
+
+class btMultiBodyPoint2Point : public btMultiBodyConstraint
+{
+protected:
+
+	btRigidBody*	m_rigidBodyA;
+	btRigidBody*	m_rigidBodyB;
+	btVector3		m_pivotInA;
+	btVector3		m_pivotInB;
+	
+
+public:
+
+	btMultiBodyPoint2Point(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB);
+	btMultiBodyPoint2Point(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB);
+
+	virtual ~btMultiBodyPoint2Point();
+
+	virtual int getIslandIdA() const;
+	virtual int getIslandIdB() const;
+
+	virtual void createConstraintRows(btMultiBodyConstraintArray& constraintRows,
+		btMultiBodyJacobianData& data,
+		const btContactSolverInfo& infoGlobal);
+
+	const btVector3& getPivotInB() const
+	{
+		return m_pivotInB;
+	}
+
+	void setPivotInB(const btVector3& pivotInB)
+	{
+		m_pivotInB = pivotInB;
+	}
+
+	
+};
+
+#endif //BT_MULTIBODY_POINT2POINT_H

+ 82 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/Featherstone/btMultiBodySolverConstraint.h

@@ -0,0 +1,82 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTIBODY_SOLVER_CONSTRAINT_H
+#define BT_MULTIBODY_SOLVER_CONSTRAINT_H
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+class btMultiBody;
+#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
+ATTRIBUTE_ALIGNED16 (struct)	btMultiBodySolverConstraint
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+
+	int				m_deltaVelAindex;//more generic version of m_relpos1CrossNormal/m_contactNormal1
+	btVector3		m_relpos1CrossNormal;
+	btVector3		m_contactNormal1;
+	int				m_jacAindex;
+
+	int				m_deltaVelBindex;
+	btVector3		m_relpos2CrossNormal;
+	btVector3		m_contactNormal2; //usually m_contactNormal2 == -m_contactNormal1, but not always
+	int				m_jacBindex;
+
+	btVector3		m_angularComponentA;
+	btVector3		m_angularComponentB;
+	
+	mutable btSimdScalar	m_appliedPushImpulse;
+	mutable btSimdScalar	m_appliedImpulse;
+
+	btScalar	m_friction;
+	btScalar	m_jacDiagABInv;
+	btScalar		m_rhs;
+	btScalar		m_cfm;
+	
+    btScalar		m_lowerLimit;
+	btScalar		m_upperLimit;
+	btScalar		m_rhsPenetration;
+    union
+	{
+		void*		m_originalContactPoint;
+		btScalar	m_unusedPadding4;
+	};
+
+	int	m_overrideNumSolverIterations;
+    int			m_frictionIndex;
+
+	int m_solverBodyIdA;
+	btMultiBody* m_multiBodyA;
+	int			m_linkA;
+	
+	int m_solverBodyIdB;
+	btMultiBody* m_multiBodyB;
+	int			m_linkB;
+
+	enum		btSolverConstraintType
+	{
+		BT_SOLVER_CONTACT_1D = 0,
+		BT_SOLVER_FRICTION_1D
+	};
+};
+
+typedef btAlignedObjectArray<btMultiBodySolverConstraint>	btMultiBodyConstraintArray;
+
+#endif //BT_MULTIBODY_SOLVER_CONSTRAINT_H

+ 2079 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigLCP.cpp

@@ -0,0 +1,2079 @@
+/*************************************************************************
+*                                                                       *
+* Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+* All rights reserved.  Email: [email protected]   Web: www.q12.org          *
+*                                                                       *
+* This library is free software; you can redistribute it and/or         *
+* modify it under the terms of EITHER:                                  *
+*   (1) The GNU Lesser General Public License as published by the Free  *
+*       Software Foundation; either version 2.1 of the License, or (at  *
+*       your option) any later version. The text of the GNU Lesser      *
+*       General Public License is included with this library in the     *
+*       file LICENSE.TXT.                                               *
+*   (2) The BSD-style license that is included with this library in     *
+*       the file LICENSE-BSD.TXT.                                       *
+*                                                                       *
+* This library is distributed in the hope that it will be useful,       *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+* LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+*                                                                       *
+*************************************************************************/
+
+/*
+
+
+THE ALGORITHM
+-------------
+
+solve A*x = b+w, with x and w subject to certain LCP conditions.
+each x(i),w(i) must lie on one of the three line segments in the following
+diagram. each line segment corresponds to one index set :
+
+     w(i)
+     /|\      |           :
+      |       |           :
+      |       |i in N     :
+  w>0 |       |state[i]=0 :
+      |       |           :
+      |       |           :  i in C
+  w=0 +       +-----------------------+
+      |                   :           |
+      |                   :           |
+  w<0 |                   :           |i in N
+      |                   :           |state[i]=1
+      |                   :           |
+      |                   :           |
+      +-------|-----------|-----------|----------> x(i)
+             lo           0           hi
+
+the Dantzig algorithm proceeds as follows:
+  for i=1:n
+    * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
+      negative towards the line. as this is done, the other (x(j),w(j))
+      for j<i are constrained to be on the line. if any (x,w) reaches the
+      end of a line segment then it is switched between index sets.
+    * i is added to the appropriate index set depending on what line segment
+      it hits.
+
+we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
+simpler, because the starting point for x(i),w(i) is always on the dotted
+line x=0 and x will only ever increase in one direction, so it can only hit
+two out of the three line segments.
+
+
+NOTES
+-----
+
+this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
+the implementation is split into an LCP problem object (btLCP) and an LCP
+driver function. most optimization occurs in the btLCP object.
+
+a naive implementation of the algorithm requires either a lot of data motion
+or a lot of permutation-array lookup, because we are constantly re-ordering
+rows and columns. to avoid this and make a more optimized algorithm, a
+non-trivial data structure is used to represent the matrix A (this is
+implemented in the fast version of the btLCP object).
+
+during execution of this algorithm, some indexes in A are clamped (set C),
+some are non-clamped (set N), and some are "don't care" (where x=0).
+A,x,b,w (and other problem vectors) are permuted such that the clamped
+indexes are first, the unclamped indexes are next, and the don't-care
+indexes are last. this permutation is recorded in the array `p'.
+initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
+the corresponding elements of p are swapped.
+
+because the C and N elements are grouped together in the rows of A, we can do
+lots of work with a fast dot product function. if A,x,etc were not permuted
+and we only had a permutation array, then those dot products would be much
+slower as we would have a permutation array lookup in some inner loops.
+
+A is accessed through an array of row pointers, so that element (i,j) of the
+permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
+we still have to actually move the data.
+
+during execution of this algorithm we maintain an L*D*L' factorization of
+the clamped submatrix of A (call it `AC') which is the top left nC*nC
+submatrix of A. there are two ways we could arrange the rows/columns in AC.
+
+(1) AC is always permuted such that L*D*L' = AC. this causes a problem
+when a row/column is removed from C, because then all the rows/columns of A
+between the deleted index and the end of C need to be rotated downward.
+this results in a lot of data motion and slows things down.
+(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
+itself a permutation of the underlying A). this is what we do - the
+permutation is recorded in the vector C. call this permutation A[C,C].
+when a row/column is removed from C, all we have to do is swap two
+rows/columns and manipulate C.
+
+*/
+
+
+#include "btDantzigLCP.h"
+
+#include <string.h>//memcpy
+
+bool s_error = false;
+
+//***************************************************************************
+// code generation parameters
+
+
+#define btLCP_FAST		// use fast btLCP object
+
+// option 1 : matrix row pointers (less data copying)
+#define BTROWPTRS
+#define BTATYPE btScalar **
+#define BTAROW(i) (m_A[i])
+
+// option 2 : no matrix row pointers (slightly faster inner loops)
+//#define NOROWPTRS
+//#define BTATYPE btScalar *
+//#define BTAROW(i) (m_A+(i)*m_nskip)
+
+#define BTNUB_OPTIMIZATIONS
+
+
+
+/* solve L*X=B, with B containing 1 right hand sides.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is lskip.
+ * B is an n*1 matrix that contains the right hand sides.
+ * B is stored by columns and its leading dimension is also lskip.
+ * B is overwritten with X.
+ * this processes blocks of 2*2.
+ * if this is in the factorizer source file, n must be a multiple of 2.
+ */
+
+static void btSolveL1_1 (const btScalar *L, btScalar *B, int n, int lskip1)
+{  
+  /* declare variables - Z matrix, p and q vectors, etc */
+  btScalar Z11,m11,Z21,m21,p1,q1,p2,*ex;
+  const btScalar *ell;
+  int i,j;
+  /* compute all 2 x 1 blocks of X */
+  for (i=0; i < n; i+=2) {
+    /* compute all 2 x 1 block of X, from rows i..i+2-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    Z21=0;
+    ell = L + i*lskip1;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-2; j >= 0; j -= 2) {
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[0];
+      q1=ex[0];
+      m11 = p1 * q1;
+      p2=ell[lskip1];
+      m21 = p2 * q1;
+      Z11 += m11;
+      Z21 += m21;
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[1];
+      q1=ex[1];
+      m11 = p1 * q1;
+      p2=ell[1+lskip1];
+      m21 = p2 * q1;
+      /* advance pointers */
+      ell += 2;
+      ex += 2;
+      Z11 += m11;
+      Z21 += m21;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 2;
+    for (; j > 0; j--) {
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[0];
+      q1=ex[0];
+      m11 = p1 * q1;
+      p2=ell[lskip1];
+      m21 = p2 * q1;
+      /* advance pointers */
+      ell += 1;
+      ex += 1;
+      Z11 += m11;
+      Z21 += m21;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+    p1 = ell[lskip1];
+    Z21 = ex[1] - Z21 - p1*Z11;
+    ex[1] = Z21;
+    /* end of outer loop */
+  }
+}
+
+/* solve L*X=B, with B containing 2 right hand sides.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is lskip.
+ * B is an n*2 matrix that contains the right hand sides.
+ * B is stored by columns and its leading dimension is also lskip.
+ * B is overwritten with X.
+ * this processes blocks of 2*2.
+ * if this is in the factorizer source file, n must be a multiple of 2.
+ */
+
+static void btSolveL1_2 (const btScalar *L, btScalar *B, int n, int lskip1)
+{  
+  /* declare variables - Z matrix, p and q vectors, etc */
+  btScalar Z11,m11,Z12,m12,Z21,m21,Z22,m22,p1,q1,p2,q2,*ex;
+  const btScalar *ell;
+  int i,j;
+  /* compute all 2 x 2 blocks of X */
+  for (i=0; i < n; i+=2) {
+    /* compute all 2 x 2 block of X, from rows i..i+2-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    Z12=0;
+    Z21=0;
+    Z22=0;
+    ell = L + i*lskip1;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-2; j >= 0; j -= 2) {
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[0];
+      q1=ex[0];
+      m11 = p1 * q1;
+      q2=ex[lskip1];
+      m12 = p1 * q2;
+      p2=ell[lskip1];
+      m21 = p2 * q1;
+      m22 = p2 * q2;
+      Z11 += m11;
+      Z12 += m12;
+      Z21 += m21;
+      Z22 += m22;
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[1];
+      q1=ex[1];
+      m11 = p1 * q1;
+      q2=ex[1+lskip1];
+      m12 = p1 * q2;
+      p2=ell[1+lskip1];
+      m21 = p2 * q1;
+      m22 = p2 * q2;
+      /* advance pointers */
+      ell += 2;
+      ex += 2;
+      Z11 += m11;
+      Z12 += m12;
+      Z21 += m21;
+      Z22 += m22;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 2;
+    for (; j > 0; j--) {
+      /* compute outer product and add it to the Z matrix */
+      p1=ell[0];
+      q1=ex[0];
+      m11 = p1 * q1;
+      q2=ex[lskip1];
+      m12 = p1 * q2;
+      p2=ell[lskip1];
+      m21 = p2 * q1;
+      m22 = p2 * q2;
+      /* advance pointers */
+      ell += 1;
+      ex += 1;
+      Z11 += m11;
+      Z12 += m12;
+      Z21 += m21;
+      Z22 += m22;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+    Z12 = ex[lskip1] - Z12;
+    ex[lskip1] = Z12;
+    p1 = ell[lskip1];
+    Z21 = ex[1] - Z21 - p1*Z11;
+    ex[1] = Z21;
+    Z22 = ex[1+lskip1] - Z22 - p1*Z12;
+    ex[1+lskip1] = Z22;
+    /* end of outer loop */
+  }
+}
+
+
+void btFactorLDLT (btScalar *A, btScalar *d, int n, int nskip1)
+{  
+  int i,j;
+  btScalar sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22;
+  if (n < 1) return;
+  
+  for (i=0; i<=n-2; i += 2) {
+    /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
+    btSolveL1_2 (A,A+i*nskip1,i,nskip1);
+    /* scale the elements in a 2 x i block at A(i,0), and also */
+    /* compute Z = the outer product matrix that we'll need. */
+    Z11 = 0;
+    Z21 = 0;
+    Z22 = 0;
+    ell = A+i*nskip1;
+    dee = d;
+    for (j=i-6; j >= 0; j -= 6) {
+      p1 = ell[0];
+      p2 = ell[nskip1];
+      dd = dee[0];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[0] = q1;
+      ell[nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      p1 = ell[1];
+      p2 = ell[1+nskip1];
+      dd = dee[1];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[1] = q1;
+      ell[1+nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      p1 = ell[2];
+      p2 = ell[2+nskip1];
+      dd = dee[2];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[2] = q1;
+      ell[2+nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      p1 = ell[3];
+      p2 = ell[3+nskip1];
+      dd = dee[3];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[3] = q1;
+      ell[3+nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      p1 = ell[4];
+      p2 = ell[4+nskip1];
+      dd = dee[4];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[4] = q1;
+      ell[4+nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      p1 = ell[5];
+      p2 = ell[5+nskip1];
+      dd = dee[5];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[5] = q1;
+      ell[5+nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      ell += 6;
+      dee += 6;
+    }
+    /* compute left-over iterations */
+    j += 6;
+    for (; j > 0; j--) {
+      p1 = ell[0];
+      p2 = ell[nskip1];
+      dd = dee[0];
+      q1 = p1*dd;
+      q2 = p2*dd;
+      ell[0] = q1;
+      ell[nskip1] = q2;
+      m11 = p1*q1;
+      m21 = p2*q1;
+      m22 = p2*q2;
+      Z11 += m11;
+      Z21 += m21;
+      Z22 += m22;
+      ell++;
+      dee++;
+    }
+    /* solve for diagonal 2 x 2 block at A(i,i) */
+    Z11 = ell[0] - Z11;
+    Z21 = ell[nskip1] - Z21;
+    Z22 = ell[1+nskip1] - Z22;
+    dee = d + i;
+    /* factorize 2 x 2 block Z,dee */
+    /* factorize row 1 */
+    dee[0] = btRecip(Z11);
+    /* factorize row 2 */
+    sum = 0;
+    q1 = Z21;
+    q2 = q1 * dee[0];
+    Z21 = q2;
+    sum += q1*q2;
+    dee[1] = btRecip(Z22 - sum);
+    /* done factorizing 2 x 2 block */
+    ell[nskip1] = Z21;
+  }
+  /* compute the (less than 2) rows at the bottom */
+  switch (n-i) {
+    case 0:
+    break;
+    
+    case 1:
+    btSolveL1_1 (A,A+i*nskip1,i,nskip1);
+    /* scale the elements in a 1 x i block at A(i,0), and also */
+    /* compute Z = the outer product matrix that we'll need. */
+    Z11 = 0;
+    ell = A+i*nskip1;
+    dee = d;
+    for (j=i-6; j >= 0; j -= 6) {
+      p1 = ell[0];
+      dd = dee[0];
+      q1 = p1*dd;
+      ell[0] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      p1 = ell[1];
+      dd = dee[1];
+      q1 = p1*dd;
+      ell[1] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      p1 = ell[2];
+      dd = dee[2];
+      q1 = p1*dd;
+      ell[2] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      p1 = ell[3];
+      dd = dee[3];
+      q1 = p1*dd;
+      ell[3] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      p1 = ell[4];
+      dd = dee[4];
+      q1 = p1*dd;
+      ell[4] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      p1 = ell[5];
+      dd = dee[5];
+      q1 = p1*dd;
+      ell[5] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      ell += 6;
+      dee += 6;
+    }
+    /* compute left-over iterations */
+    j += 6;
+    for (; j > 0; j--) {
+      p1 = ell[0];
+      dd = dee[0];
+      q1 = p1*dd;
+      ell[0] = q1;
+      m11 = p1*q1;
+      Z11 += m11;
+      ell++;
+      dee++;
+    }
+    /* solve for diagonal 1 x 1 block at A(i,i) */
+    Z11 = ell[0] - Z11;
+    dee = d + i;
+    /* factorize 1 x 1 block Z,dee */
+    /* factorize row 1 */
+    dee[0] = btRecip(Z11);
+    /* done factorizing 1 x 1 block */
+    break;
+    
+    //default: *((char*)0)=0;  /* this should never happen! */
+  }
+}
+
+/* solve L*X=B, with B containing 1 right hand sides.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is lskip.
+ * B is an n*1 matrix that contains the right hand sides.
+ * B is stored by columns and its leading dimension is also lskip.
+ * B is overwritten with X.
+ * this processes blocks of 4*4.
+ * if this is in the factorizer source file, n must be a multiple of 4.
+ */
+
+void btSolveL1 (const btScalar *L, btScalar *B, int n, int lskip1)
+{  
+  /* declare variables - Z matrix, p and q vectors, etc */
+  btScalar Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex;
+  const btScalar *ell;
+  int lskip2,lskip3,i,j;
+  /* compute lskip values */
+  lskip2 = 2*lskip1;
+  lskip3 = 3*lskip1;
+  /* compute all 4 x 1 blocks of X */
+  for (i=0; i <= n-4; i+=4) {
+    /* compute all 4 x 1 block of X, from rows i..i+4-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    Z21=0;
+    Z31=0;
+    Z41=0;
+    ell = L + i*lskip1;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-12; j >= 0; j -= 12) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      p2=ell[lskip1];
+      p3=ell[lskip2];
+      p4=ell[lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[1];
+      q1=ex[1];
+      p2=ell[1+lskip1];
+      p3=ell[1+lskip2];
+      p4=ell[1+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[2];
+      q1=ex[2];
+      p2=ell[2+lskip1];
+      p3=ell[2+lskip2];
+      p4=ell[2+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[3];
+      q1=ex[3];
+      p2=ell[3+lskip1];
+      p3=ell[3+lskip2];
+      p4=ell[3+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[4];
+      q1=ex[4];
+      p2=ell[4+lskip1];
+      p3=ell[4+lskip2];
+      p4=ell[4+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[5];
+      q1=ex[5];
+      p2=ell[5+lskip1];
+      p3=ell[5+lskip2];
+      p4=ell[5+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[6];
+      q1=ex[6];
+      p2=ell[6+lskip1];
+      p3=ell[6+lskip2];
+      p4=ell[6+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[7];
+      q1=ex[7];
+      p2=ell[7+lskip1];
+      p3=ell[7+lskip2];
+      p4=ell[7+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[8];
+      q1=ex[8];
+      p2=ell[8+lskip1];
+      p3=ell[8+lskip2];
+      p4=ell[8+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[9];
+      q1=ex[9];
+      p2=ell[9+lskip1];
+      p3=ell[9+lskip2];
+      p4=ell[9+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[10];
+      q1=ex[10];
+      p2=ell[10+lskip1];
+      p3=ell[10+lskip2];
+      p4=ell[10+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* load p and q values */
+      p1=ell[11];
+      q1=ex[11];
+      p2=ell[11+lskip1];
+      p3=ell[11+lskip2];
+      p4=ell[11+lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* advance pointers */
+      ell += 12;
+      ex += 12;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 12;
+    for (; j > 0; j--) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      p2=ell[lskip1];
+      p3=ell[lskip2];
+      p4=ell[lskip3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      Z21 += p2 * q1;
+      Z31 += p3 * q1;
+      Z41 += p4 * q1;
+      /* advance pointers */
+      ell += 1;
+      ex += 1;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+    p1 = ell[lskip1];
+    Z21 = ex[1] - Z21 - p1*Z11;
+    ex[1] = Z21;
+    p1 = ell[lskip2];
+    p2 = ell[1+lskip2];
+    Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21;
+    ex[2] = Z31;
+    p1 = ell[lskip3];
+    p2 = ell[1+lskip3];
+    p3 = ell[2+lskip3];
+    Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
+    ex[3] = Z41;
+    /* end of outer loop */
+  }
+  /* compute rows at end that are not a multiple of block size */
+  for (; i < n; i++) {
+    /* compute all 1 x 1 block of X, from rows i..i+1-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    ell = L + i*lskip1;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-12; j >= 0; j -= 12) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[1];
+      q1=ex[1];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[2];
+      q1=ex[2];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[3];
+      q1=ex[3];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[4];
+      q1=ex[4];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[5];
+      q1=ex[5];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[6];
+      q1=ex[6];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[7];
+      q1=ex[7];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[8];
+      q1=ex[8];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[9];
+      q1=ex[9];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[10];
+      q1=ex[10];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* load p and q values */
+      p1=ell[11];
+      q1=ex[11];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* advance pointers */
+      ell += 12;
+      ex += 12;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 12;
+    for (; j > 0; j--) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      /* compute outer product and add it to the Z matrix */
+      Z11 += p1 * q1;
+      /* advance pointers */
+      ell += 1;
+      ex += 1;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+  }
+}
+
+/* solve L^T * x=b, with b containing 1 right hand side.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is lskip.
+ * b is an n*1 matrix that contains the right hand side.
+ * b is overwritten with x.
+ * this processes blocks of 4.
+ */
+
+void btSolveL1T (const btScalar *L, btScalar *B, int n, int lskip1)
+{  
+  /* declare variables - Z matrix, p and q vectors, etc */
+  btScalar Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
+  const btScalar *ell;
+  int lskip2,lskip3,i,j;
+  /* special handling for L and B because we're solving L1 *transpose* */
+  L = L + (n-1)*(lskip1+1);
+  B = B + n-1;
+  lskip1 = -lskip1;
+  /* compute lskip values */
+  lskip2 = 2*lskip1;
+  lskip3 = 3*lskip1;
+  /* compute all 4 x 1 blocks of X */
+  for (i=0; i <= n-4; i+=4) {
+    /* compute all 4 x 1 block of X, from rows i..i+4-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    Z21=0;
+    Z31=0;
+    Z41=0;
+    ell = L - i;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-4; j >= 0; j -= 4) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      p2=ell[-1];
+      p3=ell[-2];
+      p4=ell[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      m21 = p2 * q1;
+      m31 = p3 * q1;
+      m41 = p4 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      Z21 += m21;
+      Z31 += m31;
+      Z41 += m41;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-1];
+      p2=ell[-1];
+      p3=ell[-2];
+      p4=ell[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      m21 = p2 * q1;
+      m31 = p3 * q1;
+      m41 = p4 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      Z21 += m21;
+      Z31 += m31;
+      Z41 += m41;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-2];
+      p2=ell[-1];
+      p3=ell[-2];
+      p4=ell[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      m21 = p2 * q1;
+      m31 = p3 * q1;
+      m41 = p4 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      Z21 += m21;
+      Z31 += m31;
+      Z41 += m41;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-3];
+      p2=ell[-1];
+      p3=ell[-2];
+      p4=ell[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      m21 = p2 * q1;
+      m31 = p3 * q1;
+      m41 = p4 * q1;
+      ell += lskip1;
+      ex -= 4;
+      Z11 += m11;
+      Z21 += m21;
+      Z31 += m31;
+      Z41 += m41;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 4;
+    for (; j > 0; j--) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      p2=ell[-1];
+      p3=ell[-2];
+      p4=ell[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      m21 = p2 * q1;
+      m31 = p3 * q1;
+      m41 = p4 * q1;
+      ell += lskip1;
+      ex -= 1;
+      Z11 += m11;
+      Z21 += m21;
+      Z31 += m31;
+      Z41 += m41;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+    p1 = ell[-1];
+    Z21 = ex[-1] - Z21 - p1*Z11;
+    ex[-1] = Z21;
+    p1 = ell[-2];
+    p2 = ell[-2+lskip1];
+    Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
+    ex[-2] = Z31;
+    p1 = ell[-3];
+    p2 = ell[-3+lskip1];
+    p3 = ell[-3+lskip2];
+    Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
+    ex[-3] = Z41;
+    /* end of outer loop */
+  }
+  /* compute rows at end that are not a multiple of block size */
+  for (; i < n; i++) {
+    /* compute all 1 x 1 block of X, from rows i..i+1-1 */
+    /* set the Z matrix to 0 */
+    Z11=0;
+    ell = L - i;
+    ex = B;
+    /* the inner loop that computes outer products and adds them to Z */
+    for (j=i-4; j >= 0; j -= 4) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-1];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-2];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      ell += lskip1;
+      Z11 += m11;
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[-3];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      ell += lskip1;
+      ex -= 4;
+      Z11 += m11;
+      /* end of inner loop */
+    }
+    /* compute left-over iterations */
+    j += 4;
+    for (; j > 0; j--) {
+      /* load p and q values */
+      p1=ell[0];
+      q1=ex[0];
+      /* compute outer product and add it to the Z matrix */
+      m11 = p1 * q1;
+      ell += lskip1;
+      ex -= 1;
+      Z11 += m11;
+    }
+    /* finish computing the X(i) block */
+    Z11 = ex[0] - Z11;
+    ex[0] = Z11;
+  }
+}
+
+
+
+void btVectorScale (btScalar *a, const btScalar *d, int n)
+{
+  btAssert (a && d && n >= 0);
+  for (int i=0; i<n; i++) {
+    a[i] *= d[i];
+  }
+}
+
+void btSolveLDLT (const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
+{
+  btAssert (L && d && b && n > 0 && nskip >= n);
+  btSolveL1 (L,b,n,nskip);
+  btVectorScale (b,d,n);
+  btSolveL1T (L,b,n,nskip);
+}
+
+
+
+//***************************************************************************
+
+// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
+// A is nskip. this only references and swaps the lower triangle.
+// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
+// rows will be swapped by exchanging row pointers. otherwise the data will
+// be copied.
+
+static void btSwapRowsAndCols (BTATYPE A, int n, int i1, int i2, int nskip, 
+  int do_fast_row_swaps)
+{
+  btAssert (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
+    nskip >= n && i1 < i2);
+
+# ifdef BTROWPTRS
+  btScalar *A_i1 = A[i1];
+  btScalar *A_i2 = A[i2];
+  for (int i=i1+1; i<i2; ++i) {
+    btScalar *A_i_i1 = A[i] + i1;
+    A_i1[i] = *A_i_i1;
+    *A_i_i1 = A_i2[i];
+  }
+  A_i1[i2] = A_i1[i1];
+  A_i1[i1] = A_i2[i1];
+  A_i2[i1] = A_i2[i2];
+  // swap rows, by swapping row pointers
+  if (do_fast_row_swaps) {
+    A[i1] = A_i2;
+    A[i2] = A_i1;
+  }
+  else {
+    // Only swap till i2 column to match A plain storage variant.
+    for (int k = 0; k <= i2; ++k) {
+      btScalar tmp = A_i1[k];
+      A_i1[k] = A_i2[k];
+      A_i2[k] = tmp;
+    }
+  }
+  // swap columns the hard way
+  for (int j=i2+1; j<n; ++j) {
+    btScalar *A_j = A[j];
+    btScalar tmp = A_j[i1];
+    A_j[i1] = A_j[i2];
+    A_j[i2] = tmp;
+  }
+# else
+  btScalar *A_i1 = A+i1*nskip;
+  btScalar *A_i2 = A+i2*nskip;
+  for (int k = 0; k < i1; ++k) {
+    btScalar tmp = A_i1[k];
+    A_i1[k] = A_i2[k];
+    A_i2[k] = tmp;
+  }
+  btScalar *A_i = A_i1 + nskip;
+  for (int i=i1+1; i<i2; A_i+=nskip, ++i) {
+    btScalar tmp = A_i2[i];
+    A_i2[i] = A_i[i1];
+    A_i[i1] = tmp;
+  }
+  {
+    btScalar tmp = A_i1[i1];
+    A_i1[i1] = A_i2[i2];
+    A_i2[i2] = tmp;
+  }
+  btScalar *A_j = A_i2 + nskip;
+  for (int j=i2+1; j<n; A_j+=nskip, ++j) {
+    btScalar tmp = A_j[i1];
+    A_j[i1] = A_j[i2];
+    A_j[i2] = tmp;
+  }
+# endif
+}
+
+
+// swap two indexes in the n*n LCP problem. i1 must be <= i2.
+
+static void btSwapProblem (BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo,
+                         btScalar *hi, int *p, bool *state, int *findex,
+                         int n, int i1, int i2, int nskip,
+                         int do_fast_row_swaps)
+{
+  btScalar tmpr;
+  int tmpi;
+  bool tmpb;
+  btAssert (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
+  if (i1==i2) return;
+  
+  btSwapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
+  
+  tmpr = x[i1];
+  x[i1] = x[i2];
+  x[i2] = tmpr;
+  
+  tmpr = b[i1];
+  b[i1] = b[i2];
+  b[i2] = tmpr;
+  
+  tmpr = w[i1];
+  w[i1] = w[i2];
+  w[i2] = tmpr;
+  
+  tmpr = lo[i1];
+  lo[i1] = lo[i2];
+  lo[i2] = tmpr;
+
+  tmpr = hi[i1];
+  hi[i1] = hi[i2];
+  hi[i2] = tmpr;
+
+  tmpi = p[i1];
+  p[i1] = p[i2];
+  p[i2] = tmpi;
+
+  tmpb = state[i1];
+  state[i1] = state[i2];
+  state[i2] = tmpb;
+
+  if (findex) {
+    tmpi = findex[i1];
+    findex[i1] = findex[i2];
+    findex[i2] = tmpi;
+  }
+}
+
+
+
+
+//***************************************************************************
+// btLCP manipulator object. this represents an n*n LCP problem.
+//
+// two index sets C and N are kept. each set holds a subset of
+// the variable indexes 0..n-1. an index can only be in one set.
+// initially both sets are empty.
+//
+// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
+
+//***************************************************************************
+// fast implementation of btLCP. see the above definition of btLCP for
+// interface comments.
+//
+// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
+// permuted as the other vectors/matrices are permuted.
+//
+// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
+// contiguous indexes. the don't-care indexes follow N.
+//
+// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
+// added or removed from the set C the factorization is updated.
+// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
+// the leading dimension of the matrix L is always `nskip'.
+//
+// at the start there may be other indexes that are unbounded but are not
+// included in `nub'. btLCP will permute the matrix so that absolutely all
+// unbounded vectors are at the start. thus there may be some initial
+// permutation.
+//
+// the algorithms here assume certain patterns, particularly with respect to
+// index transfer.
+
+#ifdef btLCP_FAST
+
+struct btLCP 
+{
+	const int m_n;
+	const int m_nskip;
+	int m_nub;
+	int m_nC, m_nN;				// size of each index set
+	BTATYPE const m_A;				// A rows
+	btScalar *const m_x, * const m_b, *const m_w, *const m_lo,* const m_hi;	// permuted LCP problem data
+	btScalar *const m_L, *const m_d;				// L*D*L' factorization of set C
+	btScalar *const m_Dell, *const m_ell, *const m_tmp;
+	bool *const m_state;
+	int *const m_findex, *const m_p, *const m_C;
+
+	btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
+		btScalar *_lo, btScalar *_hi, btScalar *_L, btScalar *_d,
+		btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
+		bool *_state, int *_findex, int *_p, int *_C, btScalar **Arows);
+	int getNub() const { return m_nub; }
+	void transfer_i_to_C (int i);
+	void transfer_i_to_N (int i) { m_nN++; }			// because we can assume C and N span 1:i-1
+	void transfer_i_from_N_to_C (int i);
+	void transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch);
+	int numC() const { return m_nC; }
+	int numN() const { return m_nN; }
+	int indexC (int i) const { return i; }
+	int indexN (int i) const { return i+m_nC; }
+	btScalar Aii (int i) const  { return BTAROW(i)[i]; }
+	btScalar AiC_times_qC (int i, btScalar *q) const { return btLargeDot (BTAROW(i), q, m_nC); }
+	btScalar AiN_times_qN (int i, btScalar *q) const { return btLargeDot (BTAROW(i)+m_nC, q+m_nC, m_nN); }
+	void pN_equals_ANC_times_qC (btScalar *p, btScalar *q);
+	void pN_plusequals_ANi (btScalar *p, int i, int sign=1);
+	void pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q);
+	void pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q);
+	void solve1 (btScalar *a, int i, int dir=1, int only_transfer=0);
+	void unpermute();
+};
+
+
+btLCP::btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
+            btScalar *_lo, btScalar *_hi, btScalar *_L, btScalar *_d,
+            btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
+            bool *_state, int *_findex, int *_p, int *_C, btScalar **Arows):
+  m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
+# ifdef BTROWPTRS
+  m_A(Arows),
+#else
+  m_A(_Adata),
+#endif
+  m_x(_x), m_b(_b), m_w(_w), m_lo(_lo), m_hi(_hi),
+  m_L(_L), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp),
+  m_state(_state), m_findex(_findex), m_p(_p), m_C(_C)
+{
+  {
+    btSetZero (m_x,m_n);
+  }
+
+  {
+# ifdef BTROWPTRS
+    // make matrix row pointers
+    btScalar *aptr = _Adata;
+    BTATYPE A = m_A;
+    const int n = m_n, nskip = m_nskip;
+    for (int k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
+# endif
+  }
+
+  {
+    int *p = m_p;
+    const int n = m_n;
+    for (int k=0; k<n; ++k) p[k]=k;		// initially unpermuted
+  }
+
+  /*
+  // for testing, we can do some random swaps in the area i > nub
+  {
+    const int n = m_n;
+    const int nub = m_nub;
+    if (nub < n) {
+    for (int k=0; k<100; k++) {
+      int i1,i2;
+      do {
+        i1 = dRandInt(n-nub)+nub;
+        i2 = dRandInt(n-nub)+nub;
+      }
+      while (i1 > i2); 
+      //printf ("--> %d %d\n",i1,i2);
+      btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0);
+    }
+  }
+  */
+
+  // permute the problem so that *all* the unbounded variables are at the
+  // start, i.e. look for unbounded variables not included in `nub'. we can
+  // potentially push up `nub' this way and get a bigger initial factorization.
+  // note that when we swap rows/cols here we must not just swap row pointers,
+  // as the initial factorization relies on the data being all in one chunk.
+  // variables that have findex >= 0 are *not* considered to be unbounded even
+  // if lo=-inf and hi=inf - this is because these limits may change during the
+  // solution process.
+
+  {
+    int *findex = m_findex;
+    btScalar *lo = m_lo, *hi = m_hi;
+    const int n = m_n;
+    for (int k = m_nub; k<n; ++k) {
+      if (findex && findex[k] >= 0) continue;
+      if (lo[k]==-BT_INFINITY && hi[k]==BT_INFINITY) {
+        btSwapProblem (m_A,m_x,m_b,m_w,lo,hi,m_p,m_state,findex,n,m_nub,k,m_nskip,0);
+        m_nub++;
+      }
+    }
+  }
+
+  // if there are unbounded variables at the start, factorize A up to that
+  // point and solve for x. this puts all indexes 0..nub-1 into C.
+  if (m_nub > 0) {
+    const int nub = m_nub;
+    {
+      btScalar *Lrow = m_L;
+      const int nskip = m_nskip;
+      for (int j=0; j<nub; Lrow+=nskip, ++j) memcpy(Lrow,BTAROW(j),(j+1)*sizeof(btScalar));
+    }
+    btFactorLDLT (m_L,m_d,nub,m_nskip);
+    memcpy (m_x,m_b,nub*sizeof(btScalar));
+    btSolveLDLT (m_L,m_d,m_x,nub,m_nskip);
+    btSetZero (m_w,nub);
+    {
+      int *C = m_C;
+      for (int k=0; k<nub; ++k) C[k] = k;
+    }
+    m_nC = nub;
+  }
+
+  // permute the indexes > nub such that all findex variables are at the end
+  if (m_findex) {
+    const int nub = m_nub;
+    int *findex = m_findex;
+    int num_at_end = 0;
+    for (int k=m_n-1; k >= nub; k--) {
+      if (findex[k] >= 0) {
+        btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,findex,m_n,k,m_n-1-num_at_end,m_nskip,1);
+        num_at_end++;
+      }
+    }
+  }
+
+  // print info about indexes
+  /*
+  {
+    const int n = m_n;
+    const int nub = m_nub;
+    for (int k=0; k<n; k++) {
+      if (k<nub) printf ("C");
+      else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c");
+      else printf (".");
+    }
+    printf ("\n");
+  }
+  */
+}
+
+
+void btLCP::transfer_i_to_C (int i)
+{
+  {
+    if (m_nC > 0) {
+      // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
+      {
+        const int nC = m_nC;
+        btScalar *const Ltgt = m_L + nC*m_nskip, *ell = m_ell;
+        for (int j=0; j<nC; ++j) Ltgt[j] = ell[j];
+      }
+      const int nC = m_nC;
+      m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
+    }
+    else {
+      m_d[0] = btRecip (BTAROW(i)[i]);
+    }
+
+    btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1);
+
+    const int nC = m_nC;
+    m_C[nC] = nC;
+    m_nC = nC + 1; // nC value is outdated after this line
+  }
+
+}
+
+
+void btLCP::transfer_i_from_N_to_C (int i)
+{
+  {
+    if (m_nC > 0) {
+      {
+        btScalar *const aptr = BTAROW(i);
+        btScalar *Dell = m_Dell;
+        const int *C = m_C;
+#   ifdef BTNUB_OPTIMIZATIONS
+        // if nub>0, initial part of aptr unpermuted
+        const int nub = m_nub;
+        int j=0;
+        for ( ; j<nub; ++j) Dell[j] = aptr[j];
+        const int nC = m_nC;
+        for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   else
+        const int nC = m_nC;
+        for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   endif
+      }
+      btSolveL1 (m_L,m_Dell,m_nC,m_nskip);
+      {
+        const int nC = m_nC;
+        btScalar *const Ltgt = m_L + nC*m_nskip;
+        btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
+        for (int j=0; j<nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
+      }
+      const int nC = m_nC;
+      m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
+    }
+    else {
+      m_d[0] = btRecip (BTAROW(i)[i]);
+    }
+
+    btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1);
+
+    const int nC = m_nC;
+    m_C[nC] = nC;
+    m_nN--;
+    m_nC = nC + 1; // nC value is outdated after this line
+  }
+
+  // @@@ TO DO LATER
+  // if we just finish here then we'll go back and re-solve for
+  // delta_x. but actually we can be more efficient and incrementally
+  // update delta_x here. but if we do this, we wont have ell and Dell
+  // to use in updating the factorization later.
+
+}
+
+void btRemoveRowCol (btScalar *A, int n, int nskip, int r)
+{
+  btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
+  if (r >= n-1) return;
+  if (r > 0) {
+    {
+      const size_t move_size = (n-r-1)*sizeof(btScalar);
+      btScalar *Adst = A + r;
+      for (int i=0; i<r; Adst+=nskip,++i) {
+        btScalar *Asrc = Adst + 1;
+        memmove (Adst,Asrc,move_size);
+      }
+    }
+    {
+      const size_t cpy_size = r*sizeof(btScalar);
+      btScalar *Adst = A + r * nskip;
+      for (int i=r; i<(n-1); ++i) {
+        btScalar *Asrc = Adst + nskip;
+        memcpy (Adst,Asrc,cpy_size);
+        Adst = Asrc;
+      }
+    }
+  }
+  {
+    const size_t cpy_size = (n-r-1)*sizeof(btScalar);
+    btScalar *Adst = A + r * (nskip + 1);
+    for (int i=r; i<(n-1); ++i) {
+      btScalar *Asrc = Adst + (nskip + 1);
+      memcpy (Adst,Asrc,cpy_size);
+      Adst = Asrc - 1;
+    }
+  }
+}
+
+
+
+
+void btLDLTAddTL (btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar>& scratch)
+{
+  btAssert (L && d && a && n > 0 && nskip >= n);
+
+  if (n < 2) return;
+  scratch.resize(2*nskip);
+  btScalar *W1 = &scratch[0];
+  
+  btScalar *W2 = W1 + nskip;
+
+  W1[0] = btScalar(0.0);
+  W2[0] = btScalar(0.0);
+  for (int j=1; j<n; ++j) {
+    W1[j] = W2[j] = (btScalar) (a[j] * SIMDSQRT12);
+  }
+  btScalar W11 = (btScalar) ((btScalar(0.5)*a[0]+1)*SIMDSQRT12);
+  btScalar W21 = (btScalar) ((btScalar(0.5)*a[0]-1)*SIMDSQRT12);
+
+  btScalar alpha1 = btScalar(1.0);
+  btScalar alpha2 = btScalar(1.0);
+
+  {
+    btScalar dee = d[0];
+    btScalar alphanew = alpha1 + (W11*W11)*dee;
+    btAssert(alphanew != btScalar(0.0));
+    dee /= alphanew;
+    btScalar gamma1 = W11 * dee;
+    dee *= alpha1;
+    alpha1 = alphanew;
+    alphanew = alpha2 - (W21*W21)*dee;
+    dee /= alphanew;
+    //btScalar gamma2 = W21 * dee;
+    alpha2 = alphanew;
+    btScalar k1 = btScalar(1.0) - W21*gamma1;
+    btScalar k2 = W21*gamma1*W11 - W21;
+    btScalar *ll = L + nskip;
+    for (int p=1; p<n; ll+=nskip, ++p) {
+      btScalar Wp = W1[p];
+      btScalar ell = *ll;
+      W1[p] =    Wp - W11*ell;
+      W2[p] = k1*Wp +  k2*ell;
+    }
+  }
+
+  btScalar *ll = L + (nskip + 1);
+  for (int j=1; j<n; ll+=nskip+1, ++j) {
+    btScalar k1 = W1[j];
+    btScalar k2 = W2[j];
+
+    btScalar dee = d[j];
+    btScalar alphanew = alpha1 + (k1*k1)*dee;
+    btAssert(alphanew != btScalar(0.0));
+    dee /= alphanew;
+    btScalar gamma1 = k1 * dee;
+    dee *= alpha1;
+    alpha1 = alphanew;
+    alphanew = alpha2 - (k2*k2)*dee;
+    dee /= alphanew;
+    btScalar gamma2 = k2 * dee;
+    dee *= alpha2;
+    d[j] = dee;
+    alpha2 = alphanew;
+
+    btScalar *l = ll + nskip;
+    for (int p=j+1; p<n; l+=nskip, ++p) {
+      btScalar ell = *l;
+      btScalar Wp = W1[p] - k1 * ell;
+      ell += gamma1 * Wp;
+      W1[p] = Wp;
+      Wp = W2[p] - k2 * ell;
+      ell -= gamma2 * Wp;
+      W2[p] = Wp;
+      *l = ell;
+    }
+  }
+}
+
+
+#define _BTGETA(i,j) (A[i][j])
+//#define _GETA(i,j) (A[(i)*nskip+(j)])
+#define BTGETA(i,j) ((i > j) ? _BTGETA(i,j) : _BTGETA(j,i))
+
+inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
+{
+  return nskip * 2 * sizeof(btScalar);
+}
+
+
+void btLDLTRemove (btScalar **A, const int *p, btScalar *L, btScalar *d,
+    int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar>& scratch)
+{
+  btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
+	   n1 >= n2 && nskip >= n1);
+  #ifdef BT_DEBUG
+	for (int i=0; i<n2; ++i) 
+		btAssert(p[i] >= 0 && p[i] < n1);
+  #endif
+
+  if (r==n2-1) {
+    return;		// deleting last row/col is easy
+  }
+  else {
+    size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip);
+    btAssert(LDLTAddTL_size % sizeof(btScalar) == 0);
+	scratch.resize(nskip * 2+n2);
+    btScalar *tmp = &scratch[0];
+    if (r==0) {
+      btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size);
+      const int p_0 = p[0];
+      for (int i=0; i<n2; ++i) {
+        a[i] = -BTGETA(p[i],p_0);
+      }
+      a[0] += btScalar(1.0);
+      btLDLTAddTL (L,d,a,n2,nskip,scratch);
+    }
+    else {
+      btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size);
+      {
+        btScalar *Lcurr = L + r*nskip;
+        for (int i=0; i<r; ++Lcurr, ++i) {
+          btAssert(d[i] != btScalar(0.0));
+          t[i] = *Lcurr / d[i];
+        }
+      }
+      btScalar *a = t + r;
+      {
+        btScalar *Lcurr = L + r*nskip;
+        const int *pp_r = p + r, p_r = *pp_r;
+        const int n2_minus_r = n2-r;
+        for (int i=0; i<n2_minus_r; Lcurr+=nskip,++i) {
+          a[i] = btLargeDot(Lcurr,t,r) - BTGETA(pp_r[i],p_r);
+        }
+      }
+      a[0] += btScalar(1.0);
+      btLDLTAddTL (L + r*nskip+r, d+r, a, n2-r, nskip, scratch);
+    }
+  }
+
+  // snip out row/column r from L and d
+  btRemoveRowCol (L,n2,nskip,r);
+  if (r < (n2-1)) memmove (d+r,d+r+1,(n2-r-1)*sizeof(btScalar));
+}
+
+
+void btLCP::transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch)
+{
+  {
+    int *C = m_C;
+    // remove a row/column from the factorization, and adjust the
+    // indexes (black magic!)
+    int last_idx = -1;
+    const int nC = m_nC;
+    int j = 0;
+    for ( ; j<nC; ++j) {
+      if (C[j]==nC-1) {
+        last_idx = j;
+      }
+      if (C[j]==i) {
+        btLDLTRemove (m_A,C,m_L,m_d,m_n,nC,j,m_nskip,scratch);
+        int k;
+        if (last_idx == -1) {
+          for (k=j+1 ; k<nC; ++k) {
+            if (C[k]==nC-1) {
+              break;
+            }
+          }
+          btAssert (k < nC);
+        }
+        else {
+          k = last_idx;
+        }
+        C[k] = C[j];
+        if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
+        break;
+      }
+    }
+    btAssert (j < nC);
+
+    btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,i,nC-1,m_nskip,1);
+
+    m_nN++;
+    m_nC = nC - 1; // nC value is outdated after this line
+  }
+
+}
+
+
+void btLCP::pN_equals_ANC_times_qC (btScalar *p, btScalar *q)
+{
+  // we could try to make this matrix-vector multiplication faster using
+  // outer product matrix tricks, e.g. with the dMultidotX() functions.
+  // but i tried it and it actually made things slower on random 100x100
+  // problems because of the overhead involved. so we'll stick with the
+  // simple method for now.
+  const int nC = m_nC;
+  btScalar *ptgt = p + nC;
+  const int nN = m_nN;
+  for (int i=0; i<nN; ++i) {
+    ptgt[i] = btLargeDot (BTAROW(i+nC),q,nC);
+  }
+}
+
+
+void btLCP::pN_plusequals_ANi (btScalar *p, int i, int sign)
+{
+  const int nC = m_nC;
+  btScalar *aptr = BTAROW(i) + nC;
+  btScalar *ptgt = p + nC;
+  if (sign > 0) {
+    const int nN = m_nN;
+    for (int j=0; j<nN; ++j) ptgt[j] += aptr[j];
+  }
+  else {
+    const int nN = m_nN;
+    for (int j=0; j<nN; ++j) ptgt[j] -= aptr[j];
+  }
+}
+
+void btLCP::pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q)
+{
+  const int nC = m_nC;
+  for (int i=0; i<nC; ++i) {
+    p[i] += s*q[i];
+  }
+}
+
+void btLCP::pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q)
+{
+  const int nC = m_nC;
+  btScalar *ptgt = p + nC, *qsrc = q + nC;
+  const int nN = m_nN;
+  for (int i=0; i<nN; ++i) {
+    ptgt[i] += s*qsrc[i];
+  }
+}
+
+void btLCP::solve1 (btScalar *a, int i, int dir, int only_transfer)
+{
+  // the `Dell' and `ell' that are computed here are saved. if index i is
+  // later added to the factorization then they can be reused.
+  //
+  // @@@ question: do we need to solve for entire delta_x??? yes, but
+  //     only if an x goes below 0 during the step.
+
+  if (m_nC > 0) {
+    {
+      btScalar *Dell = m_Dell;
+      int *C = m_C;
+      btScalar *aptr = BTAROW(i);
+#   ifdef BTNUB_OPTIMIZATIONS
+      // if nub>0, initial part of aptr[] is guaranteed unpermuted
+      const int nub = m_nub;
+      int j=0;
+      for ( ; j<nub; ++j) Dell[j] = aptr[j];
+      const int nC = m_nC;
+      for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   else
+      const int nC = m_nC;
+      for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   endif
+    }
+    btSolveL1 (m_L,m_Dell,m_nC,m_nskip);
+    {
+      btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
+      const int nC = m_nC;
+      for (int j=0; j<nC; ++j) ell[j] = Dell[j] * d[j];
+    }
+
+    if (!only_transfer) {
+      btScalar *tmp = m_tmp, *ell = m_ell;
+      {
+        const int nC = m_nC;
+        for (int j=0; j<nC; ++j) tmp[j] = ell[j];
+      }
+      btSolveL1T (m_L,tmp,m_nC,m_nskip);
+      if (dir > 0) {
+        int *C = m_C;
+        btScalar *tmp = m_tmp;
+        const int nC = m_nC;
+        for (int j=0; j<nC; ++j) a[C[j]] = -tmp[j];
+      } else {
+        int *C = m_C;
+        btScalar *tmp = m_tmp;
+        const int nC = m_nC;
+        for (int j=0; j<nC; ++j) a[C[j]] = tmp[j];
+      }
+    }
+  }
+}
+
+
+void btLCP::unpermute()
+{
+  // now we have to un-permute x and w
+  {
+    memcpy (m_tmp,m_x,m_n*sizeof(btScalar));
+    btScalar *x = m_x, *tmp = m_tmp;
+    const int *p = m_p;
+    const int n = m_n;
+    for (int j=0; j<n; ++j) x[p[j]] = tmp[j];
+  }
+  {
+    memcpy (m_tmp,m_w,m_n*sizeof(btScalar));
+    btScalar *w = m_w, *tmp = m_tmp;
+    const int *p = m_p;
+    const int n = m_n;
+    for (int j=0; j<n; ++j) w[p[j]] = tmp[j];
+  }
+}
+
+#endif // btLCP_FAST
+
+
+//***************************************************************************
+// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
+
+bool btSolveDantzigLCP (int n, btScalar *A, btScalar *x, btScalar *b,
+                btScalar* outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory& scratchMem)
+{
+	s_error = false;
+
+//	printf("btSolveDantzigLCP n=%d\n",n);
+  btAssert (n>0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
+  btAssert(outer_w);
+
+#ifdef BT_DEBUG
+  {
+    // check restrictions on lo and hi
+    for (int k=0; k<n; ++k) 
+		btAssert (lo[k] <= 0 && hi[k] >= 0);
+  }
+# endif
+
+
+  // if all the variables are unbounded then we can just factor, solve,
+  // and return
+  if (nub >= n) 
+  {
+   
+
+    int nskip = (n);
+    btFactorLDLT (A, outer_w, n, nskip);
+    btSolveLDLT (A, outer_w, b, n, nskip);
+    memcpy (x, b, n*sizeof(btScalar));
+
+    return !s_error;
+  }
+
+  const int nskip = (n);
+  scratchMem.L.resize(n*nskip);
+
+  scratchMem.d.resize(n);
+
+  btScalar *w = outer_w;
+  scratchMem.delta_w.resize(n);
+  scratchMem.delta_x.resize(n);
+  scratchMem.Dell.resize(n);
+  scratchMem.ell.resize(n);
+  scratchMem.Arows.resize(n);
+  scratchMem.p.resize(n);
+  scratchMem.C.resize(n);
+
+  // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
+  scratchMem.state.resize(n);
+
+
+  // create LCP object. note that tmp is set to delta_w to save space, this
+  // optimization relies on knowledge of how tmp is used, so be careful!
+  btLCP lcp(n,nskip,nub,A,x,b,w,lo,hi,&scratchMem.L[0],&scratchMem.d[0],&scratchMem.Dell[0],&scratchMem.ell[0],&scratchMem.delta_w[0],&scratchMem.state[0],findex,&scratchMem.p[0],&scratchMem.C[0],&scratchMem.Arows[0]);
+  int adj_nub = lcp.getNub();
+
+  // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
+  // LCP conditions then i is added to the appropriate index set. otherwise
+  // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
+  // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
+  // while driving x(i) we maintain the LCP conditions on the other variables
+  // 0..i-1. we do this by watching out for other x(i),w(i) values going
+  // outside the valid region, and then switching them between index sets
+  // when that happens.
+
+  bool hit_first_friction_index = false;
+  for (int i=adj_nub; i<n; ++i) 
+  {
+    s_error = false;
+    // the index i is the driving index and indexes i+1..n-1 are "dont care",
+    // i.e. when we make changes to the system those x's will be zero and we
+    // don't care what happens to those w's. in other words, we only consider
+    // an (i+1)*(i+1) sub-problem of A*x=b+w.
+
+    // if we've hit the first friction index, we have to compute the lo and
+    // hi values based on the values of x already computed. we have been
+    // permuting the indexes, so the values stored in the findex vector are
+    // no longer valid. thus we have to temporarily unpermute the x vector. 
+    // for the purposes of this computation, 0*infinity = 0 ... so if the
+    // contact constraint's normal force is 0, there should be no tangential
+    // force applied.
+
+    if (!hit_first_friction_index && findex && findex[i] >= 0) {
+      // un-permute x into delta_w, which is not being used at the moment
+      for (int j=0; j<n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j];
+
+      // set lo and hi values
+      for (int k=i; k<n; ++k) {
+        btScalar wfk = scratchMem.delta_w[findex[k]];
+        if (wfk == 0) {
+          hi[k] = 0;
+          lo[k] = 0;
+        }
+        else {
+          hi[k] = btFabs (hi[k] * wfk);
+          lo[k] = -hi[k];
+        }
+      }
+      hit_first_friction_index = true;
+    }
+
+    // thus far we have not even been computing the w values for indexes
+    // greater than i, so compute w[i] now.
+    w[i] = lcp.AiC_times_qC (i,x) + lcp.AiN_times_qN (i,x) - b[i];
+
+    // if lo=hi=0 (which can happen for tangential friction when normals are
+    // 0) then the index will be assigned to set N with some state. however,
+    // set C's line has zero size, so the index will always remain in set N.
+    // with the "normal" switching logic, if w changed sign then the index
+    // would have to switch to set C and then back to set N with an inverted
+    // state. this is pointless, and also computationally expensive. to
+    // prevent this from happening, we use the rule that indexes with lo=hi=0
+    // will never be checked for set changes. this means that the state for
+    // these indexes may be incorrect, but that doesn't matter.
+
+    // see if x(i),w(i) is in a valid region
+    if (lo[i]==0 && w[i] >= 0) {
+      lcp.transfer_i_to_N (i);
+      scratchMem.state[i] = false;
+    }
+    else if (hi[i]==0 && w[i] <= 0) {
+      lcp.transfer_i_to_N (i);
+      scratchMem.state[i] = true;
+    }
+    else if (w[i]==0) {
+      // this is a degenerate case. by the time we get to this test we know
+      // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
+      // and similarly that hi > 0. this means that the line segment
+      // corresponding to set C is at least finite in extent, and we are on it.
+      // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
+      lcp.solve1 (&scratchMem.delta_x[0],i,0,1);
+
+      lcp.transfer_i_to_C (i);
+    }
+    else {
+      // we must push x(i) and w(i)
+      for (;;) {
+        int dir;
+        btScalar dirf;
+        // find direction to push on x(i)
+        if (w[i] <= 0) {
+          dir = 1;
+          dirf = btScalar(1.0);
+        }
+        else {
+          dir = -1;
+          dirf = btScalar(-1.0);
+        }
+
+        // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
+        lcp.solve1 (&scratchMem.delta_x[0],i,dir);
+
+        // note that delta_x[i] = dirf, but we wont bother to set it
+
+        // compute: delta_w = A*delta_x ... note we only care about
+        // delta_w(N) and delta_w(i), the rest is ignored
+        lcp.pN_equals_ANC_times_qC (&scratchMem.delta_w[0],&scratchMem.delta_x[0]);
+        lcp.pN_plusequals_ANi (&scratchMem.delta_w[0],i,dir);
+        scratchMem.delta_w[i] = lcp.AiC_times_qC (i,&scratchMem.delta_x[0]) + lcp.Aii(i)*dirf;
+
+        // find largest step we can take (size=s), either to drive x(i),w(i)
+        // to the valid LCP region or to drive an already-valid variable
+        // outside the valid region.
+
+        int cmd = 1;		// index switching command
+        int si = 0;		// si = index to switch if cmd>3
+        btScalar s = -w[i]/scratchMem.delta_w[i];
+        if (dir > 0) {
+          if (hi[i] < BT_INFINITY) {
+            btScalar s2 = (hi[i]-x[i])*dirf;	// was (hi[i]-x[i])/dirf	// step to x(i)=hi(i)
+            if (s2 < s) {
+              s = s2;
+              cmd = 3;
+            }
+          }
+        }
+        else {
+          if (lo[i] > -BT_INFINITY) {
+            btScalar s2 = (lo[i]-x[i])*dirf;	// was (lo[i]-x[i])/dirf	// step to x(i)=lo(i)
+            if (s2 < s) {
+              s = s2;
+              cmd = 2;
+            }
+          }
+        }
+
+        {
+          const int numN = lcp.numN();
+          for (int k=0; k < numN; ++k) {
+            const int indexN_k = lcp.indexN(k);
+            if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0) {
+                // don't bother checking if lo=hi=0
+                if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue;
+                btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k];
+                if (s2 < s) {
+                  s = s2;
+                  cmd = 4;
+                  si = indexN_k;
+                }
+            }
+          }
+        }
+
+        {
+          const int numC = lcp.numC();
+          for (int k=adj_nub; k < numC; ++k) {
+            const int indexC_k = lcp.indexC(k);
+            if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY) {
+              btScalar s2 = (lo[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
+              if (s2 < s) {
+                s = s2;
+                cmd = 5;
+                si = indexC_k;
+              }
+            }
+            if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY) {
+              btScalar s2 = (hi[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
+              if (s2 < s) {
+                s = s2;
+                cmd = 6;
+                si = indexC_k;
+              }
+            }
+          }
+        }
+
+        //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
+        //			     "C->NL","C->NH"};
+        //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
+
+        // if s <= 0 then we've got a problem. if we just keep going then
+        // we're going to get stuck in an infinite loop. instead, just cross
+        // our fingers and exit with the current solution.
+        if (s <= btScalar(0.0)) 
+		{
+//          printf("LCP internal error, s <= 0 (s=%.4e)",(double)s);
+          if (i < n) {
+            btSetZero (x+i,n-i);
+            btSetZero (w+i,n-i);
+          }
+          s_error = true;
+          break;
+        }
+
+        // apply x = x + s * delta_x
+        lcp.pC_plusequals_s_times_qC (x, s, &scratchMem.delta_x[0]);
+        x[i] += s * dirf;
+
+        // apply w = w + s * delta_w
+        lcp.pN_plusequals_s_times_qN (w, s, &scratchMem.delta_w[0]);
+        w[i] += s * scratchMem.delta_w[i];
+
+//        void *tmpbuf;
+        // switch indexes between sets if necessary
+        switch (cmd) {
+        case 1:		// done
+          w[i] = 0;
+          lcp.transfer_i_to_C (i);
+          break;
+        case 2:		// done
+          x[i] = lo[i];
+          scratchMem.state[i] = false;
+          lcp.transfer_i_to_N (i);
+          break;
+        case 3:		// done
+          x[i] = hi[i];
+          scratchMem.state[i] = true;
+          lcp.transfer_i_to_N (i);
+          break;
+        case 4:		// keep going
+          w[si] = 0;
+          lcp.transfer_i_from_N_to_C (si);
+          break;
+        case 5:		// keep going
+          x[si] = lo[si];
+          scratchMem.state[si] = false;
+		  lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
+          break;
+        case 6:		// keep going
+          x[si] = hi[si];
+          scratchMem.state[si] = true;
+          lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
+          break;
+        }
+
+        if (cmd <= 3) break;
+      } // for (;;)
+    } // else
+
+    if (s_error) 
+	{
+      break;
+    }
+  } // for (int i=adj_nub; i<n; ++i)
+
+  lcp.unpermute();
+
+
+  return !s_error;
+}
+

+ 77 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigLCP.h

@@ -0,0 +1,77 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: [email protected]   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of                                          * 
+ *   The BSD-style license that is included with this library in         *
+ *   the file LICENSE-BSD.TXT.                                           *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+given (A,b,lo,hi), solve the LCP problem: A*x = b+w, where each x(i),w(i)
+satisfies one of
+	(1) x = lo, w >= 0
+	(2) x = hi, w <= 0
+	(3) lo < x < hi, w = 0
+A is a matrix of dimension n*n, everything else is a vector of size n*1.
+lo and hi can be +/- dInfinity as needed. the first `nub' variables are
+unbounded, i.e. hi and lo are assumed to be +/- dInfinity.
+
+we restrict lo(i) <= 0 and hi(i) >= 0.
+
+the original data (A,b) may be modified by this function.
+
+if the `findex' (friction index) parameter is nonzero, it points to an array
+of index values. in this case constraints that have findex[i] >= 0 are
+special. all non-special constraints are solved for, then the lo and hi values
+for the special constraints are set:
+  hi[i] = abs( hi[i] * x[findex[i]] )
+  lo[i] = -hi[i]
+and the solution continues. this mechanism allows a friction approximation
+to be implemented. the first `nub' variables are assumed to have findex < 0.
+
+*/
+
+
+#ifndef _BT_LCP_H_
+#define _BT_LCP_H_
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+
+
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+struct btDantzigScratchMemory
+{
+	btAlignedObjectArray<btScalar> m_scratch;
+	btAlignedObjectArray<btScalar> L;
+	btAlignedObjectArray<btScalar> d;
+	btAlignedObjectArray<btScalar> delta_w;
+	btAlignedObjectArray<btScalar> delta_x;
+	btAlignedObjectArray<btScalar> Dell;
+	btAlignedObjectArray<btScalar> ell;
+	btAlignedObjectArray<btScalar*> Arows;
+	btAlignedObjectArray<int> p;
+	btAlignedObjectArray<int> C;
+	btAlignedObjectArray<bool> state;
+};
+
+//return false if solving failed
+bool btSolveDantzigLCP (int n, btScalar *A, btScalar *x, btScalar *b, btScalar *w,
+	int nub, btScalar *lo, btScalar *hi, int *findex,btDantzigScratchMemory& scratch);
+
+
+
+#endif //_BT_LCP_H_

+ 112 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btDantzigSolver.h

@@ -0,0 +1,112 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#ifndef BT_DANTZIG_SOLVER_H
+#define BT_DANTZIG_SOLVER_H
+
+#include "btMLCPSolverInterface.h"
+#include "btDantzigLCP.h"
+
+
+class btDantzigSolver : public btMLCPSolverInterface
+{
+protected:
+
+	btScalar m_acceptableUpperLimitSolution;
+
+	btAlignedObjectArray<char>	m_tempBuffer;
+
+	btAlignedObjectArray<btScalar> m_A;
+	btAlignedObjectArray<btScalar> m_b;
+	btAlignedObjectArray<btScalar> m_x;
+	btAlignedObjectArray<btScalar> m_lo;
+	btAlignedObjectArray<btScalar> m_hi;
+	btAlignedObjectArray<int>	m_dependencies;
+	btDantzigScratchMemory m_scratchMemory;
+public:
+
+	btDantzigSolver()
+		:m_acceptableUpperLimitSolution(btScalar(1000))
+	{
+	}
+
+	virtual bool solveMLCP(const btMatrixXu & A, const btVectorXu & b, btVectorXu& x, const btVectorXu & lo,const btVectorXu & hi,const btAlignedObjectArray<int>& limitDependency, int numIterations, bool useSparsity = true)
+	{
+		bool result = true;
+		int n = b.rows();
+		if (n)
+		{
+			int nub = 0;
+			btAlignedObjectArray<btScalar> ww;
+			ww.resize(n);
+	
+
+			const btScalar* Aptr = A.getBufferPointer();
+			m_A.resize(n*n);
+			for (int i=0;i<n*n;i++)
+			{
+				m_A[i] = Aptr[i];
+
+			}
+
+			m_b.resize(n);
+			m_x.resize(n);
+			m_lo.resize(n);
+			m_hi.resize(n);
+			m_dependencies.resize(n);
+			for (int i=0;i<n;i++)
+			{
+				m_lo[i] = lo[i];
+				m_hi[i] = hi[i];
+				m_b[i] = b[i];
+				m_x[i] = x[i];
+				m_dependencies[i] = limitDependency[i];
+			}
+
+
+			result = btSolveDantzigLCP (n,&m_A[0],&m_x[0],&m_b[0],&ww[0],nub,&m_lo[0],&m_hi[0],&m_dependencies[0],m_scratchMemory);
+			if (!result)
+				return result;
+
+//			printf("numAllocas = %d\n",numAllocas);
+			for (int i=0;i<n;i++)
+			{
+				volatile btScalar xx = m_x[i];
+				if (xx != m_x[i])
+					return false;
+				if (x[i] >= m_acceptableUpperLimitSolution)
+				{
+					return false;
+				}
+
+				if (x[i] <= -m_acceptableUpperLimitSolution)
+				{
+					return false;
+				}
+			}
+
+			for (int i=0;i<n;i++)
+			{
+				x[i] = m_x[i];
+			}
+			
+		}
+
+		return result;
+	}
+};
+
+#endif //BT_DANTZIG_SOLVER_H

+ 626 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolver.cpp

@@ -0,0 +1,626 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#include "btMLCPSolver.h"
+#include "LinearMath/btMatrixX.h"
+#include "LinearMath/btQuickprof.h"
+#include "btSolveProjectedGaussSeidel.h"
+
+btMLCPSolver::btMLCPSolver(	 btMLCPSolverInterface* solver)
+:m_solver(solver),
+m_fallback(0)
+{
+}
+
+btMLCPSolver::~btMLCPSolver()
+{
+}
+
+bool gUseMatrixMultiply = false;
+bool interleaveContactAndFriction = false;
+
+btScalar btMLCPSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodiesUnUsed, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
+{
+	btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup( bodies, numBodiesUnUsed, manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer);
+
+	{
+		BT_PROFILE("gather constraint data");
+
+		int numFrictionPerContact = m_tmpSolverContactConstraintPool.size()==m_tmpSolverContactFrictionConstraintPool.size()? 1 : 2;
+
+
+		int numBodies = m_tmpSolverBodyPool.size();
+		m_allConstraintArray.resize(0);
+		m_limitDependencies.resize(m_tmpSolverNonContactConstraintPool.size()+m_tmpSolverContactConstraintPool.size()+m_tmpSolverContactFrictionConstraintPool.size());
+		btAssert(m_limitDependencies.size() == m_tmpSolverNonContactConstraintPool.size()+m_tmpSolverContactConstraintPool.size()+m_tmpSolverContactFrictionConstraintPool.size());
+	//	printf("m_limitDependencies.size() = %d\n",m_limitDependencies.size());
+
+		int dindex = 0;
+		for (int i=0;i<m_tmpSolverNonContactConstraintPool.size();i++)
+		{
+			m_allConstraintArray.push_back(m_tmpSolverNonContactConstraintPool[i]);
+			m_limitDependencies[dindex++] = -1;
+		}
+ 
+		///The btSequentialImpulseConstraintSolver moves all friction constraints at the very end, we can also interleave them instead
+		
+		int firstContactConstraintOffset=dindex;
+
+		if (interleaveContactAndFriction)
+		{
+			for (int i=0;i<m_tmpSolverContactConstraintPool.size();i++)
+			{
+				m_allConstraintArray.push_back(m_tmpSolverContactConstraintPool[i]);
+				m_limitDependencies[dindex++] = -1;
+				m_allConstraintArray.push_back(m_tmpSolverContactFrictionConstraintPool[i*numFrictionPerContact]);
+				int findex = (m_tmpSolverContactFrictionConstraintPool[i*numFrictionPerContact].m_frictionIndex*(1+numFrictionPerContact));
+				m_limitDependencies[dindex++] = findex +firstContactConstraintOffset;
+				if (numFrictionPerContact==2)
+				{
+					m_allConstraintArray.push_back(m_tmpSolverContactFrictionConstraintPool[i*numFrictionPerContact+1]);
+					m_limitDependencies[dindex++] = findex+firstContactConstraintOffset;
+				}
+			}
+		} else
+		{
+			for (int i=0;i<m_tmpSolverContactConstraintPool.size();i++)
+			{
+				m_allConstraintArray.push_back(m_tmpSolverContactConstraintPool[i]);
+				m_limitDependencies[dindex++] = -1;
+			}
+			for (int i=0;i<m_tmpSolverContactFrictionConstraintPool.size();i++)
+			{
+				m_allConstraintArray.push_back(m_tmpSolverContactFrictionConstraintPool[i]);
+				m_limitDependencies[dindex++] = m_tmpSolverContactFrictionConstraintPool[i].m_frictionIndex+firstContactConstraintOffset;
+			}
+			
+		}
+
+
+		if (!m_allConstraintArray.size())
+		{
+			m_A.resize(0,0);
+			m_b.resize(0);
+			m_x.resize(0);
+			m_lo.resize(0);
+			m_hi.resize(0);
+			return 0.f;
+		}
+	}
+
+	
+	if (gUseMatrixMultiply)
+	{
+		BT_PROFILE("createMLCP");
+		createMLCP(infoGlobal);
+	}
+	else
+	{
+		BT_PROFILE("createMLCPFast");
+		createMLCPFast(infoGlobal);
+	}
+
+	return 0.f;
+}
+
+bool btMLCPSolver::solveMLCP(const btContactSolverInfo& infoGlobal)
+{
+	bool result = true;
+
+	if (m_A.rows()==0)
+		return true;
+
+	//if using split impulse, we solve 2 separate (M)LCPs
+	if (infoGlobal.m_splitImpulse)
+	{
+		btMatrixXu Acopy = m_A;
+		btAlignedObjectArray<int> limitDependenciesCopy = m_limitDependencies;
+//		printf("solve first LCP\n");
+		result = m_solver->solveMLCP(m_A, m_b, m_x, m_lo,m_hi, m_limitDependencies,infoGlobal.m_numIterations );
+		if (result)
+			result = m_solver->solveMLCP(Acopy, m_bSplit, m_xSplit, m_lo,m_hi, limitDependenciesCopy,infoGlobal.m_numIterations );
+
+	} else
+	{
+		result = m_solver->solveMLCP(m_A, m_b, m_x, m_lo,m_hi, m_limitDependencies,infoGlobal.m_numIterations );
+	}
+	return result;
+}
+
+struct btJointNode
+{
+	int jointIndex;     // pointer to enclosing dxJoint object
+	int otherBodyIndex;       // *other* body this joint is connected to
+	int nextJointNodeIndex;//-1 for null
+	int constraintRowIndex;
+};
+
+
+
+void btMLCPSolver::createMLCPFast(const btContactSolverInfo& infoGlobal)
+{
+	int numContactRows = interleaveContactAndFriction ? 3 : 1;
+
+	int numConstraintRows = m_allConstraintArray.size();
+	int n = numConstraintRows;
+	{
+		BT_PROFILE("init b (rhs)");
+		m_b.resize(numConstraintRows);
+		m_bSplit.resize(numConstraintRows);
+		//m_b.setZero();
+		for (int i=0;i<numConstraintRows ;i++)
+		{
+			if (m_allConstraintArray[i].m_jacDiagABInv)
+			{
+				m_b[i]=m_allConstraintArray[i].m_rhs/m_allConstraintArray[i].m_jacDiagABInv;
+				m_bSplit[i] = m_allConstraintArray[i].m_rhsPenetration/m_allConstraintArray[i].m_jacDiagABInv;
+			}
+
+		}
+	}
+
+	btScalar* w = 0;
+	int nub = 0;
+
+	m_lo.resize(numConstraintRows);
+	m_hi.resize(numConstraintRows);
+ 
+	{
+		BT_PROFILE("init lo/ho");
+
+		for (int i=0;i<numConstraintRows;i++)
+		{
+			if (0)//m_limitDependencies[i]>=0)
+			{
+				m_lo[i] = -BT_INFINITY;
+				m_hi[i] = BT_INFINITY;
+			} else
+			{
+				m_lo[i] = m_allConstraintArray[i].m_lowerLimit;
+				m_hi[i] = m_allConstraintArray[i].m_upperLimit;
+			}
+		}
+	}
+
+	//
+	int m=m_allConstraintArray.size();
+
+	int numBodies = m_tmpSolverBodyPool.size();
+	btAlignedObjectArray<int> bodyJointNodeArray;
+	{
+		BT_PROFILE("bodyJointNodeArray.resize");
+		bodyJointNodeArray.resize(numBodies,-1);
+	}
+	btAlignedObjectArray<btJointNode> jointNodeArray;
+	{
+		BT_PROFILE("jointNodeArray.reserve");
+		jointNodeArray.reserve(2*m_allConstraintArray.size());
+	}
+
+	static btMatrixXu J3;
+	{
+		BT_PROFILE("J3.resize");
+		J3.resize(2*m,8);
+	}
+	static btMatrixXu JinvM3;
+	{
+		BT_PROFILE("JinvM3.resize/setZero");
+
+		JinvM3.resize(2*m,8);
+		JinvM3.setZero();
+		J3.setZero();
+	}
+	int cur=0;
+	int rowOffset = 0;
+	static btAlignedObjectArray<int> ofs;
+	{
+		BT_PROFILE("ofs resize");
+		ofs.resize(0);
+		ofs.resizeNoInitialize(m_allConstraintArray.size());
+	}				
+	{
+		BT_PROFILE("Compute J and JinvM");
+		int c=0;
+
+		int numRows = 0;
+
+		for (int i=0;i<m_allConstraintArray.size();i+=numRows,c++)
+		{
+			ofs[c] = rowOffset;
+			int sbA = m_allConstraintArray[i].m_solverBodyIdA;
+			int sbB = m_allConstraintArray[i].m_solverBodyIdB;
+			btRigidBody* orgBodyA = m_tmpSolverBodyPool[sbA].m_originalBody;
+			btRigidBody* orgBodyB = m_tmpSolverBodyPool[sbB].m_originalBody;
+
+			numRows = i<m_tmpSolverNonContactConstraintPool.size() ? m_tmpConstraintSizesPool[c].m_numConstraintRows : numContactRows ;
+			if (orgBodyA)
+			{
+				{
+					int slotA=-1;
+					//find free jointNode slot for sbA
+					slotA =jointNodeArray.size();
+					jointNodeArray.expand();//NonInitializing();
+					int prevSlot = bodyJointNodeArray[sbA];
+					bodyJointNodeArray[sbA] = slotA;
+					jointNodeArray[slotA].nextJointNodeIndex = prevSlot;
+					jointNodeArray[slotA].jointIndex = c;
+					jointNodeArray[slotA].constraintRowIndex = i;
+					jointNodeArray[slotA].otherBodyIndex = orgBodyB ? sbB : -1;
+				}
+				for (int row=0;row<numRows;row++,cur++)
+				{
+					btVector3 normalInvMass =				m_allConstraintArray[i+row].m_contactNormal1 *		orgBodyA->getInvMass();
+					btVector3 relPosCrossNormalInvInertia = m_allConstraintArray[i+row].m_relpos1CrossNormal *	orgBodyA->getInvInertiaTensorWorld();
+
+					for (int r=0;r<3;r++)
+					{
+						J3.setElem(cur,r,m_allConstraintArray[i+row].m_contactNormal1[r]);
+						J3.setElem(cur,r+4,m_allConstraintArray[i+row].m_relpos1CrossNormal[r]);
+						JinvM3.setElem(cur,r,normalInvMass[r]);
+						JinvM3.setElem(cur,r+4,relPosCrossNormalInvInertia[r]);
+					}
+					J3.setElem(cur,3,0);
+					JinvM3.setElem(cur,3,0);
+					J3.setElem(cur,7,0);
+					JinvM3.setElem(cur,7,0);
+				}
+			} else
+			{
+				cur += numRows;
+			}
+			if (orgBodyB)
+			{
+
+				{
+					int slotB=-1;
+					//find free jointNode slot for sbA
+					slotB =jointNodeArray.size();
+					jointNodeArray.expand();//NonInitializing();
+					int prevSlot = bodyJointNodeArray[sbB];
+					bodyJointNodeArray[sbB] = slotB;
+					jointNodeArray[slotB].nextJointNodeIndex = prevSlot;
+					jointNodeArray[slotB].jointIndex = c;
+					jointNodeArray[slotB].otherBodyIndex = orgBodyA ? sbA : -1;
+					jointNodeArray[slotB].constraintRowIndex = i;
+				}
+
+				for (int row=0;row<numRows;row++,cur++)
+				{
+					btVector3 normalInvMassB = m_allConstraintArray[i+row].m_contactNormal2*orgBodyB->getInvMass();
+					btVector3 relPosInvInertiaB = m_allConstraintArray[i+row].m_relpos2CrossNormal * orgBodyB->getInvInertiaTensorWorld();
+
+					for (int r=0;r<3;r++)
+					{
+						J3.setElem(cur,r,m_allConstraintArray[i+row].m_contactNormal2[r]);
+						J3.setElem(cur,r+4,m_allConstraintArray[i+row].m_relpos2CrossNormal[r]);
+						JinvM3.setElem(cur,r,normalInvMassB[r]);
+						JinvM3.setElem(cur,r+4,relPosInvInertiaB[r]);
+					}
+					J3.setElem(cur,3,0);
+					JinvM3.setElem(cur,3,0);
+					J3.setElem(cur,7,0);
+					JinvM3.setElem(cur,7,0);
+				}
+			}
+			else
+			{
+				cur += numRows;
+			}
+			rowOffset+=numRows;
+
+		}
+		
+	}
+
+
+	//compute JinvM = J*invM.
+	const btScalar* JinvM = JinvM3.getBufferPointer();
+
+	const btScalar* Jptr = J3.getBufferPointer();
+	{
+		BT_PROFILE("m_A.resize");
+		m_A.resize(n,n);
+	}
+	
+	{
+		BT_PROFILE("m_A.setZero");
+		m_A.setZero();
+	}
+	int c=0;
+	{
+		int numRows = 0;
+		BT_PROFILE("Compute A");
+		for (int i=0;i<m_allConstraintArray.size();i+= numRows,c++)
+		{
+			int row__ = ofs[c];
+			int sbA = m_allConstraintArray[i].m_solverBodyIdA;
+			int sbB = m_allConstraintArray[i].m_solverBodyIdB;
+			btRigidBody* orgBodyA = m_tmpSolverBodyPool[sbA].m_originalBody;
+			btRigidBody* orgBodyB = m_tmpSolverBodyPool[sbB].m_originalBody;
+
+			numRows = i<m_tmpSolverNonContactConstraintPool.size() ? m_tmpConstraintSizesPool[c].m_numConstraintRows : numContactRows ;
+					
+			const btScalar *JinvMrow = JinvM + 2*8*(size_t)row__;
+
+			{
+				int startJointNodeA = bodyJointNodeArray[sbA];
+				while (startJointNodeA>=0)
+				{
+					int j0 = jointNodeArray[startJointNodeA].jointIndex;
+					int cr0 = jointNodeArray[startJointNodeA].constraintRowIndex;
+					if (j0<c)
+					{
+								 
+						int numRowsOther = cr0 < m_tmpSolverNonContactConstraintPool.size() ? m_tmpConstraintSizesPool[j0].m_numConstraintRows : numContactRows;
+						size_t ofsother = (m_allConstraintArray[cr0].m_solverBodyIdB == sbA) ? 8*numRowsOther  : 0;
+						//printf("%d joint i %d and j0: %d: ",count++,i,j0);
+						m_A.multiplyAdd2_p8r ( JinvMrow, 
+						Jptr + 2*8*(size_t)ofs[j0] + ofsother, numRows, numRowsOther,  row__,ofs[j0]);
+					}
+					startJointNodeA = jointNodeArray[startJointNodeA].nextJointNodeIndex;
+				}
+			}
+
+			{
+				int startJointNodeB = bodyJointNodeArray[sbB];
+				while (startJointNodeB>=0)
+				{
+					int j1 = jointNodeArray[startJointNodeB].jointIndex;
+					int cj1 = jointNodeArray[startJointNodeB].constraintRowIndex;
+
+					if (j1<c)
+					{
+						int numRowsOther =  cj1 < m_tmpSolverNonContactConstraintPool.size() ? m_tmpConstraintSizesPool[j1].m_numConstraintRows : numContactRows;
+						size_t ofsother = (m_allConstraintArray[cj1].m_solverBodyIdB == sbB) ? 8*numRowsOther  : 0;
+						m_A.multiplyAdd2_p8r ( JinvMrow + 8*(size_t)numRows, 
+						Jptr + 2*8*(size_t)ofs[j1] + ofsother, numRows, numRowsOther, row__,ofs[j1]);
+					}
+					startJointNodeB = jointNodeArray[startJointNodeB].nextJointNodeIndex;
+				}
+			}
+		}
+
+		{
+			BT_PROFILE("compute diagonal");
+			// compute diagonal blocks of m_A
+
+			int  row__ = 0;
+			int numJointRows = m_allConstraintArray.size();
+
+			int jj=0;
+			for (;row__<numJointRows;)
+			{
+
+				int sbA = m_allConstraintArray[row__].m_solverBodyIdA;
+				int sbB = m_allConstraintArray[row__].m_solverBodyIdB;
+				btRigidBody* orgBodyA = m_tmpSolverBodyPool[sbA].m_originalBody;
+				btRigidBody* orgBodyB = m_tmpSolverBodyPool[sbB].m_originalBody;
+
+
+				const unsigned int infom =  row__ < m_tmpSolverNonContactConstraintPool.size() ? m_tmpConstraintSizesPool[jj].m_numConstraintRows : numContactRows;
+				
+				const btScalar *JinvMrow = JinvM + 2*8*(size_t)row__;
+				const btScalar *Jrow = Jptr + 2*8*(size_t)row__;
+				m_A.multiply2_p8r (JinvMrow, Jrow, infom, infom, row__,row__);
+				if (orgBodyB) 
+				{
+					m_A.multiplyAdd2_p8r (JinvMrow + 8*(size_t)infom, Jrow + 8*(size_t)infom, infom, infom,  row__,row__);
+				}
+				row__ += infom;
+				jj++;
+			}
+		}
+	}
+
+	///todo: use proper cfm values from the constraints (getInfo2)
+	if (1)
+	{
+		// add cfm to the diagonal of m_A
+		for ( int i=0; i<m_A.rows(); ++i) 
+		{
+			float cfm = 0.00001f;
+			m_A.setElem(i,i,m_A(i,i)+ cfm / infoGlobal.m_timeStep);
+		}
+	}
+				   
+	///fill the upper triangle of the matrix, to make it symmetric
+	{
+		BT_PROFILE("fill the upper triangle ");
+		m_A.copyLowerToUpperTriangle();
+	}
+
+	{
+		BT_PROFILE("resize/init x");
+		m_x.resize(numConstraintRows);
+		m_xSplit.resize(numConstraintRows);
+
+		if (infoGlobal.m_solverMode&SOLVER_USE_WARMSTARTING)
+		{
+			for (int i=0;i<m_allConstraintArray.size();i++)
+			{
+				const btSolverConstraint& c = m_allConstraintArray[i];
+				m_x[i]=c.m_appliedImpulse;
+				m_xSplit[i] = c.m_appliedPushImpulse;
+			}
+		} else
+		{
+			m_x.setZero();
+			m_xSplit.setZero();
+		}
+	}
+
+}
+
+void btMLCPSolver::createMLCP(const btContactSolverInfo& infoGlobal)
+{
+	int numBodies = this->m_tmpSolverBodyPool.size();
+	int numConstraintRows = m_allConstraintArray.size();
+
+	m_b.resize(numConstraintRows);
+	if (infoGlobal.m_splitImpulse)
+		m_bSplit.resize(numConstraintRows);
+ 
+	for (int i=0;i<numConstraintRows ;i++)
+	{
+		if (m_allConstraintArray[i].m_jacDiagABInv)
+		{
+			m_b[i]=m_allConstraintArray[i].m_rhs/m_allConstraintArray[i].m_jacDiagABInv;
+			if (infoGlobal.m_splitImpulse)
+				m_bSplit[i] = m_allConstraintArray[i].m_rhsPenetration/m_allConstraintArray[i].m_jacDiagABInv;
+		}
+	}
+ 
+	static btMatrixXu Minv;
+	Minv.resize(6*numBodies,6*numBodies);
+	Minv.setZero();
+	for (int i=0;i<numBodies;i++)
+	{
+		const btSolverBody& rb = m_tmpSolverBodyPool[i];
+		const btVector3& invMass = rb.m_invMass;
+		setElem(Minv,i*6+0,i*6+0,invMass[0]);
+		setElem(Minv,i*6+1,i*6+1,invMass[1]);
+		setElem(Minv,i*6+2,i*6+2,invMass[2]);
+		btRigidBody* orgBody = m_tmpSolverBodyPool[i].m_originalBody;
+ 
+		for (int r=0;r<3;r++)
+			for (int c=0;c<3;c++)
+				setElem(Minv,i*6+3+r,i*6+3+c,orgBody? orgBody->getInvInertiaTensorWorld()[r][c] : 0);
+	}
+ 
+	static btMatrixXu J;
+	J.resize(numConstraintRows,6*numBodies);
+	J.setZero();
+ 
+	m_lo.resize(numConstraintRows);
+	m_hi.resize(numConstraintRows);
+ 
+	for (int i=0;i<numConstraintRows;i++)
+	{
+
+		m_lo[i] = m_allConstraintArray[i].m_lowerLimit;
+		m_hi[i] = m_allConstraintArray[i].m_upperLimit;
+ 
+		int bodyIndex0 = m_allConstraintArray[i].m_solverBodyIdA;
+		int bodyIndex1 = m_allConstraintArray[i].m_solverBodyIdB;
+		if (m_tmpSolverBodyPool[bodyIndex0].m_originalBody)
+		{
+			setElem(J,i,6*bodyIndex0+0,m_allConstraintArray[i].m_contactNormal1[0]);
+			setElem(J,i,6*bodyIndex0+1,m_allConstraintArray[i].m_contactNormal1[1]);
+			setElem(J,i,6*bodyIndex0+2,m_allConstraintArray[i].m_contactNormal1[2]);
+			setElem(J,i,6*bodyIndex0+3,m_allConstraintArray[i].m_relpos1CrossNormal[0]);
+			setElem(J,i,6*bodyIndex0+4,m_allConstraintArray[i].m_relpos1CrossNormal[1]);
+			setElem(J,i,6*bodyIndex0+5,m_allConstraintArray[i].m_relpos1CrossNormal[2]);
+		}
+		if (m_tmpSolverBodyPool[bodyIndex1].m_originalBody)
+		{
+			setElem(J,i,6*bodyIndex1+0,m_allConstraintArray[i].m_contactNormal2[0]);
+			setElem(J,i,6*bodyIndex1+1,m_allConstraintArray[i].m_contactNormal2[1]);
+			setElem(J,i,6*bodyIndex1+2,m_allConstraintArray[i].m_contactNormal2[2]);
+			setElem(J,i,6*bodyIndex1+3,m_allConstraintArray[i].m_relpos2CrossNormal[0]);
+			setElem(J,i,6*bodyIndex1+4,m_allConstraintArray[i].m_relpos2CrossNormal[1]);
+			setElem(J,i,6*bodyIndex1+5,m_allConstraintArray[i].m_relpos2CrossNormal[2]);
+		}
+	}
+ 
+	static btMatrixXu J_transpose;
+	J_transpose= J.transpose();
+
+	static btMatrixXu tmp;
+
+	{
+		{
+			BT_PROFILE("J*Minv");
+			tmp = J*Minv;
+
+		}
+		{
+			BT_PROFILE("J*tmp");
+			m_A = tmp*J_transpose;
+		}
+	}
+
+	if (1)
+	{
+		///todo: use proper cfm values from the constraints (getInfo2)
+		// add cfm to the diagonal of m_A
+		for ( int i=0; i<m_A.rows(); ++i) 
+		{
+			float cfm = 0.0001f;
+			m_A.setElem(i,i,m_A(i,i)+ cfm / infoGlobal.m_timeStep);
+		}
+	}
+
+	m_x.resize(numConstraintRows);
+	if (infoGlobal.m_splitImpulse)
+		m_xSplit.resize(numConstraintRows);
+//	m_x.setZero();
+
+	for (int i=0;i<m_allConstraintArray.size();i++)
+	{
+		const btSolverConstraint& c = m_allConstraintArray[i];
+		m_x[i]=c.m_appliedImpulse;
+		if (infoGlobal.m_splitImpulse)
+			m_xSplit[i] = c.m_appliedPushImpulse;
+	}
+
+}
+
+
+btScalar btMLCPSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer)
+{
+	bool result = true;
+	{
+		BT_PROFILE("solveMLCP");
+//		printf("m_A(%d,%d)\n", m_A.rows(),m_A.cols());
+		result = solveMLCP(infoGlobal);
+	}
+
+	//check if solution is valid, and otherwise fallback to btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations
+	if (result)
+	{
+		BT_PROFILE("process MLCP results");
+		for (int i=0;i<m_allConstraintArray.size();i++)
+		{
+			{
+				btSolverConstraint& c = m_allConstraintArray[i];
+				int sbA = c.m_solverBodyIdA;
+				int sbB = c.m_solverBodyIdB;
+				btRigidBody* orgBodyA = m_tmpSolverBodyPool[sbA].m_originalBody;
+				btRigidBody* orgBodyB = m_tmpSolverBodyPool[sbB].m_originalBody;
+
+				btSolverBody& solverBodyA = m_tmpSolverBodyPool[sbA];
+				btSolverBody& solverBodyB = m_tmpSolverBodyPool[sbB];
+ 
+				solverBodyA.internalApplyImpulse(c.m_contactNormal1*solverBodyA.internalGetInvMass(),c.m_angularComponentA,m_x[i]);
+				solverBodyB.internalApplyImpulse(c.m_contactNormal2*solverBodyB.internalGetInvMass(),c.m_angularComponentB,m_x[i]);
+				if (infoGlobal.m_splitImpulse)
+				{
+					solverBodyA.internalApplyPushImpulse(c.m_contactNormal1*solverBodyA.internalGetInvMass(),c.m_angularComponentA,m_xSplit[i]);
+					solverBodyB.internalApplyPushImpulse(c.m_contactNormal2*solverBodyB.internalGetInvMass(),c.m_angularComponentB,m_xSplit[i]);
+					c.m_appliedPushImpulse = m_xSplit[i];
+				}
+				c.m_appliedImpulse = m_x[i];
+			}
+		}
+	}
+	else
+	{
+		m_fallback++;
+		btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer);
+	}
+
+	return 0.f;
+}

+ 81 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolver.h

@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#ifndef BT_MLCP_SOLVER_H
+#define BT_MLCP_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+#include "LinearMath/btMatrixX.h"
+#include "BulletDynamics/MLCPSolvers/btMLCPSolverInterface.h"
+
+class btMLCPSolver : public btSequentialImpulseConstraintSolver
+{
+
+protected:
+	
+	btMatrixXu m_A;
+	btVectorXu m_b;
+	btVectorXu m_x;
+	btVectorXu m_lo;
+	btVectorXu m_hi;
+	
+	///when using 'split impulse' we solve two separate (M)LCPs
+	btVectorXu m_bSplit;
+	btVectorXu m_xSplit;
+	btVectorXu m_bSplit1;
+	btVectorXu m_xSplit2;
+
+	btAlignedObjectArray<int> m_limitDependencies;
+	btConstraintArray m_allConstraintArray;
+	btMLCPSolverInterface* m_solver;
+	int m_fallback;
+
+	virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+	virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer);
+	virtual void createMLCP(const btContactSolverInfo& infoGlobal);
+	virtual void createMLCPFast(const btContactSolverInfo& infoGlobal);
+
+	//return true is it solves the problem successfully
+	virtual bool solveMLCP(const btContactSolverInfo& infoGlobal);
+
+public:
+
+	btMLCPSolver(	 btMLCPSolverInterface* solver);
+	virtual ~btMLCPSolver();
+
+	void setMLCPSolver(btMLCPSolverInterface* solver)
+	{
+		m_solver = solver;
+	}
+
+	int getNumFallbacks() const
+	{
+		return m_fallback;
+	}
+	void setNumFallbacks(int num)
+	{
+		m_fallback = num;
+	}
+
+	virtual btConstraintSolverType	getSolverType() const
+	{
+		return BT_MLCP_SOLVER;
+	}
+
+};
+
+
+#endif //BT_MLCP_SOLVER_H

+ 33 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btMLCPSolverInterface.h

@@ -0,0 +1,33 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#ifndef BT_MLCP_SOLVER_INTERFACE_H
+#define BT_MLCP_SOLVER_INTERFACE_H
+
+#include "LinearMath/btMatrixX.h"
+
+class btMLCPSolverInterface
+{
+public:
+	virtual ~btMLCPSolverInterface()
+	{
+	}
+
+	//return true is it solves the problem successfully
+	virtual bool solveMLCP(const btMatrixXu & A, const btVectorXu & b, btVectorXu& x, const btVectorXu & lo,const btVectorXu & hi,const btAlignedObjectArray<int>& limitDependency, int numIterations, bool useSparsity = true)=0;
+};
+
+#endif //BT_MLCP_SOLVER_INTERFACE_H

+ 151 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btPATHSolver.h

@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+
+#ifndef BT_PATH_SOLVER_H
+#define BT_PATH_SOLVER_H
+
+//#define BT_USE_PATH
+#ifdef BT_USE_PATH
+
+extern "C" {
+#include "PATH/SimpleLCP.h"
+#include "PATH/License.h"
+#include "PATH/Error_Interface.h"
+};
+  void __stdcall MyError(Void *data, Char *msg)
+{
+	printf("Path Error: %s\n",msg);
+}
+  void __stdcall MyWarning(Void *data, Char *msg)
+{
+	printf("Path Warning: %s\n",msg);
+}
+
+Error_Interface e;
+
+
+
+#include "btMLCPSolverInterface.h"
+#include "Dantzig/lcp.h"
+
+class btPathSolver : public btMLCPSolverInterface
+{
+public:
+
+	btPathSolver()
+	{
+		License_SetString("2069810742&Courtesy_License&&&USR&2013&14_12_2011&1000&PATH&GEN&31_12_2013&0_0_0&0&0_0");
+		e.error_data = 0;
+		e.warning = MyWarning;
+		e.error = MyError;
+		Error_SetInterface(&e);
+	}
+
+
+	virtual bool solveMLCP(const btMatrixXu & A, const btVectorXu & b, btVectorXu& x, const btVectorXu & lo,const btVectorXu & hi,const btAlignedObjectArray<int>& limitDependency, int numIterations, bool useSparsity = true)
+	{
+		MCP_Termination status;
+		
+
+		int numVariables = b.rows();
+		if (0==numVariables)
+			return true;
+
+			/*	 - variables - the number of variables in the problem
+			- m_nnz - the number of nonzeros in the M matrix
+			- m_i - a vector of size m_nnz containing the row indices for M
+			- m_j - a vector of size m_nnz containing the column indices for M
+			- m_ij - a vector of size m_nnz containing the data for M
+			- q - a vector of size variables
+			- lb - a vector of size variables containing the lower bounds on x
+			- ub - a vector of size variables containing the upper bounds on x
+			*/
+		btAlignedObjectArray<double> values;
+		btAlignedObjectArray<int> rowIndices;
+		btAlignedObjectArray<int> colIndices;
+
+		for (int i=0;i<A.rows();i++)
+		{
+			for (int j=0;j<A.cols();j++)
+			{
+				if (A(i,j)!=0.f)
+				{
+					//add 1, because Path starts at 1, instead of 0
+					rowIndices.push_back(i+1);
+					colIndices.push_back(j+1);
+					values.push_back(A(i,j));
+				}
+			}
+		}
+		int numNonZero = rowIndices.size();
+		btAlignedObjectArray<double> zResult;
+		zResult.resize(numVariables);
+		btAlignedObjectArray<double> rhs;
+		btAlignedObjectArray<double> upperBounds;
+		btAlignedObjectArray<double> lowerBounds;
+		for (int i=0;i<numVariables;i++)
+		{
+			upperBounds.push_back(hi[i]);
+			lowerBounds.push_back(lo[i]);
+			rhs.push_back(-b[i]);
+		}
+
+
+		SimpleLCP(numVariables,numNonZero,&rowIndices[0],&colIndices[0],&values[0],&rhs[0],&lowerBounds[0],&upperBounds[0], &status, &zResult[0]);
+
+		if (status != MCP_Solved)
+		{
+			static const char* gReturnMsgs[] = {
+				"Invalid return",
+				"MCP_Solved: The problem was solved",
+				"MCP_NoProgress: A stationary point was found",
+				"MCP_MajorIterationLimit: Major iteration limit met",
+				"MCP_MinorIterationLimit: Cumulative minor iteration limit met",
+				"MCP_TimeLimit: Ran out of time",
+				"MCP_UserInterrupt: Control-C, typically",
+				"MCP_BoundError: Problem has a bound error",
+				"MCP_DomainError: Could not find starting point",
+				"MCP_Infeasible: Problem has no solution",
+				"MCP_Error: An error occurred within the code",
+				"MCP_LicenseError: License could not be found",
+				"MCP_OK"
+			};
+
+			printf("ERROR: The PATH MCP solver failed: %s\n", gReturnMsgs[(unsigned int)status]);// << std::endl;
+			printf("using Projected Gauss Seidel fallback\n");
+			
+			return false;
+		} else
+		{
+			for (int i=0;i<numVariables;i++)
+			{
+				x[i] = zResult[i];
+				//check for #NAN
+				if (x[i] != zResult[i])
+					return false;
+			}
+			return true;
+
+		}
+
+	}
+};
+
+#endif //BT_USE_PATH
+
+
+#endif //BT_PATH_SOLVER_H

+ 80 - 0
Source/ThirdParty/Bullet/src/BulletDynamics/MLCPSolvers/btSolveProjectedGaussSeidel.h

@@ -0,0 +1,80 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///original version written by Erwin Coumans, October 2013
+
+#ifndef BT_SOLVE_PROJECTED_GAUSS_SEIDEL_H
+#define BT_SOLVE_PROJECTED_GAUSS_SEIDEL_H
+
+
+#include "btMLCPSolverInterface.h"
+
+class btSolveProjectedGaussSeidel : public btMLCPSolverInterface
+{
+public:
+	virtual bool solveMLCP(const btMatrixXu & A, const btVectorXu & b, btVectorXu& x, const btVectorXu & lo,const btVectorXu & hi,const btAlignedObjectArray<int>& limitDependency, int numIterations, bool useSparsity = true)
+	{
+		//A is a m-n matrix, m rows, n columns
+		btAssert(A.rows() == b.rows());
+
+		int i, j, numRows = A.rows();
+	
+		float delta;
+
+		for (int k = 0; k <numIterations; k++)
+		{
+			for (i = 0; i <numRows; i++)
+			{
+				delta = 0.0f;
+				if (useSparsity)
+				{
+					for (int h=0;h<A.m_rowNonZeroElements1[i].size();h++)
+					{
+						int j = A.m_rowNonZeroElements1[i][h];
+						if (j != i)//skip main diagonal
+						{
+							delta += A(i,j) * x[j];
+						}
+					}
+				} else
+				{
+					for (j = 0; j <i; j++) 
+						delta += A(i,j) * x[j];
+					for (j = i+1; j<numRows; j++) 
+						delta += A(i,j) * x[j];
+				}
+
+				float aDiag = A(i,i);
+				x [i] = (b [i] - delta) / A(i,i);
+				float s = 1.f;
+
+				if (limitDependency[i]>=0)
+				{
+					s = x[limitDependency[i]];
+					if (s<0)
+						s=1;
+				}
+			
+				if (x[i]<lo[i]*s)
+					x[i]=lo[i]*s;
+				if (x[i]>hi[i]*s)
+					x[i]=hi[i]*s;
+			}
+		}
+		return true;
+	}
+
+};
+
+#endif //BT_SOLVE_PROJECTED_GAUSS_SEIDEL_H

+ 0 - 1
Source/ThirdParty/MojoShader/CMakeLists.txt

@@ -6,7 +6,6 @@ file (GLOB C_FILES *.c)
 file (GLOB H_FILES *.h)
 set (SOURCE_FILES ${C_FILES} ${H_FILES})
 
-add_definitions (-DMOJOSHADER_NO_VERSION_INCLUDE)
 if (MSVC)
     add_definitions (-D_CRT_SECURE_NO_WARNINGS=1 -TP)
 endif (MSVC)

+ 5 - 0
Source/ThirdParty/MojoShader/mojoshader.h

@@ -7,6 +7,8 @@
  *  This file written by Ryan C. Gordon.
  */
 
+// Modified by Lasse Oorni for Urho3D
+
 #ifndef _INCL_MOJOSHADER_H_
 #define _INCL_MOJOSHADER_H_
 
@@ -15,9 +17,12 @@ extern "C" {
 #endif
 
 /* You can define this if you aren't generating mojoshader_version.h */
+// Urho3D: commented out to avoid the define
+/*
 #ifndef MOJOSHADER_NO_VERSION_INCLUDE
 #include "mojoshader_version.h"
 #endif
+*/
 
 #ifndef MOJOSHADER_VERSION
 #define MOJOSHADER_VERSION -1

+ 0 - 5
Source/Tools/CMakeLists.txt

@@ -49,9 +49,4 @@ if (NOT IOS AND NOT ANDROID AND ENABLE_TOOLS)
     if (ENABLE_ANGELSCRIPT)
         add_subdirectory (ScriptCompiler)
     endif ()
-
-    # Urho3D non-OpenGL tools
-    if (NOT USE_OPENGL)
-        add_subdirectory (ShaderCompiler)
-    endif ()
 endif ()

+ 0 - 36
Source/Tools/ShaderCompiler/CMakeLists.txt

@@ -1,36 +0,0 @@
-#
-# Copyright (c) 2008-2014 the Urho3D project.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-# Define target name
-set (TARGET_NAME ShaderCompiler)
-
-# Define source files
-define_source_files ()
-
-# Define dependency libs
-add_subdirectory (../../ThirdParty/MojoShader ../../ThirdParty/MojoShader)
-set (LIBS ../../ThirdParty/MojoShader)
-set (ABSOLUTE_PATH_LIBS ${DIRECT3D_COMPILER_LIBRARIES})
-
-# Setup target
-add_definitions (-DMOJOSHADER_NO_VERSION_INCLUDE)
-setup_executable ()

+ 0 - 606
Source/Tools/ShaderCompiler/ShaderCompiler.cpp

@@ -1,606 +0,0 @@
-//
-// Copyright (c) 2008-2014 the Urho3D project.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-
-#include "Context.h"
-#include "File.h"
-#include "FileSystem.h"
-#include "GraphicsDefs.h"
-#include "List.h"
-#include "Mutex.h"
-#include "ProcessUtils.h"
-#include "ShaderParser.h"
-#include "StringUtils.h"
-#include "Thread.h"
-#include "XMLFile.h"
-
-#include <cstdio>
-#include <cstring>
-
-#include <windows.h>
-#include <d3dcompiler.h>
-#include <mojoshader.h>
-
-#include "DebugNew.h"
-
-using namespace Urho3D;
-
-struct Parameter
-{
-    Parameter()
-    {
-    }
-    
-    Parameter(const String& name, unsigned reg, unsigned regCount) :
-        name_(name),
-        register_(reg),
-        regCount_(regCount)
-    {
-    }
-    
-    bool operator < (const Parameter& rhs) const
-    {
-        if (register_ != rhs.register_)
-            return register_ < rhs.register_;
-        else if (name_ != rhs.name_)
-            return name_ < rhs.name_;
-        else
-            return regCount_ < rhs.regCount_;
-    }
-    
-    bool operator > (const Parameter& rhs) const
-    {
-        if (register_ != rhs.register_)
-            return register_ > rhs.register_;
-        else if (name_ != rhs.name_)
-            return name_ > rhs.name_;
-        else
-            return regCount_ > rhs.regCount_;
-    }
-    
-    bool operator == (const Parameter& rhs) const { return register_ == rhs.register_ && name_ == rhs.name_ && regCount_ == rhs.regCount_; }
-    
-    bool operator != (const Parameter& rhs) const { return register_ != rhs.register_ || name_ != rhs.name_ || regCount_ != rhs.regCount_; }
-    
-    String name_;
-    unsigned register_;
-    unsigned regCount_;
-};
-
-struct CompiledVariation
-{
-    ShaderType type_;
-    String name_;
-    String outFileName_;
-    Vector<String> defines_;
-    Vector<String> defineValues_;
-    PODVector<unsigned char> byteCode_;
-    Vector<Parameter> constants_;
-    Vector<Parameter> textureUnits_;
-    String errorMsg_;
-};
-
-SharedPtr<Context> context_(new Context());
-SharedPtr<FileSystem> fileSystem_(new FileSystem(context_));
-String inDir_;
-String outDir_;
-Vector<String> defines_;
-Vector<String> defineValues_;
-String variationName_;
-volatile bool compileFailed_ = false;
-bool useSM3_ = false;
-bool compileVariation_ = false;
-bool compileVS_ = true;
-bool compilePS_ = true;
-
-List<CompiledVariation> variations_;
-List<CompiledVariation*> workList_;
-Mutex workMutex_;
-String hlslCode_;
-
-int main(int argc, char** argv);
-void Run(const Vector<String>& arguments);
-void CompileShader(const String& fileName);
-void CompileVariation(CompiledVariation* variation);
-void CopyStrippedCode(PODVector<unsigned char>& dest, void* src, unsigned srcSize);
-
-class WorkerThread : public RefCounted, public Thread
-{
-public:
-    void ThreadFunction()
-    {
-        for (;;)
-        {
-            CompiledVariation* workItem = 0;
-            {
-                MutexLock lock(workMutex_);
-                if (!workList_.Empty())
-                {
-                    workItem = workList_.Front();
-                    workList_.Erase(workList_.Begin());
-                }
-            }
-            if (!workItem)
-                return;
-            
-            // If compile(s) failed, just empty the list, but do not compile more
-            if (!compileFailed_)
-                CompileVariation(workItem);
-        }
-    }
-};
-
-class IncludeHandler : public ID3DInclude
-{
-public:
-    STDMETHOD(Open)(D3D_INCLUDE_TYPE IncludeType, LPCSTR pFileName, LPCVOID pParentData, LPCVOID *ppData, UINT *pBytes)
-    {
-        String fileName = inDir_ + String((const char*)pFileName);
-        File file(context_, fileName);
-        if (!file.IsOpen())
-            return E_FAIL;
-        
-        unsigned fileSize = file.GetSize();
-        void* fileData = new unsigned char[fileSize];
-        *pBytes = fileSize;
-        *ppData = fileData;
-        file.Read(fileData, fileSize);
-        
-        return S_OK;
-    }
-    
-    STDMETHOD(Close)(LPCVOID pData)
-    {
-        delete[] (unsigned char*)pData;
-        return S_OK;
-    }
-};
-
-int main(int argc, char** argv)
-{
-    Vector<String> arguments;
-    
-    #ifdef WIN32
-    arguments = ParseArguments(GetCommandLineW());
-    #else
-    arguments = ParseArguments(argc, argv);
-    #endif
-    
-    Run(arguments);
-    return 0;
-}
-
-void Run(const Vector<String>& arguments)
-{
-    if (arguments.Size() < 1)
-    {
-        ErrorExit(
-            "Usage: ShaderCompiler <definitionfile> [outputpath] [options]\n\n"
-            "Options:\n"
-            "-t <VS|PS>  Compile only vertex or pixel shaders, by default compile both\n"
-            "-v <name>   Compile only the shader variation with name\n"
-            "-d <define> Add a define. Add SM3 to compile for Shader Model 3\n\n"
-            "If output path is not specified, shader binaries will be output into the same\n"
-            "directory as the definition file. Specify a wildcard to compile multiple\n"
-            "shaders."
-        );
-    }
-    
-    String path, file, extension;
-    SplitPath(arguments[0], path, file, extension);
-    inDir_ = AddTrailingSlash(path);
-    if (arguments.Size() > 1 && arguments[1][0] != '-')
-        outDir_ = AddTrailingSlash(arguments[1]);
-    else
-        outDir_ = inDir_;
-    
-    for (unsigned i = 1; i < arguments.Size(); ++i)
-    {
-        if (arguments[i].Length() > 1 && arguments[i][0] == '-')
-        {
-            String argument = arguments[i].Substring(1).ToLower();
-            String value = i + 1 < arguments.Size() ? arguments[i + 1] : String::EMPTY;
-            
-            if (argument == "d" && !value.Empty())
-            {
-                Vector<String> nameAndValue = value.Split('=');
-                if (nameAndValue.Size() == 2)
-                {
-                    defines_.Push(nameAndValue[0]);
-                    defineValues_.Push(nameAndValue[1]);
-                    if (nameAndValue[0] == "SM3" && ToInt(nameAndValue[1]) > 0)
-                        useSM3_ = true;
-                }
-                else
-                {
-                    defines_.Push(value);
-                    defineValues_.Push("1");
-                    if (value == "SM3")
-                        useSM3_ = true;
-                }
-                ++i;
-            }
-            else if (argument == "t" && !value.Empty())
-            {
-                if (value.ToLower() == "vs")
-                {
-                    compileVS_ = true;
-                    compilePS_ = false;
-                }
-                else if (value.ToLower() == "ps")
-                {
-                    compileVS_ = false;
-                    compilePS_ = true;
-                }
-                ++i;
-            }
-            // Note: variation name must be allowed to be empty
-            else if (argument == "v")
-            {
-                compileVariation_ = true;
-                variationName_ = value;
-                ++i;
-            }
-        }
-    }
-    
-    if (!file.StartsWith("*"))
-        CompileShader(arguments[0]);
-    else
-    {
-        Vector<String> shaderFiles;
-        fileSystem_->ScanDir(shaderFiles, inDir_, file + extension, SCAN_FILES, false);
-        for (unsigned i = 0; i < shaderFiles.Size(); ++i)
-            CompileShader(inDir_ + shaderFiles[i]);
-    }
-}
-
-void CompileShader(const String& fileName)
-{
-    String file = GetFileName(fileName);
-    
-    XMLFile doc(context_);
-    File source(context_);
-    if (!source.Open(fileName))
-        ErrorExit("Could not open input file " + fileName);
-    if (!doc.Load(source))
-        ErrorExit("Could not parse input file " + fileName);
-    
-    XMLElement shaders = doc.GetRoot("shaders");
-    if (!shaders)
-        ErrorExit("No shaders element in " + source.GetName());
-    
-    if (compileVS_)
-    {
-        ShaderParser vsParser;
-        if (!vsParser.Parse(VS, shaders, !compileVariation_, defines_, defineValues_))
-            ErrorExit("VS: " + vsParser.GetErrorMessage());
-        
-        // If compiling a specific variation only, request it beforehand
-        if (compileVariation_)
-            vsParser.GetCombination(variationName_);
-
-        const HashMap<String, unsigned>& combinations = vsParser.GetAllCombinations();
-        for (HashMap<String, unsigned>::ConstIterator i = combinations.Begin(); i != combinations.End(); ++i)
-        {
-            if (!compileVariation_ || i->first_ == variationName_)
-            {
-                ShaderCombination src = vsParser.GetCombination(i->first_);
-                CompiledVariation compile;
-                
-                compile.type_ = VS;
-                compile.name_ = file;
-                compile.outFileName_ = outDir_ + file;
-                if (!src.name_.Empty())
-                {
-                    compile.name_ += "_" + src.name_;
-                    compile.outFileName_ += "_" + src.name_;
-                }
-                compile.outFileName_ += useSM3_ ? ".vs3" : ".vs2";
-                compile.defines_ = src.defines_;
-                compile.defineValues_ = src.defineValues_;
-                
-                variations_.Push(compile);
-                workList_.Push(&variations_.Back());
-            }
-        }
-    }
-    
-    if (compilePS_)
-    {
-        ShaderParser psParser;
-        if (!psParser.Parse(PS, shaders, !compileVariation_, defines_, defineValues_))
-            ErrorExit("PS: " + psParser.GetErrorMessage());
-        
-        // If compiling a specific variation only, request it beforehand
-        if (compileVariation_)
-            psParser.GetCombination(variationName_);
-
-        const HashMap<String, unsigned>& combinations = psParser.GetAllCombinations();
-        for (HashMap<String, unsigned>::ConstIterator i = combinations.Begin(); i != combinations.End(); ++i)
-        {
-            if (!compileVariation_ || i->first_ == variationName_)
-            {
-                ShaderCombination src = psParser.GetCombination(i->first_);
-                CompiledVariation compile;
-                
-                compile.type_ = PS;
-                compile.name_ = file;
-                compile.outFileName_ = outDir_ + file;
-                if (!src.name_.Empty())
-                {
-                    compile.name_ += "_" + src.name_;
-                    compile.outFileName_ += "_" + src.name_;
-                }
-                compile.outFileName_ += useSM3_ ? ".ps3" : ".ps2";
-                compile.defines_ = src.defines_;
-                compile.defineValues_ = src.defineValues_;
-                
-                variations_.Push(compile);
-                workList_.Push(&variations_.Back());
-            }
-        }
-    }
-    
-    if (variations_.Empty())
-    {
-        PrintLine("No variations to compile");
-        return;
-    }
-    
-    // Load the shader source code
-    {
-        String inputFileName = inDir_ + file + ".hlsl";
-        File hlslFile(context_, inputFileName);
-        if (!hlslFile.IsOpen())
-            ErrorExit("Could not open input file " + inputFileName);
-        
-        hlslCode_.Clear();
-        while (!hlslFile.IsEof())
-            hlslCode_ += hlslFile.ReadLine() + "\n";
-    }
-    
-    if (!compileVariation_)
-    {
-        // Create and start worker threads. Use all logical CPUs except one to not lock up the computer completely
-        unsigned numWorkerThreads = GetNumLogicalCPUs() - 1;
-        if (!numWorkerThreads)
-            numWorkerThreads = 1;
-        
-        Vector<SharedPtr<WorkerThread> > workerThreads;
-        workerThreads.Resize(numWorkerThreads);
-        for (unsigned i = 0; i < workerThreads.Size(); ++i)
-        {
-            workerThreads[i] = new WorkerThread();
-            workerThreads[i]->Run();
-        }
-        // This will wait until the thread functions have stopped
-        for (unsigned i = 0; i < workerThreads.Size(); ++i)
-            workerThreads[i]->Stop();
-    }
-    else
-    {
-        WorkerThread dummyThread;
-        dummyThread.ThreadFunction();
-    }
-    
-    // Check that all shaders compiled
-    for (List<CompiledVariation>::Iterator i = variations_.Begin(); i != variations_.End(); ++i)
-    {
-        if (!i->errorMsg_.Empty())
-        {
-            if (i->type_ == VS)
-                ErrorExit("Failed to compile vertex shader " + i->name_ + ": " + i->errorMsg_);
-            else
-                ErrorExit("Failed to compile pixel shader " + i->name_ + ": " + i->errorMsg_);
-        }
-    }
-}
-
-void CompileVariation(CompiledVariation* variation)
-{
-    IncludeHandler includeHandler;
-    PODVector<D3D_SHADER_MACRO> macros;
-    
-    // Insert variation-specific and global defines
-    for (unsigned i = 0; i < variation->defines_.Size(); ++i)
-    {
-        D3D_SHADER_MACRO macro;
-        macro.Name = variation->defines_[i].CString();
-        macro.Definition = variation->defineValues_[i].CString();
-        macros.Push(macro);
-    }
-    for (unsigned i = 0; i < defines_.Size(); ++i)
-    {
-        D3D_SHADER_MACRO macro;
-        macro.Name = defines_[i].CString();
-        macro.Definition = defineValues_[i].CString();
-        macros.Push(macro);
-    }
-    
-    D3D_SHADER_MACRO endMacro;
-    endMacro.Name = 0;
-    endMacro.Definition = 0;
-    macros.Push(endMacro);
-    
-    LPD3DBLOB shaderCode = NULL;
-    LPD3DBLOB errorMsgs = NULL;
-    
-    // Set the profile, entrypoint and flags according to the shader being compiled
-    String profile;
-    String entryPoint;
-    unsigned flags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
-    
-    if (variation->type_ == VS)
-    {
-        entryPoint = "VS";
-        if (!useSM3_)
-            profile = "vs_2_0";
-        else
-            profile = "vs_3_0";
-    }
-    else
-    {
-        entryPoint = "PS";
-        if (!useSM3_)
-            profile = "ps_2_0";
-        else
-        {
-            profile = "ps_3_0";
-            flags |= D3DCOMPILE_PREFER_FLOW_CONTROL;
-        }
-    }
-    
-    // Compile using D3DCompiler
-    HRESULT hr = D3DCompile(hlslCode_.CString(), hlslCode_.Length(), NULL, &macros.Front(), &includeHandler,
-        entryPoint.CString(), profile.CString(), flags, 0, &shaderCode, &errorMsgs);
-
-    if (FAILED(hr))
-    {
-        variation->errorMsg_ = String((const char*)errorMsgs->GetBufferPointer(), errorMsgs->GetBufferSize());
-        compileFailed_ = true;
-    }
-    else
-    {
-        BYTE const *const bufData = static_cast<BYTE *>(shaderCode->GetBufferPointer());
-        SIZE_T const bufSize = shaderCode->GetBufferSize();
-        MOJOSHADER_parseData const *parseData = MOJOSHADER_parse("bytecode", bufData, bufSize, NULL, 0, NULL, 0, NULL, NULL, NULL);
-
-        CopyStrippedCode(variation->byteCode_, shaderCode->GetBufferPointer(), shaderCode->GetBufferSize());
-        
-        if (!variation->name_.Empty())
-            PrintLine("Compiled shader variation " + variation->name_ + ", code size " + String(variation->byteCode_.Size()));
-        else
-            PrintLine("Compiled base shader variation, code size " + String(variation->byteCode_.Size()));
-            
-        // Print warnings if any
-        if (errorMsgs && errorMsgs->GetBufferSize())
-        {
-            String warning((const char*)errorMsgs->GetBufferPointer(), errorMsgs->GetBufferSize());
-            PrintLine("WARNING: " + warning);
-        }
-
-        for(int i = 0; i < parseData->symbol_count; i++)
-        {
-            MOJOSHADER_symbol const& symbol = parseData->symbols[i];
-
-            String name(symbol.name);
-            unsigned const reg = symbol.register_index;
-            unsigned const regCount = symbol.register_count;
-
-            // Check if the parameter is a constant or a texture sampler
-            bool const isSampler = (name[0] == 's');
-            name = name.Substring(1);
-            
-            if (isSampler)
-            {
-                // Skip if it's a G-buffer sampler, which are aliases for the standard texture units
-                if (name != "AlbedoBuffer" && name != "NormalBuffer" && name != "DepthBuffer" && name != "LightBuffer")
-                {
-                    Parameter newTextureUnit(name, reg, 1);
-                    variation->textureUnits_.Push(newTextureUnit);
-                }
-            }
-            else
-            {
-                Parameter newParam(name, reg, regCount);
-                variation->constants_.Push(newParam);
-            }
-        }
-
-        MOJOSHADER_freeParseData(parseData);
-
-        // Create the last part of the output path (SM2/SM3) if it does not exist
-        String outPath = GetPath(variation->outFileName_);
-        if (!fileSystem_->DirExists(outPath))
-            fileSystem_->CreateDir(outPath);
-
-        File outFile(context_);
-        if (!outFile.Open(variation->outFileName_, FILE_WRITE))
-        {
-            variation->errorMsg_ = "Could not open output file " + variation->outFileName_;
-            compileFailed_ = true;
-        }
-        else
-        {
-            outFile.WriteFileID("USHD");
-            outFile.WriteShort((unsigned short)variation->type_);
-            outFile.WriteShort(useSM3_ ? 3 : 2);
-            
-            outFile.WriteUInt(variation->constants_.Size());
-            for (unsigned i = 0; i < variation->constants_.Size(); ++i)
-            {
-                outFile.WriteString(variation->constants_[i].name_);
-                outFile.WriteUByte(variation->constants_[i].register_);
-                outFile.WriteUByte(variation->constants_[i].regCount_);
-            }
-            
-            outFile.WriteUInt(variation->textureUnits_.Size());
-            for (unsigned i = 0; i < variation->textureUnits_.Size(); ++i)
-            {
-                outFile.WriteString(variation->textureUnits_[i].name_);
-                outFile.WriteUByte(variation->textureUnits_[i].register_);
-            }
-            
-            unsigned dataSize = variation->byteCode_.Size();
-            outFile.WriteUInt(dataSize);
-            if (dataSize)
-                outFile.Write(&variation->byteCode_[0], dataSize);
-        }
-    }
-    
-    if (shaderCode)
-        shaderCode->Release();
-    if (errorMsgs)
-        errorMsgs->Release();
-}
-
-void CopyStrippedCode(PODVector<unsigned char>& dest, void* src, unsigned srcSize)
-{
-    unsigned const D3DSIO_COMMENT = 0xFFFE;
-    unsigned* srcWords = (unsigned*)src;
-    unsigned srcWordSize = srcSize >> 2;
-    
-    dest.Clear();
-    
-    for (unsigned i = 0; i < srcWordSize; ++i)
-    {
-        unsigned opcode = srcWords[i] & 0xffff;
-        unsigned paramLength = (srcWords[i] & 0x0f000000) >> 24;
-        unsigned commentLength = srcWords[i] >> 16;
-        
-        // For now, skip comment only at fixed position to prevent false positives
-        if (i == 1 && opcode == D3DSIO_COMMENT)
-        {
-            // Skip the comment
-            i += commentLength;
-        }
-        else
-        {
-            // Not a comment, copy the data
-            unsigned char* srcBytes = (unsigned char*)(srcWords + i);
-            dest.Push(*srcBytes++);
-            dest.Push(*srcBytes++);
-            dest.Push(*srcBytes++);
-            dest.Push(*srcBytes++);
-        }
-    }
-}