Browse Source

Change the order of matrix multiplies in GLSL to eliminate the need to transpose matrix uniforms, and to make the order consistent with skinned & instanced rendering.

Lasse Öörni 11 years ago
parent
commit
b96f47db61

+ 20 - 27
Source/Urho3D/Graphics/OpenGL/OGLGraphics.cpp

@@ -1189,15 +1189,11 @@ void Graphics::SetShaderParameter(StringHash param, const float* data, unsigned
                 break;
 
             case GL_FLOAT_MAT3:
-                count = Min((int)count, (int)NUM_TEMP_MATRICES * 9);
-                Matrix3::BulkTranspose(&tempMatrices3_[0].m00_, data, count / 9);
-                glUniformMatrix3fv(info->location_, count / 9, GL_FALSE, tempMatrices3_[0].Data());
+                glUniformMatrix3fv(info->location_, count / 9, GL_FALSE, data);
                 break;
 
             case GL_FLOAT_MAT4:
-                count = Min((int)count, (int)NUM_TEMP_MATRICES * 16);
-                Matrix4::BulkTranspose(&tempMatrices4_[0].m00_, data, count / 16);
-                glUniformMatrix4fv(info->location_, count / 16, GL_FALSE, tempMatrices4_[0].Data());
+                glUniformMatrix4fv(info->location_, count / 16, GL_FALSE, data);
                 break;
             }
         }
@@ -1247,7 +1243,7 @@ void Graphics::SetShaderParameter(StringHash param, const Matrix3& matrix)
     {
         const ShaderParameter* info = shaderProgram_->GetParameter(param);
         if (info)
-            glUniformMatrix3fv(info->location_, 1, GL_FALSE, matrix.Transpose().Data());
+            glUniformMatrix3fv(info->location_, 1, GL_FALSE, matrix.Data());
     }
 }
 
@@ -1283,7 +1279,7 @@ void Graphics::SetShaderParameter(StringHash param, const Matrix4& matrix)
     {
         const ShaderParameter* info = shaderProgram_->GetParameter(param);
         if (info)
-            glUniformMatrix4fv(info->location_, 1, GL_FALSE, matrix.Transpose().Data());
+            glUniformMatrix4fv(info->location_, 1, GL_FALSE, matrix.Data());
     }
 }
 
@@ -1324,25 +1320,22 @@ void Graphics::SetShaderParameter(StringHash param, const Matrix3x4& matrix)
         const ShaderParameter* info = shaderProgram_->GetParameter(param);
         if (info)
         {
-            float data[16];
-            data[0] = matrix.m00_;
-            data[1] = matrix.m10_;
-            data[2] = matrix.m20_;
-            data[3] = 0.0f;
-            data[4] = matrix.m01_;
-            data[5] = matrix.m11_;
-            data[6] = matrix.m21_;
-            data[7] = 0.0f;
-            data[8] = matrix.m02_;
-            data[9] = matrix.m12_;
-            data[10] = matrix.m22_;
-            data[11] = 0.0f;
-            data[12] = matrix.m03_;
-            data[13] = matrix.m13_;
-            data[14] = matrix.m23_;
-            data[15] = 1.0f;
-
-            glUniformMatrix4fv(info->location_, 1, GL_FALSE, data);
+            // Expand to a full Matrix4
+            static Matrix4 fullMatrix;
+            fullMatrix.m00_ = matrix.m00_;
+            fullMatrix.m01_ = matrix.m01_;
+            fullMatrix.m02_ = matrix.m02_;
+            fullMatrix.m03_ = matrix.m03_;
+            fullMatrix.m10_ = matrix.m10_;
+            fullMatrix.m11_ = matrix.m11_;
+            fullMatrix.m12_ = matrix.m12_;
+            fullMatrix.m13_ = matrix.m13_;
+            fullMatrix.m20_ = matrix.m20_;
+            fullMatrix.m21_ = matrix.m21_;
+            fullMatrix.m22_ = matrix.m22_;
+            fullMatrix.m23_ = matrix.m23_;
+
+            glUniformMatrix4fv(info->location_, 1, GL_FALSE, fullMatrix.Data());
         }
     }
 }

+ 0 - 5
Source/Urho3D/Graphics/OpenGL/OGLGraphics.h

@@ -54,7 +54,6 @@ class VertexBuffer;
 typedef HashMap<Pair<ShaderVariation*, ShaderVariation*>, SharedPtr<ShaderProgram> > ShaderProgramMap;
 
 static const unsigned NUM_SCREEN_BUFFERS = 2;
-static const unsigned NUM_TEMP_MATRICES = 8;
 
 /// CPU-side scratch buffer for vertex data updates.
 struct ScratchBuffer
@@ -604,10 +603,6 @@ private:
     HashMap<int, SharedPtr<Texture2D> > depthTextures_;
     /// Remembered shader parameter sources.
     const void* shaderParameterSources_[MAX_SHADER_PARAMETER_GROUPS];
-    /// Temp matrices for transposing shader parameters.
-    Matrix3 tempMatrices3_[NUM_TEMP_MATRICES];
-    /// Temp matrices for transposing shader parameters.
-    Matrix4 tempMatrices4_[NUM_TEMP_MATRICES];
     /// Base directory for shaders.
     String shaderPath_;
     /// File extension for shaders.

+ 2 - 2
bin/CoreData/Shaders/GLSL/DeferredLight.glsl

@@ -78,11 +78,11 @@ void PS()
     #endif
 
     #if defined(SPOTLIGHT)
-        vec4 spotPos = cLightMatricesPS[0] * projWorldPos;
+        vec4 spotPos = projWorldPos * cLightMatricesPS[0];
         lightColor = spotPos.w > 0.0 ? texture2DProj(sLightSpotMap, spotPos).rgb * cLightColor.rgb : vec3(0.0);
     #elif defined(CUBEMASK)
         mat3 lightVecRot = mat3(cLightMatricesPS[0][0].xyz, cLightMatricesPS[0][1].xyz, cLightMatricesPS[0][2].xyz);
-        lightColor = textureCube(sLightCubeMap, lightVecRot * (worldPos - cLightPosPS.xyz)).rgb * cLightColor.rgb;
+        lightColor = textureCube(sLightCubeMap, (worldPos - cLightPosPS.xyz) * lightVecRot).rgb * cLightColor.rgb;
     #else
         lightColor = cLightColor.rgb;
     #endif

+ 7 - 7
bin/CoreData/Shaders/GLSL/Lighting.glsl

@@ -69,9 +69,9 @@ float GetVertexLightVolumetric(int index, vec3 worldPos)
 vec4 GetShadowPos(int index, vec4 projWorldPos)
 {
     #if defined(DIRLIGHT)
-        return cLightMatrices[index] * projWorldPos;
+        return projWorldPos * cLightMatrices[index];
     #elif defined(SPOTLIGHT)
-        return cLightMatrices[1] * projWorldPos;
+        return projWorldPos * cLightMatrices[1];
     #else
         return vec4(projWorldPos.xyz - cLightPos.xyz, 1.0);
     #endif
@@ -215,13 +215,13 @@ float GetDirShadowDeferred(vec4 projWorldPos, float depth)
     vec4 shadowPos;
 
     if (depth < cShadowSplits.x)
-        shadowPos = cLightMatricesPS[0] * projWorldPos;
+        shadowPos = projWorldPos * cLightMatricesPS[0];
     else if (depth < cShadowSplits.y)
-        shadowPos = cLightMatricesPS[1] * projWorldPos;
+        shadowPos = projWorldPos * cLightMatricesPS[1];
     else if (depth < cShadowSplits.z)
-        shadowPos = cLightMatricesPS[2] * projWorldPos;
+        shadowPos = projWorldPos * cLightMatricesPS[2];
     else
-        shadowPos = cLightMatricesPS[3] * projWorldPos;
+        shadowPos = projWorldPos * cLightMatricesPS[3];
 
     return GetDirShadowFade(GetShadow(shadowPos), depth);
 }
@@ -250,7 +250,7 @@ float GetShadowDeferred(vec4 projWorldPos, float depth)
     #if defined(DIRLIGHT)
         return GetDirShadowDeferred(projWorldPos, depth);
     #elif defined(SPOTLIGHT)
-        vec4 shadowPos = cLightMatricesPS[1] * projWorldPos;
+        vec4 shadowPos = projWorldPos * cLightMatricesPS[1];
         return GetShadow(shadowPos);
     #else
         vec3 shadowPos = projWorldPos.xyz - cLightPosPS.xyz;

+ 2 - 2
bin/CoreData/Shaders/GLSL/LitParticle.glsl

@@ -47,11 +47,11 @@ void VS()
 
         #ifdef SPOTLIGHT
             // Spotlight projection: transform from world space to projector texture coordinates
-            vSpotPos = cLightMatrices[0] * projWorldPos;
+            vSpotPos = projWorldPos * cLightMatrices[0];
         #endif
     
         #ifdef POINTLIGHT
-            vCubeMaskVec = mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz) * (worldPos - cLightPos.xyz);
+            vCubeMaskVec = (worldPos - cLightPos.xyz) * mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz);
         #endif
     #else
         // Ambient & per-vertex lighting

+ 2 - 2
bin/CoreData/Shaders/GLSL/LitSolid.glsl

@@ -63,11 +63,11 @@ void VS()
 
         #ifdef SPOTLIGHT
             // Spotlight projection: transform from world space to projector texture coordinates
-            vSpotPos = cLightMatrices[0] * projWorldPos;
+            vSpotPos =  projWorldPos * cLightMatrices[0];
         #endif
     
         #ifdef POINTLIGHT
-            vCubeMaskVec = mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz) * (worldPos - cLightPos.xyz);
+            vCubeMaskVec = (worldPos - cLightPos.xyz) * mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz);
         #endif
     #else
         // Ambient & per-vertex lighting

+ 2 - 2
bin/CoreData/Shaders/GLSL/PrepassLight.glsl

@@ -76,11 +76,11 @@ void PS()
     #endif
     
     #if defined(SPOTLIGHT)
-        vec4 spotPos = cLightMatricesPS[0] * projWorldPos;
+        vec4 spotPos = projWorldPos * cLightMatricesPS[0];
         lightColor = spotPos.w > 0.0 ? texture2DProj(sLightSpotMap, spotPos).rgb * cLightColor.rgb : vec3(0.0);
     #elif defined(CUBEMASK)
         mat3 lightVecRot = mat3(cLightMatricesPS[0][0].xyz, cLightMatricesPS[0][1].xyz, cLightMatricesPS[0][2].xyz);
-        lightColor = textureCube(sLightCubeMap, lightVecRot * (worldPos - cLightPosPS.xyz)).rgb * cLightColor.rgb;
+        lightColor = textureCube(sLightCubeMap, (worldPos - cLightPosPS.xyz) * lightVecRot).rgb * cLightColor.rgb;
     #else
         lightColor = cLightColor.rgb;
     #endif

+ 2 - 2
bin/CoreData/Shaders/GLSL/ScreenPos.glsl

@@ -36,7 +36,7 @@ vec3 GetFarRay(vec4 clipPos)
         clipPos.y / clipPos.w * cFrustumSize.y,
         cFrustumSize.z);
 
-    return cCameraRot * viewRay;
+    return viewRay * cCameraRot;
 }
 
 vec3 GetNearRay(vec4 clipPos)
@@ -46,6 +46,6 @@ vec3 GetNearRay(vec4 clipPos)
         clipPos.y / clipPos.w * cFrustumSize.y,
         0.0);
     
-    return (cCameraRot * viewRay) * cDepthMode.x;
+    return (viewRay * cCameraRot) * cDepthMode.x;
 }
 #endif

+ 2 - 2
bin/CoreData/Shaders/GLSL/TerrainBlend.glsl

@@ -59,11 +59,11 @@ void VS()
 
         #ifdef SPOTLIGHT
             // Spotlight projection: transform from world space to projector texture coordinates
-            vSpotPos = cLightMatrices[0] * projWorldPos;
+            vSpotPos = projWorldPos * cLightMatrices[0];
         #endif
     
         #ifdef POINTLIGHT
-            vCubeMaskVec = mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz) * (worldPos - cLightPos.xyz);
+            vCubeMaskVec = (worldPos - cLightPos.xyz) * mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz);
         #endif
     #else
         // Ambient & per-vertex lighting

+ 10 - 21
bin/CoreData/Shaders/GLSL/Transform.glsl

@@ -47,7 +47,7 @@ vec2 GetTexCoord(vec2 texCoord)
 
 vec4 GetClipPos(vec3 worldPos)
 {
-    vec4 ret = cViewProj * vec4(worldPos, 1.0);
+    vec4 ret = vec4(worldPos, 1.0) * cViewProj;
     // While getting the clip coordinate, also automatically set gl_ClipVertex for user clip planes
     #ifndef GL_ES
     gl_ClipVertex = ret;
@@ -57,7 +57,7 @@ vec4 GetClipPos(vec3 worldPos)
 
 float GetZonePos(vec3 worldPos)
 {
-    return clamp((cZone * vec4(worldPos, 1.0)).z, 0.0, 1.0);
+    return clamp((vec4(worldPos, 1.0) * cZone).z, 0.0, 1.0);
 }
 
 float GetDepth(vec4 clipPos)
@@ -67,16 +67,14 @@ float GetDepth(vec4 clipPos)
 
 vec3 GetBillboardPos(vec4 iPos, vec2 iSize, mat4 modelMatrix)
 {
-    return (modelMatrix * iPos).xyz + cBillboardRot * vec3(iSize.x, iSize.y, 0.0);
+    return (iPos * modelMatrix).xyz + vec3(iSize.x, iSize.y, 0.0) * cBillboardRot;
 }
 
 vec3 GetBillboardNormal()
 {
-    return vec3(-cBillboardRot[2][0], -cBillboardRot[2][1], -cBillboardRot[2][2]);
+    return vec3(-cBillboardRot[0][2], -cBillboardRot[1][2], -cBillboardRot[2][2]);
 }
 
-// Note: the skinning/instancing model matrix is a transpose, so the matrix multiply order must be swapped
-// (see GetWorldPos(), GetWorldNormal() and GetWorldTangent() below)
 #if defined(SKINNED)
     #define iModelMatrix GetSkinMatrix(iBlendWeights, iBlendIndices)
 #elif defined(INSTANCED)
@@ -87,33 +85,24 @@ vec3 GetBillboardNormal()
 
 vec3 GetWorldPos(mat4 modelMatrix)
 {
-    #if defined(SKINNED) || defined(INSTANCED)
-        return (iPos * modelMatrix).xyz;
-    #elif defined(BILLBOARD)
+    #if defined(BILLBOARD)
         return GetBillboardPos(iPos, iTexCoord2, modelMatrix);
     #else
-        return (modelMatrix * iPos).xyz;
+        return (iPos * modelMatrix).xyz;
     #endif
 }
 
 vec3 GetWorldNormal(mat4 modelMatrix)
 {
-    #if defined(SKINNED) || defined(INSTANCED)
-        return normalize(iNormal * GetNormalMatrix(modelMatrix));
-    #elif defined(BILLBOARD)
+    #if defined(BILLBOARD)
         return GetBillboardNormal();
     #else
-        return normalize(GetNormalMatrix(modelMatrix) * iNormal);
+        return normalize(iNormal * GetNormalMatrix(modelMatrix));
     #endif
 }
 
 vec3 GetWorldTangent(mat4 modelMatrix)
-{   
-    mat3 normalMatrix = GetNormalMatrix(modelMatrix);
-    #if defined(SKINNED) || defined(INSTANCED)
-        return normalize(iTangent.xyz * normalMatrix);
-    #else
-        return normalize(normalMatrix * iTangent.xyz);
-    #endif
+{
+    return normalize(iTangent.xyz * GetNormalMatrix(modelMatrix));
 }
 #endif

+ 3 - 3
bin/CoreData/Shaders/GLSL/Vegetation.glsl

@@ -41,7 +41,7 @@ void VS()
 {
     mat4 modelMatrix = iModelMatrix;
     vec3 worldPos = GetWorldPos(modelMatrix);
-    float height = worldPos.y - cModel[3][1];
+    float height = worldPos.y - cModel[1][3];
 
     float windStrength = max(height - cWindHeightPivot, 0.0) * cWindHeightFactor;
     float windPeriod = cElapsedTime * cWindPeriod + dot(worldPos.xz, cWindWorldSpacing);
@@ -73,11 +73,11 @@ void VS()
 
         #ifdef SPOTLIGHT
             // Spotlight projection: transform from world space to projector texture coordinates
-            vSpotPos = cLightMatrices[0] * projWorldPos;
+            vSpotPos = projWorldPos * cLightMatrices[0];
         #endif
     
         #ifdef POINTLIGHT
-            vCubeMaskVec = mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz) * (worldPos - cLightPos.xyz);
+            vCubeMaskVec = (worldPos - cLightPos.xyz) * mat3(cLightMatrices[0][0].xyz, cLightMatrices[0][1].xyz, cLightMatrices[0][2].xyz);
         #endif
     #else
         // Ambient & per-vertex lighting