Selaa lähdekoodia

Ported optimizations and code cleanup from the prepass-branch.

Lasse Öörni 14 vuotta sitten
vanhempi
sitoutus
da685e99de

+ 2 - 2
Engine/Graphics/Batch.h

@@ -256,6 +256,6 @@ struct LightBatchQueue
     BatchQueue litBatches_;
     /// Light volume draw calls, should be only one.
     PODVector<Batch> volumeBatches_;
-    /// Last split flag for clearing the stencil buffer.
-    bool lastSplit_;
+    /// First split flag for clearing the stencil buffer.
+    bool firstSplit_;
 };

+ 37 - 26
Engine/Graphics/Direct3D9/D3D9Graphics.cpp

@@ -164,7 +164,6 @@ Graphics::Graphics(Context* context) :
     vsync_(false),
     flushGPU_(true),
     deviceLost_(false),
-    queryIssued_(false),
     renderTargetSupport_(true),
     deferredSupport_(false),
     hardwareShadowSupport_(false),
@@ -172,6 +171,7 @@ Graphics::Graphics(Context* context) :
     streamOffsetSupport_(false),
     hasSM3_(false),
     forceSM2_(false),
+    queryIndex_(0),
     numPrimitives_(0),
     numBatches_(0),
     immediateBuffer_(0),
@@ -192,10 +192,13 @@ Graphics::~Graphics()
     
     vertexDeclarations_.Clear();
     
-    if (impl_->frameQuery_)
+    for (unsigned i = 0; i < NUM_QUERIES; ++i)
     {
-        impl_->frameQuery_->Release();
-        impl_->frameQuery_ = 0;
+        if (impl_->frameQueries_[i])
+        {
+            impl_->frameQueries_[i]->Release();
+            impl_->frameQueries_[i] = 0;
+        }
     }
     if (impl_->defaultColorSurface_)
     {
@@ -545,16 +548,6 @@ bool Graphics::BeginFrame()
     
     impl_->device_->BeginScene();
     
-    // If a query was issued on the previous frame, wait for it to finish before beginning the next
-    if (impl_->frameQuery_ && queryIssued_)
-    {
-        while (impl_->frameQuery_->GetData(0, 0, D3DGETDATA_FLUSH) == S_FALSE)
-        {
-        }
-        
-        queryIssued_ = false;
-    }
-    
     // Set default rendertarget and depth buffer
     ResetRenderTargets();
     viewTexture_ = 0;
@@ -587,16 +580,26 @@ void Graphics::EndFrame()
     
     SendEvent(E_ENDRENDERING);
     
-    // Optionally flush GPU buffer to avoid control lag or framerate fluctuations due to pre-render
-    if (impl_->frameQuery_ && flushGPU_)
+    impl_->device_->EndScene();
+    
+    // Issue a new GPU flush query now if necessary
+    if (flushGPU_ && impl_->frameQueries_[queryIndex_])
     {
-        impl_->frameQuery_->Issue(D3DISSUE_END);
-        queryIssued_ = true;
+        impl_->frameQueries_[queryIndex_]->Issue(D3DISSUE_END);
+        queryIssued_[queryIndex_] = true;
+        
+        ++queryIndex_;
+        if (queryIndex_ >= NUM_QUERIES)
+            queryIndex_ = 0;
     }
     
+    impl_->device_->Present(0, 0, 0, 0);
+    
+    // If a previous GPU flush query is in progress, wait for it to finish
+    if (queryIssued_[queryIndex_])
     {
-        impl_->device_->EndScene();
-        impl_->device_->Present(0, 0, 0, 0);
+        while (impl_->frameQueries_[queryIndex_]->GetData(0, 0, D3DGETDATA_FLUSH) == S_FALSE);
+        queryIssued_[queryIndex_] = false;
     }
 }
 
@@ -2225,10 +2228,13 @@ void Graphics::ResetDevice()
 
 void Graphics::OnDeviceLost()
 {
-    if (impl_->frameQuery_)
+    for (unsigned i = 0; i < NUM_QUERIES; ++i)
     {
-        impl_->frameQuery_->Release();
-        impl_->frameQuery_ = 0;
+        if (impl_->frameQueries_[i])
+        {
+            impl_->frameQueries_[i]->Release();
+            impl_->frameQueries_[i] = 0;
+        }
     }
     if (impl_->defaultColorSurface_)
     {
@@ -2249,8 +2255,12 @@ void Graphics::OnDeviceReset()
 {
     ResetCachedState();
     
-    // Create frame query
-    impl_->device_->CreateQuery(D3DQUERYTYPE_EVENT, &impl_->frameQuery_);
+    // Create frame queries for GPU buffer flushing
+    for (unsigned i = 0; i < NUM_QUERIES; ++i)
+        impl_->device_->CreateQuery(D3DQUERYTYPE_EVENT, &impl_->frameQueries_[i]);
+    
+    // In case AutoDepthStencil is not used, depth buffering must be enabled manually
+    impl_->device_->SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE);
     
     // Create deferred rendering buffers now
     CreateRenderTargets();
@@ -2325,7 +2335,8 @@ void Graphics::ResetCachedState()
     impl_->srcBlend_ = D3DBLEND_ONE;
     impl_->destBlend_ = D3DBLEND_ZERO;
     
-    queryIssued_ = false;
+    for (unsigned i = 0; i < NUM_QUERIES; ++i)
+        queryIssued_[i] = false;
 }
 
 void Graphics::SetTextureUnitMappings()

+ 12 - 9
Engine/Graphics/Direct3D9/D3D9Graphics.h

@@ -47,6 +47,7 @@ class VertexBuffer;
 class VertexDeclaration;
 
 static const int IMMEDIATE_BUFFER_DEFAULT_SIZE = 1024;
+static const unsigned NUM_QUERIES = 2;
 
 /// %Shader parameter definition.
 struct ShaderParameter
@@ -260,25 +261,25 @@ public:
     PODVector<int> GetMultiSampleLevels() const;
     /// Return vertex buffer by index.
     VertexBuffer* GetVertexBuffer(unsigned index) const;
-    /// Return index buffer.
+    /// Return current index buffer.
     IndexBuffer* GetIndexBuffer() const { return indexBuffer_; }
-    /// Return vertex declaration.
+    /// Return current vertex declaration.
     VertexDeclaration* GetVertexDeclaration() const { return vertexDeclaration_; }
-    /// Return vertex shader.
+    /// Return current vertex shader.
     ShaderVariation* GetVertexShader() const { return vertexShader_; }
-    /// Return pixel shader.
+    /// Return current pixel shader.
     ShaderVariation* GetPixelShader() const { return pixelShader_; }
     /// Return texture unit index by name.
     TextureUnit GetTextureUnit(const String& name);
-    /// Return texture by texture unit index.
+    /// Return current texture by texture unit index.
     Texture* GetTexture(unsigned index) const;
     /// Return deferred rendering destination render target.
     Texture* GetViewTexture() const { return viewTexture_; }
     /// Return default texture filtering mode.
     TextureFilterMode GetDefaultTextureFilterMode() const { return defaultTextureFilterMode_; }
-    /// Return render target by index.
+    /// Return current render target by index.
     RenderSurface* GetRenderTarget(unsigned index) const;
-    /// Return depth stencil buffer.
+    /// Return current depth stencil buffer.
     RenderSurface* GetDepthStencil() const { return depthStencil_; }
     /// Return the viewport coordinates.
     IntRect GetViewport() const { return viewport_; }
@@ -405,8 +406,6 @@ private:
     bool flushGPU_;
     /// Direct3D device lost flag.
     bool deviceLost_;
-    /// Query issued (used to flush the GPU command queue) flag.
-    bool queryIssued_;
     /// Texture render target support flag.
     bool renderTargetSupport_;
     /// Deferred rendering support flag.
@@ -421,6 +420,10 @@ private:
     bool hasSM3_;
     /// Force Shader Model 2 flag.
     bool forceSM2_;
+    /// Query issued flags, one per frame query (used to flush the GPU command queue).
+    bool queryIssued_[NUM_QUERIES];
+    /// Current query index.
+    unsigned queryIndex_;
     /// Number of primitives this frame.
     unsigned numPrimitives_;
     /// Number of batches this frame.

+ 3 - 1
Engine/Graphics/Direct3D9/D3D9GraphicsImpl.cpp

@@ -30,13 +30,15 @@ GraphicsImpl::GraphicsImpl() :
     interface_(0),
     device_(0),
     defaultColorSurface_(0),
-    frameQuery_(0),
     adapter_(D3DADAPTER_DEFAULT),
     deviceType_(D3DDEVTYPE_HAL),
     instance_(GetModuleHandle(0)),
     window_(0)
 {
     memset(&presentParams_, 0, sizeof presentParams_);
+    
+    for (unsigned i = 0; i < NUM_QUERIES; ++i)
+        frameQueries_[i] = 0;
 }
 
 bool GraphicsImpl::CheckFormatSupport(D3DFORMAT format, DWORD usage, D3DRESOURCETYPE type)

+ 1 - 1
Engine/Graphics/Direct3D9/D3D9GraphicsImpl.h

@@ -62,7 +62,7 @@ private:
     /// Default depth stencil surface.
     IDirect3DSurface9* defaultDepthStencilSurface_;
     /// Frame query for flushing the GPU command queue.
-    IDirect3DQuery9* frameQuery_;
+    IDirect3DQuery9* frameQueries_[NUM_QUERIES];
     /// Adapter number.
     DWORD adapter_;
     /// Device type.

+ 0 - 39
Engine/Graphics/OpenGL/OGLGraphics.cpp

@@ -54,34 +54,6 @@
 
 #include <stdio.h>
 
-#ifdef _MSC_VER
-#include <float.h>
-#else
-// From http://stereopsis.com/FPU.html
-
-#define FPU_CW_PREC_MASK        0x0300
-#define FPU_CW_PREC_SINGLE      0x0000
-#define FPU_CW_PREC_DOUBLE      0x0200
-#define FPU_CW_PREC_EXTENDED    0x0300
-#define FPU_CW_ROUND_MASK       0x0c00
-#define FPU_CW_ROUND_NEAR       0x0000
-#define FPU_CW_ROUND_DOWN       0x0400
-#define FPU_CW_ROUND_UP         0x0800
-#define FPU_CW_ROUND_CHOP       0x0c00
-
-inline unsigned GetFPUState()
-{
-    unsigned control = 0;
-    __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
-    return control;
-}
-
-inline void SetFPUState(unsigned control)
-{
-    __asm__ __volatile__ ("fldcw %0" : : "m" (control));
-}
-#endif
-
 #include "DebugNew.h"
 
 static const unsigned glCmpFunc[] =
@@ -277,17 +249,6 @@ bool Graphics::SetMode(RenderMode mode, int width, int height, bool fullscreen,
         // Set window close callback
         glfwSetWindowCloseCallback(CloseCallback);
         
-        // Mimic Direct3D way of setting FPU into round-to-nearest, single precision mode
-        // This is actually needed for ODE to behave predictably in float mode
-        #ifdef _MSC_VER
-        _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC);
-        #else
-        unsigned control = GetFPUState();
-        control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK);
-        control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR);
-        SetFPUState(control);
-        #endif
-        
         // Associate GLFW window with the execution context
         SetWindowContext(impl_->window_, context_);
     }

+ 0 - 139
Engine/Graphics/Renderer.cpp

@@ -1339,145 +1339,6 @@ Node* Renderer::CreateTempNode()
     return node;
 }
 
-void Renderer::SetupLightBatch(Batch& batch)
-{
-    Matrix3x4 view(batch.camera_->GetInverseWorldTransform());
-    
-    Light* light = batch.light_;
-    float lightExtent = light->GetVolumeExtent();
-    float lightViewDist = (light->GetWorldPosition() - batch.camera_->GetWorldPosition()).LengthFast();
-    
-    graphics_->SetAlphaTest(false);
-    graphics_->SetBlendMode(BLEND_ADD);
-    graphics_->SetDepthWrite(false);
-    
-    if (light->GetLightType() == LIGHT_DIRECTIONAL)
-    {
-        // Get projection without jitter offset to ensure the whole screen is filled
-        Matrix4 projection(batch.camera_->GetProjection(false));
-        
-        // If the light does not extend to the near plane, use a stencil test. Else just draw with depth fail
-        if (light->GetNearSplit() <= batch.camera_->GetNearClip())
-        {
-            graphics_->SetCullMode(CULL_NONE);
-            graphics_->SetDepthTest(CMP_GREATER);
-            graphics_->SetStencilTest(false);
-        }
-        else
-        {
-            Matrix3x4 nearTransform = light->GetDirLightTransform(*batch.camera_, true);
-            
-            // Set state for stencil rendering
-            graphics_->SetColorWrite(false);
-            graphics_->SetCullMode(CULL_NONE);
-            graphics_->SetDepthTest(CMP_LESSEQUAL);
-            graphics_->SetStencilTest(true, CMP_ALWAYS, OP_INCR, OP_KEEP, OP_KEEP, 1);
-            graphics_->SetShaders(stencilVS_, stencilPS_);
-            graphics_->SetShaderParameter(VSP_VIEWPROJ, projection);
-            graphics_->SetShaderParameter(VSP_MODEL, nearTransform);
-            graphics_->ClearTransformSources();
-            
-            // Draw to stencil
-            batch.geometry_->Draw(graphics_);
-            
-            // Re-enable color write, set test for rendering the actual light
-            graphics_->SetColorWrite(true);
-            graphics_->SetDepthTest(CMP_GREATER);
-            graphics_->SetStencilTest(true, CMP_EQUAL, OP_ZERO, OP_KEEP, OP_ZERO, 1);
-        }
-    }
-    else
-    {
-        Matrix4 projection(batch.camera_->GetProjection());
-        const Matrix3x4& model = light->GetVolumeTransform(*batch.camera_);
-        
-        if (light->GetLightType() == LIGHT_SPLITPOINT)
-        {
-            // Shadowed point light, split in 6 frustums: mask out overlapping pixels to prevent overlighting
-            // Check whether we should draw front or back faces
-            bool drawBackFaces = lightViewDist < (lightExtent + batch.camera_->GetNearClip());
-            graphics_->SetColorWrite(false);
-            graphics_->SetCullMode(drawBackFaces ? CULL_CCW : CULL_CW);
-            graphics_->SetDepthTest(drawBackFaces ? CMP_GREATER : CMP_LESS);
-            graphics_->SetStencilTest(true, CMP_EQUAL, OP_INCR, OP_KEEP, OP_KEEP, 0);
-            graphics_->SetShaders(stencilVS_, stencilPS_);
-            graphics_->SetShaderParameter(VSP_VIEWPROJ, projection * view);
-            graphics_->SetShaderParameter(VSP_MODEL, model);
-            
-            // Draw the other faces to stencil to mark where we should not draw
-            batch.geometry_->Draw(graphics_);
-            
-            graphics_->SetColorWrite(true);
-            graphics_->SetCullMode(drawBackFaces ? CULL_CW : CULL_CCW);
-            graphics_->SetStencilTest(true, CMP_EQUAL, OP_DECR, OP_DECR, OP_KEEP, 0);
-        }
-        else
-        {
-            // If light is close to near clip plane, we might be inside light volume
-            if (lightViewDist < (lightExtent + batch.camera_->GetNearClip()))
-            {
-                // In this case reverse cull mode & depth test and render back faces
-                graphics_->SetCullMode(CULL_CW);
-                graphics_->SetDepthTest(CMP_GREATER);
-                graphics_->SetStencilTest(false);
-            }
-            else
-            {
-                // If not too close to far clip plane, write the back faces to stencil for optimization,
-                // then render front faces. Else just render front faces.
-                if (lightViewDist < (batch.camera_->GetFarClip() - lightExtent))
-                {
-                    // Set state for stencil rendering
-                    graphics_->SetColorWrite(false);
-                    graphics_->SetCullMode(CULL_CW);
-                    graphics_->SetDepthTest(CMP_GREATER);
-                    graphics_->SetStencilTest(true, CMP_ALWAYS, OP_INCR, OP_KEEP, OP_KEEP, 1);
-                    graphics_->SetShaders(stencilVS_, stencilPS_);
-                    graphics_->SetShaderParameter(VSP_VIEWPROJ, projection * view);
-                    graphics_->SetShaderParameter(VSP_MODEL, model);
-                    
-                    // Draw to stencil
-                    batch.geometry_->Draw(graphics_);
-                    
-                    // Re-enable color write, set test for rendering the actual light
-                    graphics_->SetColorWrite(true);
-                    graphics_->SetStencilTest(true, CMP_EQUAL, OP_ZERO, OP_KEEP, OP_ZERO, 1);
-                    graphics_->SetCullMode(CULL_CCW);
-                    graphics_->SetDepthTest(CMP_LESS);
-                }
-                else
-                {
-                    graphics_->SetStencilTest(false);
-                    graphics_->SetCullMode(CULL_CCW);
-                    graphics_->SetDepthTest(CMP_LESS);
-                }
-            }
-        }
-    }
-}
-
-void Renderer::DrawFullScreenQuad(Camera& camera, ShaderVariation* vs, ShaderVariation* ps, bool nearQuad, const HashMap<StringHash, Vector4>& shaderParameters)
-{
-    Light quadDirLight(context_);
-    Matrix3x4 model(quadDirLight.GetDirLightTransform(camera, nearQuad));
-    
-    graphics_->SetCullMode(CULL_NONE);
-    graphics_->SetShaders(vs, ps);
-    graphics_->SetShaderParameter(VSP_MODEL, model);
-    // Get projection without jitter offset to ensure the whole screen is filled
-    graphics_->SetShaderParameter(VSP_VIEWPROJ, camera.GetProjection(false));
-    graphics_->ClearTransformSources();
-    
-    // Set global shader parameters as needed
-    for (HashMap<StringHash, Vector4>::ConstIterator i = shaderParameters.Begin(); i != shaderParameters.End(); ++i)
-    {
-        if (graphics_->NeedParameterUpdate(i->first_, &shaderParameters))
-            graphics_->SetShaderParameter(i->first_, i->second_);
-    }
-    
-    dirLightGeometry_->Draw(graphics_);
-}
-
 void Renderer::HandleScreenMode(StringHash eventType, VariantMap& eventData)
 {
     if (!initialized_)

+ 0 - 4
Engine/Graphics/Renderer.h

@@ -330,10 +330,6 @@ private:
     Light* CreateSplitLight(Light* original);
     /// Allocate a temporary scene node for attaching a split light or a shadow camera.
     Node* CreateTempNode();
-    /// %Set up a light volume rendering batch.
-    void SetupLightBatch(Batch& batch);
-    /// Draw a full screen quad (either near or far.)
-    void DrawFullScreenQuad(Camera& camera, ShaderVariation* vs, ShaderVariation* ps, bool nearQuad, const HashMap<StringHash, Vector4>& shaderParameters);
     /// Handle screen mode event.
     void HandleScreenMode(StringHash eventType, VariantMap& eventData);
     /// Handle render update event.

+ 193 - 70
Engine/Graphics/View.cpp

@@ -488,7 +488,7 @@ void View::GetBatches()
             // Prepare lit object + shadow caster queues for each split
             if (lightQueues_.Size() < lightQueueCount + splits)
                 lightQueues_.Resize(lightQueueCount + splits);
-            unsigned prevLightQueueCount = lightQueueCount;
+            bool firstSplitStored = false;
             
             for (unsigned j = 0; j < splits; ++j)
             {
@@ -498,7 +498,7 @@ void View::GetBatches()
                 lightQueue.shadowBatches_.Clear();
                 lightQueue.litBatches_.Clear();
                 lightQueue.volumeBatches_.Clear();
-                lightQueue.lastSplit_ = false;
+                lightQueue.firstSplit_ = !firstSplitStored;
                 
                 // Loop through shadow casters
                 Camera* shadowCamera = splitLight->GetShadowCamera();
@@ -577,14 +577,11 @@ void View::GetBatches()
                     if (storeLightQueue)
                     {
                         lightQueueIndex[splitLight] = lightQueueCount;
+                        firstSplitStored = true;
                         ++lightQueueCount;
                     }
                 }
             }
-            
-            // Mark the last split
-            if (lightQueueCount != prevLightQueueCount)
-                lightQueues_[lightQueueCount - 1].lastSplit_ = true;
         }
         
         // Resize the light queue vector now that final size is known
@@ -730,15 +727,7 @@ void View::RenderBatchesForward()
             graphics_->SetDepthStencil(depthStencil_);
             graphics_->SetViewport(screenRect_);
             
-            RenderForwardLightBatchQueue(queue.litBatches_, queue.light_);
-            
-            // Clear the stencil buffer after the last split
-            if (queue.lastSplit_)
-            {
-                LightType type = queue.light_->GetLightType();
-                if (type == LIGHT_SPLITPOINT || type == LIGHT_DIRECTIONAL)
-                    DrawSplitLightToStencil(*camera_, queue.light_, true);
-            }
+            RenderForwardLightBatchQueue(queue.litBatches_, queue.light_, queue.firstSplit_);
         }
     }
     
@@ -839,8 +828,8 @@ void View::RenderBatchesDeferred()
         graphics_->ResetRenderTarget(2);
         graphics_->SetRenderTarget(1, depthBuffer);
         
-        renderer_->DrawFullScreenQuad(*camera_, renderer_->GetVertexShader("GBufferFill"),
-            renderer_->GetPixelShader("GBufferFill"), false, shaderParameters_);
+        DrawFullScreenQuad(*camera_, renderer_->GetVertexShader("GBufferFill"), renderer_->GetPixelShader("GBufferFill"),
+            false, shaderParameters_);
         #endif
     }
     
@@ -869,8 +858,8 @@ void View::RenderBatchesDeferred()
         graphics_->SetStencilTest(true, CMP_ALWAYS, OP_ZERO, OP_KEEP, OP_KEEP);
         #endif
         
-        renderer_->DrawFullScreenQuad(*camera_, renderer_->GetVertexShader("Ambient"),
-            renderer_->GetPixelShader("Ambient"), false, shaderParameters_);
+        DrawFullScreenQuad(*camera_, renderer_->GetVertexShader("Ambient"), renderer_->GetPixelShader("Ambient"),
+            false, shaderParameters_);
         
         #ifdef USE_OPENGL
         graphics_->SetStencilTest(false);
@@ -902,13 +891,9 @@ void View::RenderBatchesDeferred()
                 
                 for (unsigned j = 0; j < queue.volumeBatches_.Size(); ++j)
                 {
-                    renderer_->SetupLightBatch(queue.volumeBatches_[j]);
+                    SetupLightBatch(queue.volumeBatches_[j], queue.firstSplit_);
                     queue.volumeBatches_[j].Draw(graphics_, shaderParameters_);
                 }
-                
-                // If was the last split of a split point light, clear the stencil by rendering the point light again
-                if (queue.lastSplit_ && queue.light_->GetLightType() == LIGHT_SPLITPOINT)
-                    DrawSplitLightToStencil(*camera_, queue.light_, true);
             }
         }
         
@@ -924,7 +909,7 @@ void View::RenderBatchesDeferred()
             
             for (unsigned i = 0; i < noShadowLightQueue_.sortedBatches_.Size(); ++i)
             {
-                renderer_->SetupLightBatch(*noShadowLightQueue_.sortedBatches_[i]);
+                SetupLightBatch(*noShadowLightQueue_.sortedBatches_[i], false);
                 noShadowLightQueue_.sortedBatches_[i]->Draw(graphics_, shaderParameters_);
             }
         }
@@ -982,7 +967,7 @@ void View::RenderBatchesDeferred()
         graphics_->SetDepthStencil(depthStencil_);
         graphics_->SetViewport(screenRect_);
         graphics_->SetTexture(TU_DIFFBUFFER, graphics_->GetScreenBuffer());
-        renderer_->DrawFullScreenQuad(*camera_, vs, ps, false, shaderParameters);
+        DrawFullScreenQuad(*camera_, vs, ps, false, shaderParameters);
     }
 }
 
@@ -1945,15 +1930,149 @@ void View::CalculateShaderParameters()
     shaderParameters_[PSP_FOGPARAMS] = fogParams;
 }
 
-void View::DrawSplitLightToStencil(Camera& camera, Light* light, bool clear)
+/// Set up render state for drawing a light volume batch, rendering a stencil mask first where one is used. For split point lights, firstSplit triggers a scissored stencil clear.
+void View::SetupLightBatch(Batch& batch, bool firstSplit)
+{
+    Matrix3x4 view(batch.camera_->GetInverseWorldTransform());
+    
+    Light* light = batch.light_;
+    float lightExtent = light->GetVolumeExtent();
+    float lightViewDist = (light->GetWorldPosition() - batch.camera_->GetWorldPosition()).LengthFast(); // Distance between the light and the camera
+    
+    graphics_->SetAlphaTest(false); // The following three states are applied for every light type
+    graphics_->SetBlendMode(BLEND_ADD);
+    graphics_->SetDepthWrite(false);
+    
+    if (light->GetLightType() == LIGHT_DIRECTIONAL)
+    {
+        // Get projection without jitter offset to ensure the whole screen is filled
+        Matrix4 projection(batch.camera_->GetProjection(false));
+        
+        // If the light's near split is at or before the camera near clip, draw without stencil using a CMP_GREATER depth test.
+        // Otherwise render a stencil mask first, then draw the light only where the stencil test passes
+        if (light->GetNearSplit() <= batch.camera_->GetNearClip())
+        {
+            graphics_->SetCullMode(CULL_NONE);
+            graphics_->SetDepthTest(CMP_GREATER);
+            graphics_->SetStencilTest(false);
+        }
+        else
+        {
+            Matrix3x4 nearTransform = light->GetDirLightTransform(*batch.camera_, true);
+            
+            // Set state for stencil rendering: color writes off, stencil compare always passes.
+            // NOTE(review): presumably OP_REF writes the reference value 1 on pass and OP_ZERO clears on fail / depth fail - confirm against SetStencilTest() parameter order
+            graphics_->SetColorWrite(false);
+            graphics_->SetCullMode(CULL_NONE);
+            graphics_->SetDepthTest(CMP_LESSEQUAL);
+            graphics_->SetStencilTest(true, CMP_ALWAYS, OP_REF, OP_ZERO, OP_ZERO, 1);
+            graphics_->SetShaders(renderer_->stencilVS_, renderer_->stencilPS_);
+            graphics_->SetShaderParameter(VSP_VIEWPROJ, projection);
+            graphics_->SetShaderParameter(VSP_MODEL, nearTransform);
+            graphics_->ClearTransformSources();
+            
+            // Draw the batch geometry with the near plane transform to stencil
+            batch.geometry_->Draw(graphics_);
+            
+            // Re-enable color write, set test for rendering the actual light: CMP_EQUAL stencil compare with all stencil ops OP_KEEP
+            graphics_->SetColorWrite(true);
+            graphics_->SetDepthTest(CMP_GREATER);
+            graphics_->SetStencilTest(true, CMP_EQUAL, OP_KEEP, OP_KEEP, OP_KEEP, 1);
+        }
+    }
+    else
+    {
+        Matrix4 projection(batch.camera_->GetProjection());
+        const Matrix3x4& model = light->GetVolumeTransform(*batch.camera_);
+        
+        if (light->GetLightType() == LIGHT_SPLITPOINT)
+        {
+            // Shadowed point light, split in 6 frustums: mask out overlapping pixels to prevent overlighting
+            // If it is the first split, clear the stencil with a clear operation scissored by the original (unsplit) light, then disable the scissor test again
+            if (firstSplit)
+            {
+                OptimizeLightByScissor(light->GetOriginalLight());
+                graphics_->Clear(CLEAR_STENCIL);
+                graphics_->SetScissorTest(false);
+            }
+            
+            // Check whether we should draw front or back faces: back faces are chosen when the camera is within the light extent plus the near clip distance
+            bool drawBackFaces = lightViewDist < (lightExtent + batch.camera_->GetNearClip());
+            graphics_->SetColorWrite(false);
+            graphics_->SetCullMode(drawBackFaces ? CULL_CCW : CULL_CW);
+            graphics_->SetDepthTest(drawBackFaces ? CMP_GREATER : CMP_LESS);
+            graphics_->SetStencilTest(true, CMP_EQUAL, OP_INCR, OP_KEEP, OP_KEEP, 0);
+            graphics_->SetShaders(renderer_->stencilVS_, renderer_->stencilPS_);
+            graphics_->SetShaderParameter(VSP_VIEWPROJ, projection * view);
+            graphics_->SetShaderParameter(VSP_MODEL, model);
+            
+            // Draw the other faces to stencil to mark where we should not draw
+            batch.geometry_->Draw(graphics_);
+            
+            graphics_->SetColorWrite(true); // Color writes back on; the cull mode is flipped for the actual light draw, the depth test is left as set above
+            graphics_->SetCullMode(drawBackFaces ? CULL_CW : CULL_CCW);
+            graphics_->SetStencilTest(true, CMP_EQUAL, OP_DECR, OP_DECR, OP_KEEP, 0);
+        }
+        else
+        {
+            // If light is close to near clip plane, we might be inside light volume
+            if (lightViewDist < (lightExtent + batch.camera_->GetNearClip()))
+            {
+                // In this case reverse cull mode & depth test and render back faces, without stencil
+                graphics_->SetCullMode(CULL_CW);
+                graphics_->SetDepthTest(CMP_GREATER);
+                graphics_->SetStencilTest(false);
+            }
+            else
+            {
+                // If not too close to far clip plane, write the back faces to stencil for optimization,
+                // then render front faces. Else just render front faces without stencil.
+                if (lightViewDist < (batch.camera_->GetFarClip() - lightExtent))
+                {
+                    // Set state for stencil rendering: color writes off, CULL_CW with a CMP_GREATER depth test
+                    graphics_->SetColorWrite(false);
+                    graphics_->SetCullMode(CULL_CW);
+                    graphics_->SetDepthTest(CMP_GREATER);
+                    graphics_->SetStencilTest(true, CMP_ALWAYS, OP_REF, OP_ZERO, OP_ZERO, 1);
+                    graphics_->SetShaders(renderer_->stencilVS_, renderer_->stencilPS_);
+                    graphics_->SetShaderParameter(VSP_VIEWPROJ, projection * view);
+                    graphics_->SetShaderParameter(VSP_MODEL, model);
+                    
+                    // Draw the light volume geometry to stencil
+                    batch.geometry_->Draw(graphics_);
+                    
+                    // Re-enable color write, set test for rendering the actual light: CMP_EQUAL stencil compare with all stencil ops OP_KEEP
+                    graphics_->SetColorWrite(true);
+                    graphics_->SetStencilTest(true, CMP_EQUAL, OP_KEEP, OP_KEEP, OP_KEEP, 1);
+                    graphics_->SetCullMode(CULL_CCW);
+                    graphics_->SetDepthTest(CMP_LESS);
+                }
+                else
+                {
+                    graphics_->SetStencilTest(false);
+                    graphics_->SetCullMode(CULL_CCW);
+                    graphics_->SetDepthTest(CMP_LESS);
+                }
+            }
+        }
+    }
+}
+
+void View::DrawSplitLightToStencil(Camera& camera, Light* light, bool firstSplit)
 {
     Matrix3x4 view(camera.GetInverseWorldTransform());
     
     switch (light->GetLightType())
     {
     case LIGHT_SPLITPOINT:
-        if (!clear)
         {
+            // Shadowed point light, split in 6 frustums: mask out overlapping pixels to prevent overlighting
+            // If it is the first split, zero the stencil with a scissored clear operation
+            if (firstSplit)
+            {
+                OptimizeLightByScissor(light->GetOriginalLight());
+                graphics_->Clear(CLEAR_STENCIL);
+                graphics_->SetScissorTest(false);
+            }
+            
             Matrix4 projection(camera.GetProjection());
             const Matrix3x4& model = light->GetVolumeTransform(camera);
             float lightExtent = light->GetVolumeExtent();
@@ -1983,14 +2102,6 @@ void View::DrawSplitLightToStencil(Camera& camera, Light* light, bool clear)
             graphics_->SetStencilTest(true, CMP_EQUAL, OP_INCR, OP_KEEP, OP_KEEP, 1);
             graphics_->SetColorWrite(true);
         }
-        else
-        {
-            // Clear stencil with a scissored clear operation
-            OptimizeLightByScissor(light->GetOriginalLight());
-            graphics_->Clear(CLEAR_STENCIL);
-            graphics_->SetScissorTest(false);
-            graphics_->SetStencilTest(false);
-        }
         break;
         
     case LIGHT_DIRECTIONAL:
@@ -2002,43 +2113,55 @@ void View::DrawSplitLightToStencil(Camera& camera, Light* light, bool clear)
         }
         else
         {
-            if (!clear)
-            {
-                // Get projection without jitter offset to ensure the whole screen is filled
-                Matrix4 projection(camera.GetProjection(false));
-                Matrix3x4 nearTransform(light->GetDirLightTransform(camera, true));
-                Matrix3x4 farTransform(light->GetDirLightTransform(camera, false));
-                
-                graphics_->SetAlphaTest(false);
-                graphics_->SetColorWrite(false);
-                graphics_->SetDepthWrite(false);
-                graphics_->SetCullMode(CULL_NONE);
-                
-                // If the split begins at the near plane (first split), draw at split far plane, otherwise at near plane
-                bool firstSplit = light->GetNearSplit() <= camera.GetNearClip();
-                graphics_->SetDepthTest(firstSplit ? CMP_GREATER : CMP_LESS);
-                graphics_->SetShaders(renderer_->stencilVS_, renderer_->stencilPS_);
-                graphics_->SetShaderParameter(VSP_MODEL, firstSplit ? farTransform : nearTransform);
-                graphics_->SetShaderParameter(VSP_VIEWPROJ, projection);
-                graphics_->SetStencilTest(true, CMP_ALWAYS, OP_REF, OP_ZERO, OP_ZERO, 1);
-                graphics_->ClearTransformSources();
-                
-                renderer_->dirLightGeometry_->Draw(graphics_);
-                graphics_->SetColorWrite(true);
-                graphics_->SetStencilTest(true, CMP_EQUAL, OP_KEEP, OP_KEEP, OP_KEEP, 1);
-            }
-            else
-            {
-                // Clear the whole stencil
-                graphics_->SetScissorTest(false);
-                graphics_->Clear(CLEAR_STENCIL);
-                graphics_->SetStencilTest(false);
-            }
+            // Get projection without jitter offset to ensure the whole screen is filled
+            Matrix4 projection(camera.GetProjection(false));
+            Matrix3x4 nearTransform(light->GetDirLightTransform(camera, true));
+            Matrix3x4 farTransform(light->GetDirLightTransform(camera, false));
+            
+            graphics_->SetAlphaTest(false);
+            graphics_->SetColorWrite(false);
+            graphics_->SetDepthWrite(false);
+            graphics_->SetCullMode(CULL_NONE);
+            
+            // If the split begins at the near plane (first split), draw at split far plane, otherwise at near plane
+            bool nearPlaneSplit = light->GetNearSplit() <= camera.GetNearClip();
+            graphics_->SetDepthTest(nearPlaneSplit ? CMP_GREATER : CMP_LESS);
+            graphics_->SetShaders(renderer_->stencilVS_, renderer_->stencilPS_);
+            graphics_->SetShaderParameter(VSP_MODEL, nearPlaneSplit ? farTransform : nearTransform);
+            graphics_->SetShaderParameter(VSP_VIEWPROJ, projection);
+            graphics_->SetStencilTest(true, CMP_ALWAYS, OP_REF, OP_ZERO, OP_ZERO, 1);
+            graphics_->ClearTransformSources();
+            
+            renderer_->dirLightGeometry_->Draw(graphics_);
+            graphics_->SetColorWrite(true);
+            graphics_->SetStencilTest(true, CMP_EQUAL, OP_KEEP, OP_KEEP, OP_KEEP, 1);
         }
         break;
     }
 }
 
+void View::DrawFullScreenQuad(Camera& camera, ShaderVariation* vs, ShaderVariation* ps, bool nearQuad, const HashMap<StringHash, Vector4>& shaderParameters) // Draws the directional light quad geometry with the given shaders; nearQuad is forwarded to GetDirLightTransform() to choose the quad transform
+{
+    Light quadDirLight(context_); // Temporary light, used here only to query the quad's model transform
+    Matrix3x4 model(quadDirLight.GetDirLightTransform(camera, nearQuad));
+    
+    graphics_->SetCullMode(CULL_NONE);
+    graphics_->SetShaders(vs, ps);
+    graphics_->SetShaderParameter(VSP_MODEL, model);
+    // Get projection without jitter offset to ensure the whole screen is filled
+    graphics_->SetShaderParameter(VSP_VIEWPROJ, camera.GetProjection(false));
+    graphics_->ClearTransformSources();
+    
+    // Upload the caller-supplied shader parameters, but only those for which an update is reported as needed
+    for (HashMap<StringHash, Vector4>::ConstIterator i = shaderParameters.Begin(); i != shaderParameters.End(); ++i)
+    {
+        if (graphics_->NeedParameterUpdate(i->first_, &shaderParameters)) // NOTE(review): presumably the map address identifies the parameter source so repeated uploads can be skipped - confirm against Graphics
+            graphics_->SetShaderParameter(i->first_, i->second_);
+    }
+    
+    renderer_->dirLightGeometry_->Draw(graphics_); // Same geometry that the directional light stencil pass draws
+}
+
 void View::RenderBatchQueue(const BatchQueue& queue, bool useScissor, bool disableScissor)
 {
     VertexBuffer* instancingBuffer = 0;
@@ -2087,7 +2210,7 @@ void View::RenderBatchQueue(const BatchQueue& queue, bool useScissor, bool disab
     }
 }
 
-void View::RenderForwardLightBatchQueue(const BatchQueue& queue, Light* light)
+void View::RenderForwardLightBatchQueue(const BatchQueue& queue, Light* light, bool firstSplit)
 {
     VertexBuffer* instancingBuffer = 0;
     if (renderer_->GetDynamicInstancing())
@@ -2116,7 +2239,7 @@ void View::RenderForwardLightBatchQueue(const BatchQueue& queue, Light* light)
         OptimizeLightByScissor(light);
         LightType type = light->GetLightType();
         if (type == LIGHT_SPLITPOINT || type == LIGHT_DIRECTIONAL)
-            DrawSplitLightToStencil(*camera_, light);
+            DrawSplitLightToStencil(*camera_, light, firstSplit);
     }
     
     // Non-priority instanced

+ 6 - 2
Engine/Graphics/View.h

@@ -175,12 +175,16 @@ private:
     void PrepareInstancingBuffer();
     /// Calculate view-global shader parameters.
     void CalculateShaderParameters();
+    /// %Set up a light volume rendering batch.
+    void SetupLightBatch(Batch& batch, bool firstSplit);
     /// Draw a split light to stencil buffer.
-    void DrawSplitLightToStencil(Camera& camera, Light* light, bool clear = false);
+    void DrawSplitLightToStencil(Camera& camera, Light* light, bool firstSplit);
+    /// Draw a full screen quad (either near or far.)
+    void DrawFullScreenQuad(Camera& camera, ShaderVariation* vs, ShaderVariation* ps, bool nearQuad, const HashMap<StringHash, Vector4>& shaderParameters);
     /// Draw everything in a batch queue, priority batches first.
     void RenderBatchQueue(const BatchQueue& queue, bool useScissor = false, bool disableScissor = true);
     /// Draw a forward (shadowed) light batch queue.
-    void RenderForwardLightBatchQueue(const BatchQueue& queue, Light* forwardQueueLight);
+    void RenderForwardLightBatchQueue(const BatchQueue& queue, Light* forwardQueueLight, bool firstSplit);
     /// Render a shadow map.
     void RenderShadowMap(const LightBatchQueue& queue);
     

+ 39 - 0
Engine/Physics/PhysicsWorld.cpp

@@ -40,6 +40,34 @@
 #include <ode/ode.h>
 #include "Sort.h"
 
+#ifdef _MSC_VER
+#include <float.h>
+#else
+// From http://stereopsis.com/FPU.html
+
+#define FPU_CW_PREC_MASK        0x0300 // Mask covering the precision control bits of the x87 control word
+#define FPU_CW_PREC_SINGLE      0x0000 // Precision field value: single precision
+#define FPU_CW_PREC_DOUBLE      0x0200 // Precision field value: double precision
+#define FPU_CW_PREC_EXTENDED    0x0300 // Precision field value: extended precision
+#define FPU_CW_ROUND_MASK       0x0c00 // Mask covering the rounding control bits of the x87 control word
+#define FPU_CW_ROUND_NEAR       0x0000 // Rounding field value: round to nearest
+#define FPU_CW_ROUND_DOWN       0x0400 // Rounding field value: round down
+#define FPU_CW_ROUND_UP         0x0800 // Rounding field value: round up
+#define FPU_CW_ROUND_CHOP       0x0c00 // Rounding field value: chop (truncate)
+
+inline unsigned GetFPUState() // Returns the current x87 FPU control word; compiled only when _MSC_VER is not defined (GCC-style inline assembly)
+{
+    unsigned control = 0; // Zero-initialized first: fnstcw stores only a 16-bit word, so the remaining bits stay zero
+    __asm__ __volatile__ ("fnstcw %0" : "=m" (control)); // Store the FPU control word into the memory operand
+    return control;
+}
+
+inline void SetFPUState(unsigned control) // Loads a new x87 FPU control word; compiled only when _MSC_VER is not defined (GCC-style inline assembly)
+{
+    __asm__ __volatile__ ("fldcw %0" : : "m" (control)); // fldcw reads a 16-bit word from the memory operand - NOTE(review): relies on the control word being in the low bytes of the unsigned (little-endian x86)
+}
+#endif
+
 #include "DebugNew.h"
 
 static const int DEFAULT_FPS = 60;
@@ -76,6 +104,17 @@ PhysicsWorld::PhysicsWorld(Context* context) :
         ++numInstances;
     }
     
+    // Make sure FPU is in round-to-nearest, single precision mode
+    // This is needed for ODE to behave predictably in float mode
+    #ifdef _MSC_VER
+    _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC);
+    #else
+    unsigned control = GetFPUState();
+    control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK);
+    control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR);
+    SetFPUState(control);
+    #endif
+        
     // Create the world, the collision space, and contact joint group
     physicsWorld_ = dWorldCreate();
     space_ = dHashSpaceCreate(0);

+ 1 - 4
SourceAssets/HLSLShaders/Forward.xml

@@ -1,10 +1,7 @@
 <shaders>
     <shader name="Forward" type="vs">
         <option name="Normal" define="NORMALMAP" />
-        <option name="VCol" define="VERTEXCOLOR">
-            <exclude name="Normal" />
-            <exclude name="Shadow" />
-        </option>
+        <option name="VCol" define="VERTEXCOLOR" />
         <option name="Spot" define="SPOTLIGHT" />
         <option name="Shadow" define="SHADOW" />
         <variation name="" />