Browse Source

Limit to three worker threads for more consistent performance.
Ensure the FPU is set up the same way on each thread to avoid, for example, small errors in shadow camera calculations.

Lasse Öörni 14 years ago
parent
commit
57070cafc3

+ 42 - 0
Engine/Core/ProcessUtils.cpp

@@ -36,6 +36,34 @@
 #include <unistd.h>
 #include <unistd.h>
 #endif
 #endif
 
 
+#ifdef _MSC_VER
+#include <float.h>
+#else
+// From http://stereopsis.com/FPU.html
+
+#define FPU_CW_PREC_MASK        0x0300
+#define FPU_CW_PREC_SINGLE      0x0000
+#define FPU_CW_PREC_DOUBLE      0x0200
+#define FPU_CW_PREC_EXTENDED    0x0300
+#define FPU_CW_ROUND_MASK       0x0c00
+#define FPU_CW_ROUND_NEAR       0x0000
+#define FPU_CW_ROUND_DOWN       0x0400
+#define FPU_CW_ROUND_UP         0x0800
+#define FPU_CW_ROUND_CHOP       0x0c00
+
+inline unsigned GetFPUState() // Returns the current x87 FPU control word (non-MSVC, GCC-style inline asm path)
+{
+    unsigned control = 0; // Zero-initialized so any bits the store below does not write remain 0
+    __asm__ __volatile__ ("fnstcw %0" : "=m" (control)); // Store the control word into control via a memory output operand
+    return control;
+}
+
+inline void SetFPUState(unsigned control) // Loads the given value as the x87 FPU control word (non-MSVC path)
+{
+    __asm__ __volatile__ ("fldcw %0" : : "m" (control)); // control is a memory input operand; there are no output operands
+}
+#endif
+
 #include "DebugNew.h"
 #include "DebugNew.h"
 
 
 #ifdef WIN32
 #ifdef WIN32
@@ -45,6 +73,20 @@ static String currentLine;
 static Vector<String> arguments;
 static Vector<String> arguments;
 static Mutex staticMutex;
 static Mutex staticMutex;
 
 
+void InitFPU()
+{
+    // Make sure the FPU is in round-to-nearest, single precision mode.
+    // This is needed for ODE to behave predictably in float mode.
+    // NOTE(review): the non-MSVC path below only rewrites the x87 control word (fnstcw/fldcw); no SSE control state is modified here.
+    #ifdef _MSC_VER
+    _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC); // Mask restricts the change to the rounding-control and precision-control fields
+    #else
+    unsigned control = GetFPUState(); // Read-modify-write so bits outside the two masked fields are preserved
+    control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK); // Clear the precision and rounding fields
+    control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR); // Both constants are defined as 0x0000, so this OR does not change the value
+    SetFPUState(control);
+    #endif
+}
+
 void ErrorDialog(const char* title, const char* message)
 void ErrorDialog(const char* title, const char* message)
 {
 {
     #ifdef WIN32
     #ifdef WIN32

+ 2 - 0
Engine/Core/ProcessUtils.h

@@ -27,6 +27,8 @@
 
 
 class Mutex;
 class Mutex;
 
 
+/// Initialize the FPU to round-to-nearest, single precision mode.
+void InitFPU();
 /// Display an error dialog with the specified title and message.
 /// Display an error dialog with the specified title and message.
 void ErrorDialog(const char* title, const char* message);
 void ErrorDialog(const char* title, const char* message);
 /// Exit the application with an error message to the console.
 /// Exit the application with an error message to the console.

+ 45 - 29
Engine/Core/WorkQueue.cpp

@@ -22,6 +22,7 @@
 //
 //
 
 
 #include "Precompiled.h"
 #include "Precompiled.h"
+#include "ProcessUtils.h"
 #include "Thread.h"
 #include "Thread.h"
 #include "Timer.h"
 #include "Timer.h"
 #include "WorkQueue.h"
 #include "WorkQueue.h"
@@ -38,7 +39,12 @@ public:
     }
     }
     
     
     /// Process work items until stopped.
     /// Process work items until stopped.
-    virtual void ThreadFunction() { owner_->ProcessItems(index_); }
+    virtual void ThreadFunction()
+    {
+        // Set up this worker thread's FPU state before any work item runs on it
+        InitFPU();
+        owner_->ProcessItems(index_); // Hand control to the owning queue's processing loop, passing this thread's index
+    }
     
     
     /// Return thread index.
     /// Return thread index.
     unsigned GetIndex() const { return index_; }
     unsigned GetIndex() const { return index_; }
@@ -56,6 +62,7 @@ WorkQueue::WorkQueue(Context* context) :
     Object(context),
     Object(context),
     numActive_(0),
     numActive_(0),
     shutDown_(false),
     shutDown_(false),
+    pausing_(false),
     paused_(false)
     paused_(false)
 {
 {
 }
 }
@@ -113,8 +120,12 @@ void WorkQueue::Pause()
 {
 {
     if (!paused_)
     if (!paused_)
     {
     {
+        pausing_ = true;
+        
         queueMutex_.Acquire();
         queueMutex_.Acquire();
         paused_ = true;
         paused_ = true;
+        
+        pausing_ = false;
     }
     }
 }
 }
 
 
@@ -134,7 +145,8 @@ void WorkQueue::Complete()
     {
     {
         Resume();
         Resume();
         
         
-        for (;;)
+        // Take work items in the main thread until queue empty
+        while (!queue_.Empty())
         {
         {
             queueMutex_.Acquire();
             queueMutex_.Acquire();
             if (!queue_.Empty())
             if (!queue_.Empty())
@@ -145,17 +157,16 @@ void WorkQueue::Complete()
                 item.workFunction_(&item, 0);
                 item.workFunction_(&item, 0);
             }
             }
             else
             else
-            {
-                if (numActive_)
-                    queueMutex_.Release();
-                else
-                {
-                    // All work items are done. Leave the mutex locked and re-enter pause mode
-                    paused_ = true;
-                    return;
-                }
-            }
+                queueMutex_.Release();
         }
         }
+        
+        // Wait for all work to finish
+        while (!IsCompleted())
+        {
+        }
+        
+        // Pause worker threads by leaving the mutex locked
+        Pause();
     }
     }
 }
 }
 
 
@@ -176,28 +187,33 @@ void WorkQueue::ProcessItems(unsigned threadIndex)
         if (shutDown_)
         if (shutDown_)
             return;
             return;
         
         
-        queueMutex_.Acquire();
-        if (!queue_.Empty())
+        if (pausing_ && !wasActive)
+            Time::Sleep(0);
+        else
         {
         {
-            if (!wasActive)
+            queueMutex_.Acquire();
+            if (!queue_.Empty())
             {
             {
-                ++numActive_;
-                wasActive = true;
+                if (!wasActive)
+                {
+                    ++numActive_;
+                    wasActive = true;
+                }
+                WorkItem item = queue_.Front();
+                queue_.PopFront();
+                queueMutex_.Release();
+                item.workFunction_(&item, threadIndex);
             }
             }
-            WorkItem item = queue_.Front();
-            queue_.PopFront();
-            queueMutex_.Release();
-            item.workFunction_(&item, threadIndex);
-        }
-        else
-        {
-            if (wasActive)
+            else
             {
             {
-                --numActive_;
-                wasActive = false;
+                if (wasActive)
+                {
+                    --numActive_;
+                    wasActive = false;
+                }
+                queueMutex_.Release();
+                Time::Sleep(0);
             }
             }
-            queueMutex_.Release();
-            Time::Sleep(0);
         }
         }
     }
     }
 }
 }

+ 2 - 0
Engine/Core/WorkQueue.h

@@ -86,6 +86,8 @@ private:
     volatile unsigned numActive_;
     volatile unsigned numActive_;
     /// Shutting down flag.
     /// Shutting down flag.
     volatile bool shutDown_;
     volatile bool shutDown_;
+    /// Pausing flag. Indicates the worker threads should not contend for the queue mutex.
+    /// Set by the thread calling Pause() and polled by worker threads in ProcessItems() without holding the mutex, hence volatile like the other cross-thread flags.
+    volatile bool pausing_;
     /// Paused flag. Indicates the queue mutex being locked to prevent worker threads using up CPU time.
     /// Paused flag. Indicates the queue mutex being locked to prevent worker threads using up CPU time.
     bool paused_;
     bool paused_;
 };
 };

+ 5 - 2
Engine/Engine/Engine.cpp

@@ -176,11 +176,11 @@ bool Engine::Initialize(const String& windowTitle, const String& logName, const
     log->Open(logName);
     log->Open(logName);
     
     
     // Set amount of worker threads according to the free CPU cores. Leave one for the main thread and another for
     // Set amount of worker threads according to the free CPU cores. Leave one for the main thread and another for
-    // GPU & audio drivers, and clamp to a maximum of four for now
+    // GPU & audio drivers, and clamp to a maximum of three for now
     int numCores = GetNumCPUCores();
     int numCores = GetNumCPUCores();
     if (threads && numCores > 1)
     if (threads && numCores > 1)
     {
     {
-        int numThreads = Clamp(numCores - 2, 1, 4);
+        int numThreads = Clamp(numCores - 2, 1, 3);
         GetSubsystem<WorkQueue>()->CreateThreads(numThreads);
         GetSubsystem<WorkQueue>()->CreateThreads(numThreads);
         
         
         String workerThreadString = "Created " + String(numThreads) + " worker thread";
         String workerThreadString = "Created " + String(numThreads) + " worker thread";
@@ -233,6 +233,9 @@ bool Engine::Initialize(const String& windowTitle, const String& logName, const
             GetSubsystem<Audio>()->SetMode(buffer, mixRate, stereo, interpolate);
             GetSubsystem<Audio>()->SetMode(buffer, mixRate, stereo, interpolate);
     }
     }
     
     
+    // Init FPU state of main thread
+    InitFPU();
+    
     frameTimer_.Reset();
     frameTimer_.Reset();
     
     
     initialized_ = true;
     initialized_ = true;

+ 1 - 39
Engine/Physics/PhysicsWorld.cpp

@@ -40,34 +40,6 @@
 #include <ode/ode.h>
 #include <ode/ode.h>
 #include "Sort.h"
 #include "Sort.h"
 
 
-#ifdef _MSC_VER
-#include <float.h>
-#else
-// From http://stereopsis.com/FPU.html
-
-#define FPU_CW_PREC_MASK        0x0300
-#define FPU_CW_PREC_SINGLE      0x0000
-#define FPU_CW_PREC_DOUBLE      0x0200
-#define FPU_CW_PREC_EXTENDED    0x0300
-#define FPU_CW_ROUND_MASK       0x0c00
-#define FPU_CW_ROUND_NEAR       0x0000
-#define FPU_CW_ROUND_DOWN       0x0400
-#define FPU_CW_ROUND_UP         0x0800
-#define FPU_CW_ROUND_CHOP       0x0c00
-
-inline unsigned GetFPUState()
-{
-    unsigned control = 0;
-    __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
-    return control;
-}
-
-inline void SetFPUState(unsigned control)
-{
-    __asm__ __volatile__ ("fldcw %0" : : "m" (control));
-}
-#endif
-
 #include "DebugNew.h"
 #include "DebugNew.h"
 
 
 static const int DEFAULT_FPS = 60;
 static const int DEFAULT_FPS = 60;
@@ -104,17 +76,7 @@ PhysicsWorld::PhysicsWorld(Context* context) :
         ++numInstances;
         ++numInstances;
     }
     }
     
     
-    // Make sure FPU is in round-to-nearest, single precision mode
-    // This is needed for ODE to behave predictably in float mode
-    #ifdef _MSC_VER
-    _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC);
-    #else
-    unsigned control = GetFPUState();
-    control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK);
-    control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR);
-    SetFPUState(control);
-    #endif
-        
+    
     // Create the world, the collision space, and contact joint group
     // Create the world, the collision space, and contact joint group
     physicsWorld_ = dWorldCreate();
     physicsWorld_ = dWorldCreate();
     space_ = dHashSpaceCreate(0);
     space_ = dHashSpaceCreate(0);