Bladeren bron

Limit to three worker threads for more consistent performance.
Ensure the FPU is set up similarly on each thread to avoid, for example, small errors in shadow camera calculations.

Lasse Öörni 14 jaren geleden
bovenliggende
commit
57070cafc3

+ 42 - 0
Engine/Core/ProcessUtils.cpp

@@ -36,6 +36,34 @@
 #include <unistd.h>
 #endif
 
+#ifdef _MSC_VER
+#include <float.h>
+#else
+// From http://stereopsis.com/FPU.html
+
+#define FPU_CW_PREC_MASK        0x0300  // Precision-control field of the x87 control word (bits 8-9)
+#define FPU_CW_PREC_SINGLE      0x0000  // Precision field value: single precision
+#define FPU_CW_PREC_DOUBLE      0x0200  // Precision field value: double precision
+#define FPU_CW_PREC_EXTENDED    0x0300  // Precision field value: extended precision
+#define FPU_CW_ROUND_MASK       0x0c00  // Rounding-control field of the x87 control word (bits 10-11)
+#define FPU_CW_ROUND_NEAR       0x0000  // Rounding field value: round to nearest
+#define FPU_CW_ROUND_DOWN       0x0400  // Rounding field value: round down
+#define FPU_CW_ROUND_UP         0x0800  // Rounding field value: round up
+#define FPU_CW_ROUND_CHOP       0x0c00  // Rounding field value: chop (truncate)
+
+inline unsigned GetFPUState()  // Return the current x87 FPU control word (GCC-style inline asm, non-MSVC builds only).
+{
+    unsigned control = 0;  // Starts at zero; fnstcw stores a 16-bit word, so the remaining bytes keep this value
+    __asm__ __volatile__ ("fnstcw %0" : "=m" (control));  // Store the control word into 'control' via a memory operand
+    return control;
+}
+
+inline void SetFPUState(unsigned control)  // Load 'control' into the x87 FPU control word (GCC-style inline asm, non-MSVC builds only).
+{
+    __asm__ __volatile__ ("fldcw %0" : : "m" (control));  // fldcw loads a 16-bit word from the memory operand
+}
+#endif
+
 #include "DebugNew.h"
 
 #ifdef WIN32
@@ -45,6 +73,20 @@ static String currentLine;
 static Vector<String> arguments;
 static Mutex staticMutex;
 
+void InitFPU()
+{
+    // Switch the FPU to round-to-nearest, single precision mode.
+    // ODE needs this to behave predictably in float mode.
+    #ifdef _MSC_VER
+    _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC);  // NOTE(review): MSVC docs state precision control (_MCW_PC) is unsupported on x64 - confirm this only builds for x86
+    #else
+    unsigned control = GetFPUState();
+    control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK);  // Clear the precision and rounding fields; other control bits are preserved
+    control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR);  // Both selected values are 0x0000, so this OR changes no bits
+    SetFPUState(control);
+    #endif
+}
+
 void ErrorDialog(const char* title, const char* message)
 {
     #ifdef WIN32

+ 2 - 0
Engine/Core/ProcessUtils.h

@@ -27,6 +27,8 @@
 
 class Mutex;
 
+/// Initialize the FPU to round-to-nearest, single precision mode. Intended to be called on each thread that needs consistent floating point results.
+void InitFPU();
 /// Display an error dialog with the specified title and message.
 void ErrorDialog(const char* title, const char* message);
 /// Exit the application with an error message to the console.

+ 45 - 29
Engine/Core/WorkQueue.cpp

@@ -22,6 +22,7 @@
 //
 
 #include "Precompiled.h"
+#include "ProcessUtils.h"
 #include "Thread.h"
 #include "Timer.h"
 #include "WorkQueue.h"
@@ -38,7 +39,12 @@ public:
     }
     
     /// Process work items until stopped.
-    virtual void ThreadFunction() { owner_->ProcessItems(index_); }
+    virtual void ThreadFunction()  // Worker thread entry point: set up the FPU, then process work items.
+    {
+        // Set the FPU mode on this worker thread before any work item runs
+        InitFPU();
+        owner_->ProcessItems(index_);  // Hand over to the owning queue, passing this thread's index
+    }
     
     /// Return thread index.
     unsigned GetIndex() const { return index_; }
@@ -56,6 +62,7 @@ WorkQueue::WorkQueue(Context* context) :
     Object(context),
     numActive_(0),
     shutDown_(false),
+    pausing_(false),
     paused_(false)
 {
 }
@@ -113,8 +120,12 @@ void WorkQueue::Pause()
 {
     if (!paused_)
     {
+        pausing_ = true;
+        
         queueMutex_.Acquire();
         paused_ = true;
+        
+        pausing_ = false;
     }
 }
 
@@ -134,7 +145,8 @@ void WorkQueue::Complete()
     {
         Resume();
         
-        for (;;)
+        // Take work items in the main thread until queue empty
+        while (!queue_.Empty())
         {
             queueMutex_.Acquire();
             if (!queue_.Empty())
@@ -145,17 +157,16 @@ void WorkQueue::Complete()
                 item.workFunction_(&item, 0);
             }
             else
-            {
-                if (numActive_)
-                    queueMutex_.Release();
-                else
-                {
-                    // All work items are done. Leave the mutex locked and re-enter pause mode
-                    paused_ = true;
-                    return;
-                }
-            }
+                queueMutex_.Release();
         }
+        
+        // Wait for all work to finish
+        while (!IsCompleted())
+        {
+        }
+        
+        // Pause worker threads by leaving the mutex locked
+        Pause();
     }
 }
 
@@ -176,28 +187,33 @@ void WorkQueue::ProcessItems(unsigned threadIndex)
         if (shutDown_)
             return;
         
-        queueMutex_.Acquire();
-        if (!queue_.Empty())
+        if (pausing_ && !wasActive)
+            Time::Sleep(0);
+        else
         {
-            if (!wasActive)
+            queueMutex_.Acquire();
+            if (!queue_.Empty())
             {
-                ++numActive_;
-                wasActive = true;
+                if (!wasActive)
+                {
+                    ++numActive_;
+                    wasActive = true;
+                }
+                WorkItem item = queue_.Front();
+                queue_.PopFront();
+                queueMutex_.Release();
+                item.workFunction_(&item, threadIndex);
             }
-            WorkItem item = queue_.Front();
-            queue_.PopFront();
-            queueMutex_.Release();
-            item.workFunction_(&item, threadIndex);
-        }
-        else
-        {
-            if (wasActive)
+            else
             {
-                --numActive_;
-                wasActive = false;
+                if (wasActive)
+                {
+                    --numActive_;
+                    wasActive = false;
+                }
+                queueMutex_.Release();
+                Time::Sleep(0);
             }
-            queueMutex_.Release();
-            Time::Sleep(0);
         }
     }
 }

+ 2 - 0
Engine/Core/WorkQueue.h

@@ -86,6 +86,8 @@ private:
     volatile unsigned numActive_;
     /// Shutting down flag.
     volatile bool shutDown_;
+    /// Pausing flag. Indicates the worker threads should not contend for the queue mutex.
+    bool pausing_;  // NOTE(review): set in Pause() and read by worker threads in ProcessItems(), yet not volatile like numActive_/shutDown_ - confirm this is intended
     /// Paused flag. Indicates the queue mutex being locked to prevent worker threads using up CPU time.
     bool paused_;
 };

+ 5 - 2
Engine/Engine/Engine.cpp

@@ -176,11 +176,11 @@ bool Engine::Initialize(const String& windowTitle, const String& logName, const
     log->Open(logName);
     
     // Set amount of worker threads according to the free CPU cores. Leave one for the main thread and another for
-    // GPU & audio drivers, and clamp to a maximum of four for now
+    // GPU & audio drivers, and clamp to a maximum of three for now
     int numCores = GetNumCPUCores();
     if (threads && numCores > 1)
     {
-        int numThreads = Clamp(numCores - 2, 1, 4);
+        int numThreads = Clamp(numCores - 2, 1, 3);
         GetSubsystem<WorkQueue>()->CreateThreads(numThreads);
         
         String workerThreadString = "Created " + String(numThreads) + " worker thread";
@@ -233,6 +233,9 @@ bool Engine::Initialize(const String& windowTitle, const String& logName, const
             GetSubsystem<Audio>()->SetMode(buffer, mixRate, stereo, interpolate);
     }
     
+    // Init FPU state of main thread
+    InitFPU();
+    
     frameTimer_.Reset();
     
     initialized_ = true;

+ 1 - 39
Engine/Physics/PhysicsWorld.cpp

@@ -40,34 +40,6 @@
 #include <ode/ode.h>
 #include "Sort.h"
 
-#ifdef _MSC_VER
-#include <float.h>
-#else
-// From http://stereopsis.com/FPU.html
-
-#define FPU_CW_PREC_MASK        0x0300
-#define FPU_CW_PREC_SINGLE      0x0000
-#define FPU_CW_PREC_DOUBLE      0x0200
-#define FPU_CW_PREC_EXTENDED    0x0300
-#define FPU_CW_ROUND_MASK       0x0c00
-#define FPU_CW_ROUND_NEAR       0x0000
-#define FPU_CW_ROUND_DOWN       0x0400
-#define FPU_CW_ROUND_UP         0x0800
-#define FPU_CW_ROUND_CHOP       0x0c00
-
-inline unsigned GetFPUState()
-{
-    unsigned control = 0;
-    __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
-    return control;
-}
-
-inline void SetFPUState(unsigned control)
-{
-    __asm__ __volatile__ ("fldcw %0" : : "m" (control));
-}
-#endif
-
 #include "DebugNew.h"
 
 static const int DEFAULT_FPS = 60;
@@ -104,17 +76,7 @@ PhysicsWorld::PhysicsWorld(Context* context) :
         ++numInstances;
     }
     
-    // Make sure FPU is in round-to-nearest, single precision mode
-    // This is needed for ODE to behave predictably in float mode
-    #ifdef _MSC_VER
-    _controlfp(_RC_NEAR | _PC_24, _MCW_RC | _MCW_PC);
-    #else
-    unsigned control = GetFPUState();
-    control &= ~(FPU_CW_PREC_MASK | FPU_CW_ROUND_MASK);
-    control |= (FPU_CW_PREC_SINGLE | FPU_CW_ROUND_NEAR);
-    SetFPUState(control);
-    #endif
-        
+    
     // Create the world, the collision space, and contact joint group
     physicsWorld_ = dWorldCreate();
     space_ = dHashSpaceCreate(0);