Browse Source

Fixed hyperthreading detection again.
Send frame update events in Engine::Update(), not in Timer::BeginFrame().

Lasse Öörni 14 years ago
parent
commit
b1ba025970

+ 1 - 1
Docs/Reference.dox

@@ -114,7 +114,7 @@ Events can also be unsubscribed from. See \ref Object::UnsubscribeFromEvent "Uns
 
 \page MainLoop Main loop and frame update
 
-The main loop iteration (also called a frame) is driven by the Engine. In contrast it is the program's (for example Urho3D.exe) responsibility to continuously loop this iteration. The iteration consists of the Engine calling the Time subsystem's \ref Time::BeginFrame "BeginFrame()" and \ref Time::EndFrame "EndFrame()" functions, which causes several events to be sent:
+The main loop iteration (also called a frame) is driven by the Engine. In contrast, it is the program's (for example Urho3D.exe) responsibility to continuously loop this iteration. The iteration consists of the Engine calling the Time subsystem's \ref Time::BeginFrame "BeginFrame()" and \ref Time::EndFrame "EndFrame()" functions and sending the various update events in between. The event order is:
 
 - E_BEGINFRAME: signals the beginning of the new frame. Input and Network react to this to check for operating system window messages and arrived network packets.
 - E_UPDATE: application-wide logic update event. By default each active Scene reacts to this and triggers the scene update (more on this below.)

+ 1 - 1
Engine/Core/CoreEvents.h

@@ -53,7 +53,7 @@ EVENT(E_RENDERUPDATE, RenderUpdate)
 /// Post-render update event.
 EVENT(E_POSTRENDERUPDATE, PostRenderUpdate)
 {
-    PARAM(P_TIMESTEP, TimeStep);          // float
+    PARAM(P_TIMESTEP, TimeStep);            // float
 }
 
 /// Frame end event.

+ 40 - 28
Engine/Core/ProcessUtils.cpp

@@ -38,6 +38,7 @@
 
 #ifdef _MSC_VER
 #include <float.h>
+#include <intrin.h>
 #else
 // From http://stereopsis.com/FPU.html
 
@@ -64,6 +65,14 @@ inline void SetFPUState(unsigned control)
 }
 #endif
 
+void CpuID(int i, int regs[4]) { // Execute the CPUID instruction for function i; regs receives EAX, EBX, ECX, EDX in that order
+    #ifdef _MSC_VER
+    __cpuid(regs, i); // Compiler intrinsic, available through the <intrin.h> include above
+    #else
+    __asm__ __volatile__ ("cpuid" : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3]) : "a" (i), "c" (0)); // Inputs: EAX = i, ECX = 0
+    #endif
+}
+
 #include "DebugNew.h"
 
 #ifdef WIN32
@@ -252,39 +261,42 @@ String GetConsoleInput()
 
 unsigned GetNumCPUCores()
 {
-    #ifdef WIN32
-    SYSTEM_INFO info;
-    GetSystemInfo(&info);
-    unsigned numCores = info.dwNumberOfProcessors;
-    #else
-    unsigned numCores = sysconf(_SC_NPROCESSORS_ONLN);
-    #endif
+    // Get the number of physical CPU cores, not counting hyperthreaded (logical) cores, as creating a worker thread for each
+    // hyperthreaded core results in extra time spent synchronizing. Code based on
+    // http://stackoverflow.com/questions/2901694/programatically-detect-number-of-physical-processors-cores-or-if-hyper-threading
+    int regs[4];
     
-    // If CPU uses hyperthreading, report only half of the cores, as using the "extra" cores for worker threads
-    // seems to result in extra time spent synchronizing
-    unsigned func = 1;
-    unsigned a, b, c, d;
+    // Get vendor
+    char vendor[12];
+    CpuID(0, regs);
+    ((unsigned*)vendor)[0] = regs[1]; // EBX
+    ((unsigned*)vendor)[1] = regs[3]; // EDX
+    ((unsigned*)vendor)[2] = regs[2]; // ECX
+    String cpuVendor = String(vendor, 12);
     
-    // CPUID inline assembly from http://softpixel.com/~cwright/programming/simd/cpuid.php
-    #ifdef _MSC_VER
-    __asm
+    // Get CPU features
+    CpuID(1, regs);
+    unsigned cpuFeatures = regs[3]; // EDX
+    
+    // Logical core count per CPU
+    CpuID(1, regs);
+    unsigned logical = (regs[1] >> 16) & 0xff; // EBX[23:16]
+    unsigned cores = logical;
+    
+    if (cpuVendor == "GenuineIntel")
     {
-        mov eax, func
-        cpuid
-        mov a, eax
-        mov b, ebx
-        mov c, ecx
-        mov d, edx
+        // Get deterministic cache parameters (CPUID function 4); EAX[31:26] encodes the core count minus one
+        CpuID(4, regs);
+        cores = ((regs[0] >> 26) & 0x3f) + 1; // EAX[31:26] + 1
+    }
+    else if (cpuVendor == "AuthenticAMD") 
+    {
+        // Get NC: Number of CPU cores - 1
+        CpuID(0x80000008, regs);
+        cores = ((unsigned)(regs[2] & 0xff)) + 1; // ECX[7:0] + 1
     }
-    #else
-    __asm__ __volatile__ ("cpuid":
-    "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func));
-    #endif
-    
-    if (d & 0x10000000)
-        numCores >>= 1;
     
-    return numCores;
+    return cores;
 }
 
 Mutex& GetStaticMutex()

+ 6 - 26
Engine/Core/Timer.cpp

@@ -75,33 +75,13 @@ void Time::BeginFrame(unsigned mSec)
     timeStep_ = (float)mSec / 1000.0f;
     timeStepMSec_ = mSec;
     
-    {
-        // Frame begin event
-        using namespace BeginFrame;
-        
-        VariantMap eventData;
-        eventData[P_FRAMENUMBER] = frameNumber_;
-        eventData[P_TIMESTEP] = timeStep_;
-        SendEvent(E_BEGINFRAME, eventData);
-    }
+    // Frame begin event
+    using namespace BeginFrame;
     
-    {
-        // Logic update event
-        using namespace Update;
-        
-        VariantMap eventData;
-        eventData[P_TIMESTEP] = timeStep_;
-        SendEvent(E_UPDATE, eventData);
-        
-        // Logic post-update event
-        SendEvent(E_POSTUPDATE, eventData);
-        
-        // Rendering update event
-        SendEvent(E_RENDERUPDATE, eventData);
-        
-        // Post-render update event
-        SendEvent(E_POSTRENDERUPDATE, eventData);
-    }
+    VariantMap eventData;
+    eventData[P_FRAMENUMBER] = frameNumber_;
+    eventData[P_TIMESTEP] = timeStep_;
+    SendEvent(E_BEGINFRAME, eventData);
 }
 
 void Time::EndFrame()

+ 1 - 1
Engine/Core/Timer.h

@@ -36,7 +36,7 @@ public:
     /// Destruct. Reset the low-resolution timer period if set.
     virtual ~Time();
     
-    /// Begin new frame, with (last) frame duration in milliseconds. Send frame start event, then the update events.
+    /// Begin new frame, with (last) frame duration in milliseconds and send frame start event.
     void BeginFrame(unsigned mSec);
     /// End frame. Increment total time and send frame end event.
     void EndFrame();

+ 23 - 3
Engine/Engine/Engine.cpp

@@ -175,12 +175,12 @@ bool Engine::Initialize(const String& windowTitle, const String& logName, const
     Log* log = GetSubsystem<Log>();
     log->Open(logName);
     
-    // Set amount of worker threads according to the free CPU cores. If hyperthreading is in use,
-    // GetNumCPUCores() reports only the actual physical cores. Also reserve one core for the main thread
+    // Set the number of worker threads according to the available CPU cores. GetNumCPUCores() does not report hyperthreaded cores,
+    // as using them leads to worse performance. Also reserve one core for the main thread
     int numCores = GetNumCPUCores();
     if (threads && numCores > 1)
     {
-        int numThreads = Max(numCores - 1, 1);
+        int numThreads = numCores - 1;
         GetSubsystem<WorkQueue>()->CreateThreads(numThreads);
         
         String workerThreadString = "Created " + String(numThreads) + " worker thread";
@@ -291,6 +291,7 @@ void Engine::RunFrame()
     Time* time = GetSubsystem<Time>();
     time->BeginFrame(timeStep_);
     
+    Update();
     Render();
     ApplyFrameLimit();
     
@@ -369,6 +370,25 @@ void Engine::DumpResources()
     LOGRAW("Total memory use of all resources " + String(cache->GetTotalMemoryUse()) + "\n\n");
 }
 
+void Engine::Update()
+{
+    // Send the frame update events in order, starting with the logic update event. All four events share one event data map
+    using namespace Update;
+    
+    VariantMap eventData;
+    eventData[P_TIMESTEP] = (float)timeStep_ / 1000.f; // timeStep_ is the millisecond value RunFrame() passes to Time::BeginFrame(); convert to seconds
+    SendEvent(E_UPDATE, eventData);
+    
+    // Logic post-update event, sent with the same timestep parameter
+    SendEvent(E_POSTUPDATE, eventData);
+    
+    // Rendering update event, sent with the same timestep parameter
+    SendEvent(E_RENDERUPDATE, eventData);
+    
+    // Post-render update event, sent with the same timestep parameter
+    SendEvent(E_POSTRENDERUPDATE, eventData);
+}
+
 void Engine::Render()
 {
     // Do not render if device lost

+ 2 - 0
Engine/Engine/Engine.h

@@ -77,6 +77,8 @@ public:
     /// Return whether the engine has been created in headless mode.
     bool IsHeadless() const { return headless_; }
     
+    /// Send frame update events.
+    void Update();
     /// Render after frame update.
     void Render();
     /// Get the timestep for the next frame and sleep for frame limiting if necessary.