Browse Source

Merge pull request #429 from JeffProgrammer/high_resolution_timer

High resolution timer fixes
Brian Roberts 4 năm trước cách đây
mục cha
commit
a0581dce2a

+ 26 - 78
Engine/source/platform/profiler.cpp

@@ -23,11 +23,9 @@
 #include "platform/platform.h"
 
 #if defined(TORQUE_OS_WIN)
-#include<Windows.h> // for SetThreadAffinityMask
-#endif
-
-#if defined(TORQUE_OS_MAC)
-#include <mach/mach_time.h>
+#include<Windows.h> // for SetThreadAffinityMask, QueryPerformanceCounter, QueryPerformanceFrequency
+#elif defined(TORQUE_OS_MAC)
+#include <mach/mach_time.h> // for mach_absolute_time, mach_timebase_info
 #endif
 
 #include "core/stream/fileStream.h"
@@ -63,111 +61,61 @@ Vector<StringTableEntry> gProfilerNodeStack;
 #define PROFILER_DEBUG_POP_NODE() ;
 #endif
 
-#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
+#if defined(TORQUE_OS_WIN)
+
+static bool sQueryPerformanceInit = false; // set to true once sQueryPerformanceFrequency has been queried
+static U64 sQueryPerformanceFrequency = 0; // cached QueryPerformanceFrequency() result; 0 until first endHighResolutionTimer() call
+
 // platform specific get hires times...
-void startHighResolutionTimer(U32 time[2])
+void startHighResolutionTimer(U64 &time) // out-param: receives the starting timestamp
 {
-   //time[0] = Platform::getRealMilliseconds();
-
-   __asm
-   {
-      push eax
-      push edx
-      push ecx
-      rdtsc
-      mov ecx, time
-      mov DWORD PTR [ecx], eax
-      mov DWORD PTR [ecx + 4], edx
-      pop ecx
-      pop edx
-      pop eax
-   }
+   QueryPerformanceCounter((LARGE_INTEGER*)&time); // writes the current performance-counter value into the caller's U64
 }
 
-U32 endHighResolutionTimer(U32 time[2])
+F64 endHighResolutionTimer(U64 time) // time: counter value captured by startHighResolutionTimer(); returns elapsed milliseconds
 {
-   U32 ticks;
-   //ticks = Platform::getRealMilliseconds() - time[0];
-   //return ticks;
-
-   __asm
+   if (!sQueryPerformanceInit) // first call only: look up and cache the counter frequency
    {
-      push  eax
-      push  edx
-      push  ecx
-      //db    0fh, 31h
-      rdtsc
-      mov   ecx, time
-      sub   edx, DWORD PTR [ecx+4]
-      sbb   eax, DWORD PTR [ecx]
-      mov   DWORD PTR ticks, eax
-      pop   ecx
-      pop   edx
-      pop   eax
+      sQueryPerformanceInit = true;
+      QueryPerformanceFrequency((LARGE_INTEGER*)&sQueryPerformanceFrequency); // frequency is in counts per second
    }
-   return ticks;
-}
 
-#elif defined(TORQUE_SUPPORTS_GCC_INLINE_X86_ASM)
+   U64 current;
+   QueryPerformanceCounter((LARGE_INTEGER*)&current); // sample the counter again for the end of the interval
 
-// platform specific get hires times...
-void startHighResolutionTimer(U32 time[2])
-{
-   __asm__ __volatile__(
-      "rdtsc\n"
-      : "=a" (time[0]), "=d" (time[1])
-      );
-}
-
-U32 endHighResolutionTimer(U32 time[2])
-{
-   U32 ticks;
-   __asm__ __volatile__(
-      "rdtsc\n"
-      "sub  0x4(%%ecx),  %%edx\n"
-      "sbb  (%%ecx),  %%eax\n"
-      : "=a" (ticks) : "c" (time)
-      );
-   return ticks;
+   return ((1000.0 * static_cast<F64>(current-time)) / static_cast<F64>(sQueryPerformanceFrequency)); // 1000 * counts / (counts per second) => milliseconds
 }
 
 #elif defined(TORQUE_OS_MAC)
 
-
-void startHighResolutionTimer(U32 time[2]) {
-   U64 now = mach_absolute_time();
-   AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
-   memcpy(time, &now, sizeof(U64));
+void startHighResolutionTimer(U64 &time) { // out-param: receives the starting timestamp
+   time = mach_absolute_time(); // raw Mach timebase ticks; endHighResolutionTimer() scales them via mach_timebase_info
 }
 
-U32 endHighResolutionTimer(U32 time[2])  {
+F64 endHighResolutionTimer(U64 time)  { // time: value captured by startHighResolutionTimer(); returns the elapsed time as F64
    static mach_timebase_info_data_t    sTimebaseInfo = {0, 0}; // denom == 0 means the timebase has not been queried yet
    
    U64 now = mach_absolute_time();
-   AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
-   U64 then;
-   memcpy(&then, time, sizeof(U64));
    
    if(sTimebaseInfo.denom == 0){
       mach_timebase_info(&sTimebaseInfo);
    }
    // Divide by 1000 (nanos -> micros) before applying numer/denom. That order was chosen for the old U64 math to avoid overflow; in F64 it is harmless.
-   U64 elapsedMicros = ((now - then) / 1000) * sTimebaseInfo.numer / sTimebaseInfo.denom;
+   F64 elapsedMicros = (static_cast<F64>(now - time) / 1000.0) * static_cast<F64>(sTimebaseInfo.numer) / static_cast<F64>(sTimebaseInfo.denom);
    
-   return (U32)elapsedMicros; // Just truncate, and hope we didn't overflow
+   return elapsedMicros; // F64 result, no truncation. NOTE(review): microseconds here, but the TORQUE_OS_WIN branch computes milliseconds -- confirm callers expect this.
 }
 
 #else
 
-void startHighResolutionTimer(U32 time[2])
+void startHighResolutionTimer(U64 &time) // out-param: receives the starting timestamp
 {
-   time[0] = Platform::getRealMilliseconds();
+   time = (U64)Platform::getRealMilliseconds(); // generic fallback: store the Platform::getRealMilliseconds() reading, widened to U64
 }
 
-U32 endHighResolutionTimer(U32 time[2])
+F64 endHighResolutionTimer(U64 time) // time: reading stored by startHighResolutionTimer()
 {
-   U32 ticks = Platform::getRealMilliseconds() - time[0];
-   return ticks;
+   return (F64)Platform::getRealMilliseconds() - time; // difference of two getRealMilliseconds() readings; the subtraction is done in F64
 }
 
 #endif

+ 1 - 1
Engine/source/platform/profiler.h

@@ -153,7 +153,7 @@ struct ProfilerData
    U32 mHash;
    U32 mSubDepth;
    U32 mInvokeCount;
-   U32 mStartTime[2];
+   U64 mStartTime;
    F64 mTotalTime;
    F64 mSubTime;
 #ifdef TORQUE_ENABLE_PROFILE_PATH

+ 9 - 29
Engine/source/platformWin32/winTimer.cpp

@@ -30,14 +30,11 @@
 class Win32Timer : public PlatformTimer
 {
 private:
-   U32 mTickCountCurrent;
-   U32 mTickCountNext;
    S64 mPerfCountCurrent;
    S64 mPerfCountNext;
    S64 mFrequency;
    F64 mPerfCountRemainderCurrent;
    F64 mPerfCountRemainderNext;
-   bool mUsingPerfCounter;
 public:
 
    Win32Timer()
@@ -45,43 +42,26 @@ public:
       mPerfCountRemainderCurrent = 0.0f;
       mPerfCountRemainderNext = 0.0f;
 
-      // Attempt to use QPC for high res timing, otherwise fallback to GTC.
-      mUsingPerfCounter = QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
-      if(mUsingPerfCounter)
-         mUsingPerfCounter = QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
+      QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
+      QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
       mPerfCountNext = 0.0;
-      if (!mUsingPerfCounter)
-         mTickCountCurrent = GetTickCount();
-      else
-         mTickCountCurrent = 0;
-      mTickCountNext = 0;
    }
 
    const S32 getElapsedMs()
    {
-      if(mUsingPerfCounter)
-      {
-         // Use QPC, update remainders so we don't leak time, and return the elapsed time.
-         QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
-         F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency));
-         elapsedF64 += mPerfCountRemainderCurrent;
-         U32 elapsed = (U32)mFloor(elapsedF64);
-         mPerfCountRemainderNext = elapsedF64 - F64(elapsed);
+      // Use QPC, update remainders so we don't leak time, and return the elapsed time.
+      QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
+      F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency)); // counter delta scaled to milliseconds
+      elapsedF64 += mPerfCountRemainderCurrent; // add the stored fractional remainder
+      U32 elapsed = (U32)mFloor(elapsedF64); // whole milliseconds
+      mPerfCountRemainderNext = elapsedF64 - F64(elapsed); // fractional part; reset() copies it into mPerfCountRemainderCurrent
 
-         return elapsed;
-      }
-      else
-      {
-         // Do something naive with GTC.
-         mTickCountNext = GetTickCount();
-         return mTickCountNext - mTickCountCurrent;
-      }
+      return elapsed; // the current baseline is left untouched; only reset() advances it
    }
 
    void reset()
    {
       // Do some simple copying to reset the timer to 0.
-      mTickCountCurrent = mTickCountNext;
       mPerfCountCurrent = mPerfCountNext; // new baseline = counter value last stored in mPerfCountNext (written by getElapsedMs())
       mPerfCountRemainderCurrent = mPerfCountRemainderNext; // carry the fractional milliseconds computed by getElapsedMs()
    }