瀏覽代碼

Changes the profiler to use the high-precision timer built into Windows.

Also removes the legacy GetTickCount() fallback, as it is no longer necessary on modern versions of Windows (Windows XP and later support QueryPerformanceCounter).
Jeff Hutchinson 4 年之前
父節點
當前提交
dee89e25b8
共有 3 個文件被更改,包括 36 次插入108 次删除
  1. 26 78
      Engine/source/platform/profiler.cpp
  2. 1 1
      Engine/source/platform/profiler.h
  3. 9 29
      Engine/source/platformWin32/winTimer.cpp

+ 26 - 78
Engine/source/platform/profiler.cpp

@@ -23,11 +23,9 @@
 #include "platform/platform.h"
 
 #if defined(TORQUE_OS_WIN)
-#include<Windows.h> // for SetThreadAffinityMask
-#endif
-
-#if defined(TORQUE_OS_MAC)
-#include <mach/mach_time.h>
+#include<Windows.h> // for SetThreadAffinityMask, QueryPerformanceCounter, QueryPerformanceFrequency
+#elif defined(TORQUE_OS_MAC)
+#include <mach/mach_time.h> // for mach_absolute_time, mach_timebase_info
 #endif
 
 #include "core/stream/fileStream.h"
@@ -63,111 +61,61 @@ Vector<StringTableEntry> gProfilerNodeStack;
 #define PROFILER_DEBUG_POP_NODE() ;
 #endif
 
-#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
+#if defined(TORQUE_OS_WIN)
+
+static bool sQueryPerformanceInit = false;
+static U64 sQueryPerformanceFrequency = 0;
+
 // platform specific get hires times...
-void startHighResolutionTimer(U32 time[2])
+void startHighResolutionTimer(U64 &time)
 {
-   //time[0] = Platform::getRealMilliseconds();
-
-   __asm
-   {
-      push eax
-      push edx
-      push ecx
-      rdtsc
-      mov ecx, time
-      mov DWORD PTR [ecx], eax
-      mov DWORD PTR [ecx + 4], edx
-      pop ecx
-      pop edx
-      pop eax
-   }
+   QueryPerformanceCounter((LARGE_INTEGER*)&time);
 }
 
-U32 endHighResolutionTimer(U32 time[2])
+F64 endHighResolutionTimer(U64 time)
 {
-   U32 ticks;
-   //ticks = Platform::getRealMilliseconds() - time[0];
-   //return ticks;
-
-   __asm
+   if (!sQueryPerformanceInit)
    {
-      push  eax
-      push  edx
-      push  ecx
-      //db    0fh, 31h
-      rdtsc
-      mov   ecx, time
-      sub   edx, DWORD PTR [ecx+4]
-      sbb   eax, DWORD PTR [ecx]
-      mov   DWORD PTR ticks, eax
-      pop   ecx
-      pop   edx
-      pop   eax
+      sQueryPerformanceInit = true;
+      QueryPerformanceFrequency((LARGE_INTEGER*)&sQueryPerformanceFrequency);
    }
-   return ticks;
-}
 
-#elif defined(TORQUE_SUPPORTS_GCC_INLINE_X86_ASM)
+   U64 current;
+   QueryPerformanceCounter((LARGE_INTEGER*)&current);
 
-// platform specific get hires times...
-void startHighResolutionTimer(U32 time[2])
-{
-   __asm__ __volatile__(
-      "rdtsc\n"
-      : "=a" (time[0]), "=d" (time[1])
-      );
-}
-
-U32 endHighResolutionTimer(U32 time[2])
-{
-   U32 ticks;
-   __asm__ __volatile__(
-      "rdtsc\n"
-      "sub  0x4(%%ecx),  %%edx\n"
-      "sbb  (%%ecx),  %%eax\n"
-      : "=a" (ticks) : "c" (time)
-      );
-   return ticks;
+   return ((1000.0 * static_cast<F64>(current-time)) / static_cast<F64>(sQueryPerformanceFrequency));
 }
 
 #elif defined(TORQUE_OS_MAC)
 
-
-void startHighResolutionTimer(U32 time[2]) {
-   U64 now = mach_absolute_time();
-   AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
-   memcpy(time, &now, sizeof(U64));
+void startHighResolutionTimer(U64 &time) {
+   time = mach_absolute_time();
 }
 
-U32 endHighResolutionTimer(U32 time[2])  {
+F64 endHighResolutionTimer(U64 time)  {
    static mach_timebase_info_data_t    sTimebaseInfo = {0, 0};
    
    U64 now = mach_absolute_time();
-   AssertFatal(sizeof(U32[2]) == sizeof(U64), "Can't pack mach_absolute_time into U32[2]");
-   U64 then;
-   memcpy(&then, time, sizeof(U64));
    
    if(sTimebaseInfo.denom == 0){
       mach_timebase_info(&sTimebaseInfo);
    }
    // Handle the micros/nanos conversion first, because shedding a few bits is better than overflowing.
-   U64 elapsedMicros = ((now - then) / 1000) * sTimebaseInfo.numer / sTimebaseInfo.denom;
+   F64 elapsedMicros = (static_cast<F64>(now - time) / 1000.0) * static_cast<F64>(sTimebaseInfo.numer) / static_cast<F64>(sTimebaseInfo.denom);
    
-   return (U32)elapsedMicros; // Just truncate, and hope we didn't overflow
+   return elapsedMicros; // Just truncate, and hope we didn't overflow
 }
 
 #else
 
-void startHighResolutionTimer(U32 time[2])
+void startHighResolutionTimer(U64 &time)
 {
-   time[0] = Platform::getRealMilliseconds();
+   time = (U64)Platform::getRealMilliseconds();
 }
 
-U32 endHighResolutionTimer(U32 time[2])
+F64 endHighResolutionTimer(U64 time)
 {
-   U32 ticks = Platform::getRealMilliseconds() - time[0];
-   return ticks;
+   return (F64)Platform::getRealMilliseconds() - time;
 }
 
 #endif

+ 1 - 1
Engine/source/platform/profiler.h

@@ -153,7 +153,7 @@ struct ProfilerData
    U32 mHash;
    U32 mSubDepth;
    U32 mInvokeCount;
-   U32 mStartTime[2];
+   U64 mStartTime;
    F64 mTotalTime;
    F64 mSubTime;
 #ifdef TORQUE_ENABLE_PROFILE_PATH

+ 9 - 29
Engine/source/platformWin32/winTimer.cpp

@@ -30,14 +30,11 @@
 class Win32Timer : public PlatformTimer
 {
 private:
-   U32 mTickCountCurrent;
-   U32 mTickCountNext;
    S64 mPerfCountCurrent;
    S64 mPerfCountNext;
    S64 mFrequency;
    F64 mPerfCountRemainderCurrent;
    F64 mPerfCountRemainderNext;
-   bool mUsingPerfCounter;
 public:
 
    Win32Timer()
@@ -45,43 +42,26 @@ public:
       mPerfCountRemainderCurrent = 0.0f;
       mPerfCountRemainderNext = 0.0f;
 
-      // Attempt to use QPC for high res timing, otherwise fallback to GTC.
-      mUsingPerfCounter = QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
-      if(mUsingPerfCounter)
-         mUsingPerfCounter = QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
+      QueryPerformanceFrequency((LARGE_INTEGER *) &mFrequency);
+      QueryPerformanceCounter((LARGE_INTEGER *) &mPerfCountCurrent);
       mPerfCountNext = 0.0;
-      if (!mUsingPerfCounter)
-         mTickCountCurrent = GetTickCount();
-      else
-         mTickCountCurrent = 0;
-      mTickCountNext = 0;
    }
 
    const S32 getElapsedMs()
    {
-      if(mUsingPerfCounter)
-      {
-         // Use QPC, update remainders so we don't leak time, and return the elapsed time.
-         QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
-         F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency));
-         elapsedF64 += mPerfCountRemainderCurrent;
-         U32 elapsed = (U32)mFloor(elapsedF64);
-         mPerfCountRemainderNext = elapsedF64 - F64(elapsed);
+      // Use QPC, update remainders so we don't leak time, and return the elapsed time.
+      QueryPerformanceCounter( (LARGE_INTEGER *) &mPerfCountNext);
+      F64 elapsedF64 = (1000.0 * F64(mPerfCountNext - mPerfCountCurrent) / F64(mFrequency));
+      elapsedF64 += mPerfCountRemainderCurrent;
+      U32 elapsed = (U32)mFloor(elapsedF64);
+      mPerfCountRemainderNext = elapsedF64 - F64(elapsed);
 
-         return elapsed;
-      }
-      else
-      {
-         // Do something naive with GTC.
-         mTickCountNext = GetTickCount();
-         return mTickCountNext - mTickCountCurrent;
-      }
+      return elapsed;
    }
 
    void reset()
    {
       // Do some simple copying to reset the timer to 0.
-      mTickCountCurrent = mTickCountNext;
       mPerfCountCurrent = mPerfCountNext;
       mPerfCountRemainderCurrent = mPerfCountRemainderNext;
    }