Browse Source

pstats: Add support for profiling thread context switches

Disabled by default, enable with `pstats-thread-profiling true` in Config.prc
rdb 3 years ago
parent
commit
fd033e66f1

+ 4 - 0
panda/src/pipeline/conditionVarPosixImpl.I

@@ -39,7 +39,11 @@ INLINE ConditionVarPosixImpl::
 INLINE void ConditionVarPosixImpl::
 INLINE void ConditionVarPosixImpl::
 wait() {
 wait() {
   TAU_PROFILE("ConditionVarPosixImpl::wait()", " ", TAU_USER);
   TAU_PROFILE("ConditionVarPosixImpl::wait()", " ", TAU_USER);
+#ifdef DO_PSTATS
+  int result = _wait_func(&_cvar, &_mutex._lock);
+#else
   int result = pthread_cond_wait(&_cvar, &_mutex._lock);
   int result = pthread_cond_wait(&_cvar, &_mutex._lock);
+#endif
 #ifndef NDEBUG
 #ifndef NDEBUG
   if (result != 0) {
   if (result != 0) {
     pipeline_cat.error()
     pipeline_cat.error()

+ 10 - 0
panda/src/pipeline/conditionVarPosixImpl.cxx

@@ -18,6 +18,12 @@
 #include "conditionVarPosixImpl.h"
 #include "conditionVarPosixImpl.h"
 #include <sys/time.h>
 #include <sys/time.h>
 
 
+int (*ConditionVarPosixImpl::_wait_func)(pthread_cond_t *,
+                                         pthread_mutex_t *) = &pthread_cond_wait;
+int (*ConditionVarPosixImpl::_timedwait_func)(pthread_cond_t *,
+                                              pthread_mutex_t *,
+                                              const struct timespec *) = &pthread_cond_timedwait;
+
 /**
 /**
  *
  *
  */
  */
@@ -41,7 +47,11 @@ wait(double timeout) {
     ++ts.tv_sec;
     ++ts.tv_sec;
   }
   }
 
 
+#ifdef DO_PSTATS
+  int result = _timedwait_func(&_cvar, &_mutex._lock, &ts);
+#else
   int result = pthread_cond_timedwait(&_cvar, &_mutex._lock, &ts);
   int result = pthread_cond_timedwait(&_cvar, &_mutex._lock, &ts);
+#endif
 #ifndef NDEBUG
 #ifndef NDEBUG
   if (result != 0 && result != ETIMEDOUT) {
   if (result != 0 && result != ETIMEDOUT) {
     pipeline_cat.error()
     pipeline_cat.error()

+ 5 - 0
panda/src/pipeline/conditionVarPosixImpl.h

@@ -43,6 +43,11 @@ public:
 private:
 private:
   MutexPosixImpl &_mutex;
   MutexPosixImpl &_mutex;
   pthread_cond_t _cvar;
   pthread_cond_t _cvar;
+
+  static int (*_wait_func)(pthread_cond_t *, pthread_mutex_t *);
+  static int (*_timedwait_func)(pthread_cond_t *, pthread_mutex_t *,
+                                const struct timespec *);
+  friend class PStatClientImpl;
 };
 };
 
 
 #include "conditionVarPosixImpl.I"
 #include "conditionVarPosixImpl.I"

+ 8 - 0
panda/src/pipeline/conditionVarWin32Impl.I

@@ -23,7 +23,11 @@ ConditionVarWin32Impl(MutexWin32Impl &mutex) : _mutex(mutex) {
  */
  */
 INLINE void ConditionVarWin32Impl::
 INLINE void ConditionVarWin32Impl::
 wait() {
 wait() {
+#ifdef DO_PSTATS
+  _wait_func(&_cvar, &_mutex._lock, INFINITE, 0);
+#else
   SleepConditionVariableSRW(&_cvar, &_mutex._lock, INFINITE, 0);
   SleepConditionVariableSRW(&_cvar, &_mutex._lock, INFINITE, 0);
+#endif
 }
 }
 
 
 /**
 /**
@@ -31,7 +35,11 @@ wait() {
  */
  */
 INLINE void ConditionVarWin32Impl::
 INLINE void ConditionVarWin32Impl::
 wait(double timeout) {
 wait(double timeout) {
+#ifdef DO_PSTATS
+  _wait_func(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0);
+#else
   SleepConditionVariableSRW(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0);
   SleepConditionVariableSRW(&_cvar, &_mutex._lock, (DWORD)(timeout * 1000.0), 0);
+#endif
 }
 }
 
 
 /**
 /**

+ 3 - 0
panda/src/pipeline/conditionVarWin32Impl.cxx

@@ -17,4 +17,7 @@
 
 
 #include "conditionVarWin32Impl.h"
 #include "conditionVarWin32Impl.h"
 
 
+// This function gets replaced by PStats to measure the time spent waiting.
+BOOL (*ConditionVarWin32Impl::_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG) = &SleepConditionVariableSRW;
+
 #endif  // _WIN32
 #endif  // _WIN32

+ 3 - 0
panda/src/pipeline/conditionVarWin32Impl.h

@@ -40,6 +40,9 @@ public:
 private:
 private:
   MutexWin32Impl &_mutex;
   MutexWin32Impl &_mutex;
   CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT;
   CONDITION_VARIABLE _cvar = CONDITION_VARIABLE_INIT;
+
+  static BOOL (*_wait_func)(PCONDITION_VARIABLE, PSRWLOCK, DWORD, ULONG);
+  friend class PStatClientImpl;
 };
 };
 
 
 #include "conditionVarWin32Impl.I"
 #include "conditionVarWin32Impl.I"

+ 20 - 0
panda/src/pipeline/thread.I

@@ -191,7 +191,11 @@ is_simple_threads() {
 INLINE void Thread::
 INLINE void Thread::
 sleep(double seconds) {
 sleep(double seconds) {
   TAU_PROFILE("void Thread::sleep(double)", " ", TAU_USER);
   TAU_PROFILE("void Thread::sleep(double)", " ", TAU_USER);
+#ifdef DO_PSTATS
+  _sleep_func(seconds);
+#else
   ThreadImpl::sleep(seconds);
   ThreadImpl::sleep(seconds);
+#endif
 }
 }
 
 
 /**
 /**
@@ -200,7 +204,11 @@ sleep(double seconds) {
 INLINE void Thread::
 INLINE void Thread::
 force_yield() {
 force_yield() {
   TAU_PROFILE("void Thread::yield()", " ", TAU_USER);
   TAU_PROFILE("void Thread::yield()", " ", TAU_USER);
+#ifdef DO_PSTATS
+  _yield_func();
+#else
   ThreadImpl::yield();
   ThreadImpl::yield();
+#endif
 }
 }
 
 
 /**
 /**
@@ -214,6 +222,18 @@ consider_yield() {
   ThreadImpl::consider_yield();
   ThreadImpl::consider_yield();
 }
 }
 
 
+
+/**
+ * Returns thread statistics.  The first number is the total number of context
+ * switches reported by the OS, and the second number is the number of
+ * involuntary context switches (ie. the thread was scheduled out by the OS),
+ * if known.
+ */
+INLINE bool Thread::
+get_context_switches(size_t &total, size_t &involuntary) {
+  return ThreadImpl::get_context_switches(total, involuntary);
+}
+
 /**
 /**
  * Returns true if the thread has been started, false if it has not, or if
  * Returns true if the thread has been started, false if it has not, or if
  * join() has already been called.
  * join() has already been called.

+ 3 - 0
panda/src/pipeline/thread.cxx

@@ -22,6 +22,9 @@ Thread *Thread::_main_thread;
 Thread *Thread::_external_thread;
 Thread *Thread::_external_thread;
 TypeHandle Thread::_type_handle;
 TypeHandle Thread::_type_handle;
 
 
+void (*Thread::_sleep_func)(double) = &ThreadImpl::sleep;
+void (*Thread::_yield_func)() = &ThreadImpl::yield;
+
 /**
 /**
  * Creates a new Thread object, but does not immediately start executing it.
  * Creates a new Thread object, but does not immediately start executing it.
  * This gives the caller a chance to store it in a PT(Thread) object, if
  * This gives the caller a chance to store it in a PT(Thread) object, if

+ 6 - 0
panda/src/pipeline/thread.h

@@ -81,6 +81,8 @@ PUBLISHED:
   BLOCKING INLINE static void force_yield();
   BLOCKING INLINE static void force_yield();
   BLOCKING INLINE static void consider_yield();
   BLOCKING INLINE static void consider_yield();
 
 
+  INLINE static bool get_context_switches(size_t &total, size_t &involuntary);
+
   virtual void output(std::ostream &out) const;
   virtual void output(std::ostream &out) const;
   void output_blocker(std::ostream &out) const;
   void output_blocker(std::ostream &out) const;
   static void write_status(std::ostream &out);
   static void write_status(std::ostream &out);
@@ -162,6 +164,10 @@ private:
   static Thread *_main_thread;
   static Thread *_main_thread;
   static Thread *_external_thread;
   static Thread *_external_thread;
 
 
+  static void (*_sleep_func)(double);
+  static void (*_yield_func)();
+  friend class PStatClientImpl;
+
 public:
 public:
   static TypeHandle get_class_type() {
   static TypeHandle get_class_type() {
     return _type_handle;
     return _type_handle;

+ 8 - 0
panda/src/pipeline/threadDummyImpl.I

@@ -124,3 +124,11 @@ yield() {
 INLINE void ThreadDummyImpl::
 INLINE void ThreadDummyImpl::
 consider_yield() {
 consider_yield() {
 }
 }
+
+/**
+ *
+ */
+INLINE bool ThreadDummyImpl::
+get_context_switches(size_t &, size_t &) {
+  return false;
+}

+ 2 - 0
panda/src/pipeline/threadDummyImpl.h

@@ -57,6 +57,8 @@ public:
   INLINE static void sleep(double seconds);
   INLINE static void sleep(double seconds);
   INLINE static void yield();
   INLINE static void yield();
   INLINE static void consider_yield();
   INLINE static void consider_yield();
+
+  INLINE static bool get_context_switches(size_t &, size_t &);
 };
 };
 
 
 #include "threadDummyImpl.I"
 #include "threadDummyImpl.I"

+ 25 - 0
panda/src/pipeline/threadPosixImpl.cxx

@@ -21,6 +21,11 @@
 #include "config_pipeline.h"
 #include "config_pipeline.h"
 #include <sched.h>
 #include <sched.h>
 
 
+// Used for getrusage().
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
 #ifdef ANDROID
 #ifdef ANDROID
 #include "config_express.h"
 #include "config_express.h"
 #include <jni.h>
 #include <jni.h>
@@ -248,6 +253,26 @@ bind_java_thread() {
 }
 }
 #endif  // ANDROID
 #endif  // ANDROID
 
 
+/**
+ * Returns the number of context switches that occurred on the current thread.
+ * The first number is the total number of context switches reported by the OS,
+ * and the second number is the number of involuntary context switches (ie. the
+ * thread was scheduled out by the OS), if known, otherwise zero.
+ * Returns true if context switch information was available, false otherwise.
+ */
+bool ThreadPosixImpl::
+get_context_switches(size_t &total, size_t &involuntary) {
+#ifdef RUSAGE_THREAD
+  struct rusage usage;
+  if (getrusage(RUSAGE_THREAD, &usage) == 0) {
+    total = (size_t)usage.ru_nvcsw;
+    involuntary = (size_t)usage.ru_nivcsw;
+    return true;
+  }
+#endif
+  return false;
+}
+
 /**
 /**
  * The entry point of each thread.
  * The entry point of each thread.
  */
  */

+ 2 - 0
panda/src/pipeline/threadPosixImpl.h

@@ -63,6 +63,8 @@ public:
   static void bind_java_thread();
   static void bind_java_thread();
 #endif
 #endif
 
 
+  static bool get_context_switches(size_t &total, size_t &involuntary);
+
 private:
 private:
   static void *root_func(void *data);
   static void *root_func(void *data);
   static Thread *init_current_thread();
   static Thread *init_current_thread();

+ 29 - 0
panda/src/pipeline/threadSimpleImpl.cxx

@@ -55,6 +55,9 @@ ThreadSimpleImpl(Thread *parent_obj) :
 #ifdef WIN32
 #ifdef WIN32
   _win32_system_thread_id = 0;
   _win32_system_thread_id = 0;
 #endif
 #endif
+
+  _context_switches = 0;
+  _involuntary_context_switches = 0;
 }
 }
 
 
 /**
 /**
@@ -229,6 +232,32 @@ yield_this(bool volunteer) {
   }
   }
   _manager->enqueue_ready(this, true);
   _manager->enqueue_ready(this, true);
   _manager->next_context();
   _manager->next_context();
+  if (!volunteer) {
+    // Technically all context switches are voluntary here, but to make this
+    // distinction meaningful, we count it as involuntary on consider_yield().
+    ++_involuntary_context_switches;
+  }
+}
+
+/**
+ * Returns the number of context switches that occurred on the current thread.
+ * The first number is the total number of context switches reported by the OS,
+ * and the second number is the number of involuntary context switches (ie. the
+ * thread was scheduled out by the OS), if known, otherwise zero.
+ * Returns true if context switch information was available, false otherwise.
+ */
+bool ThreadSimpleImpl::
+get_context_switches(size_t &total, size_t &involuntary) {
+  ThreadSimpleManager *manager = ThreadSimpleManager::get_global_ptr();
+  if (manager->is_same_system_thread()) {
+    ThreadSimpleImpl *thread = manager->get_current_thread();
+    if (thread != nullptr) {
+      total = thread->_context_switches;
+      involuntary = thread->_involuntary_context_switches;
+      return true;
+    }
+  }
+  return false;
 }
 }
 
 
 /**
 /**

+ 5 - 0
panda/src/pipeline/threadSimpleImpl.h

@@ -77,6 +77,8 @@ public:
 
 
   INLINE static void write_status(std::ostream &out);
   INLINE static void write_status(std::ostream &out);
 
 
+  static bool get_context_switches(size_t &total, size_t &involuntary);
+
 private:
 private:
   static void st_begin_thread(void *data);
   static void st_begin_thread(void *data);
   void begin_thread();
   void begin_thread();
@@ -142,6 +144,9 @@ private:
   DWORD _win32_system_thread_id;
   DWORD _win32_system_thread_id;
 #endif
 #endif
 
 
+  size_t _context_switches;
+  size_t _involuntary_context_switches;
+
   friend class ThreadSimpleManager;
   friend class ThreadSimpleManager;
 };
 };
 
 

+ 2 - 0
panda/src/pipeline/threadSimpleManager.cxx

@@ -233,6 +233,8 @@ next_context() {
     unref_delete(finished_thread->_parent_obj);
     unref_delete(finished_thread->_parent_obj);
   }
   }
 
 
+  ++_current_thread->_context_switches;
+
   // Mark the current thread's resume point.
   // Mark the current thread's resume point.
 
 
 #ifdef HAVE_PYTHON
 #ifdef HAVE_PYTHON

+ 2 - 1
panda/src/pipeline/threadWin32Impl.I

@@ -17,7 +17,8 @@
 INLINE ThreadWin32Impl::
 INLINE ThreadWin32Impl::
 ThreadWin32Impl(Thread *parent_obj) :
 ThreadWin32Impl(Thread *parent_obj) :
   _cv(_mutex),
   _cv(_mutex),
-  _parent_obj(parent_obj)
+  _parent_obj(parent_obj),
+  _profiling(0)
 {
 {
   _thread = 0;
   _thread = 0;
   _joinable = false;
   _joinable = false;

+ 59 - 0
panda/src/pipeline/threadWin32Impl.cxx

@@ -20,9 +20,34 @@
 #include "pointerTo.h"
 #include "pointerTo.h"
 #include "config_pipeline.h"
 #include "config_pipeline.h"
 
 
+#include <windows.h>
+
 static thread_local Thread *_current_thread = nullptr;
 static thread_local Thread *_current_thread = nullptr;
 static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
 static patomic_flag _main_thread_known = ATOMIC_FLAG_INIT;
 
 
+#if _WIN32_WINNT < 0x0601
+// Requires Windows 7.
+static DWORD (*EnableThreadProfiling)(HANDLE, DWORD, DWORD64, HANDLE *) = nullptr;
+static DWORD (*DisableThreadProfiling)(HANDLE) = nullptr;
+static DWORD (*ReadThreadProfilingData)(HANDLE, DWORD, PPERFORMANCE_DATA data) = nullptr;
+
+static bool init_thread_profiling() {
+  static bool inited = false;
+  if (!inited) {
+    HMODULE kernel32 = GetModuleHandleA("kernel32.dll");
+    EnableThreadProfiling = (decltype(EnableThreadProfiling))GetProcAddress(kernel32, "EnableThreadProfiling");
+    DisableThreadProfiling = (decltype(DisableThreadProfiling))GetProcAddress(kernel32, "DisableThreadProfiling");
+    ReadThreadProfilingData = (decltype(ReadThreadProfilingData))GetProcAddress(kernel32, "ReadThreadProfilingData");
+    inited = true;
+  }
+  return (EnableThreadProfiling && DisableThreadProfiling && ReadThreadProfilingData);
+}
+#else
+static bool init_thread_profiling() {
+  return true;
+}
+#endif
+
 /**
 /**
  * Called by get_current_thread() if the current thread pointer is null; checks
  * Called by get_current_thread() if the current thread pointer is null; checks
  * whether it might be the main thread.
  * whether it might be the main thread.
@@ -170,6 +195,35 @@ bind_thread(Thread *thread) {
   _current_thread = thread;
   _current_thread = thread;
 }
 }
 
 
+/**
+ * Returns the number of context switches that occurred on the current thread.
+ * The first number is the total number of context switches reported by the OS,
+ * and the second number is the number of involuntary context switches (ie. the
+ * thread was scheduled out by the OS), if known, otherwise zero.
+ * Returns true if context switch information was available, false otherwise.
+ */
+bool ThreadWin32Impl::
+get_context_switches(size_t &total, size_t &involuntary) {
+  Thread *thread = get_current_thread();
+  ThreadWin32Impl *self = &thread->_impl;
+
+  if (!self->_profiling && init_thread_profiling()) {
+    DWORD result = EnableThreadProfiling(GetCurrentThread(), THREAD_PROFILING_FLAG_DISPATCH, 0, &self->_profiling);
+    if (result != ERROR_SUCCESS) {
+      self->_profiling = 0;
+      return false;
+    }
+  }
+
+  PERFORMANCE_DATA data = {sizeof(PERFORMANCE_DATA), PERFORMANCE_DATA_VERSION};
+  if (ReadThreadProfilingData(self->_profiling, READ_THREAD_PROFILING_FLAG_DISPATCHING, &data) == ERROR_SUCCESS) {
+    total = data.ContextSwitchCount;
+    involuntary = 0;
+    return true;
+  }
+  return false;
+}
+
 /**
 /**
  * The entry point of each thread.
  * The entry point of each thread.
  */
  */
@@ -212,6 +266,11 @@ root_func(LPVOID data) {
       self->_mutex.unlock();
       self->_mutex.unlock();
     }
     }
 
 
+    if (self->_profiling != 0) {
+      DisableThreadProfiling(self->_profiling);
+      self->_profiling = 0;
+    }
+
     // Now drop the parent object reference that we grabbed in start(). This
     // Now drop the parent object reference that we grabbed in start(). This
     // might delete the parent object, and in turn, delete the ThreadWin32Impl
     // might delete the parent object, and in turn, delete the ThreadWin32Impl
     // object.
     // object.

+ 3 - 0
panda/src/pipeline/threadWin32Impl.h

@@ -52,6 +52,8 @@ public:
   INLINE static void yield();
   INLINE static void yield();
   INLINE static void consider_yield();
   INLINE static void consider_yield();
 
 
+  static bool get_context_switches(size_t &total, size_t &involuntary);
+
 private:
 private:
   static DWORD WINAPI root_func(LPVOID data);
   static DWORD WINAPI root_func(LPVOID data);
 
 
@@ -69,6 +71,7 @@ private:
   DWORD _thread_id;
   DWORD _thread_id;
   bool _joinable;
   bool _joinable;
   Status _status;
   Status _status;
+  HANDLE _profiling;
 };
 };
 
 
 #include "threadWin32Impl.I"
 #include "threadWin32Impl.I"

+ 5 - 0
panda/src/pstatclient/config_pstatclient.cxx

@@ -82,6 +82,11 @@ ConfigVariableBool pstats_gpu_timing
           "is not usually an accurate reflectino of how long the actual "
           "is not usually an accurate reflectino of how long the actual "
           "operation takes on the video card."));
           "operation takes on the video card."));
 
 
+ConfigVariableBool pstats_thread_profiling
+("pstats-thread-profiling", false,
+ PRC_DESC("Set this true to query the system for thread statistics, such as "
+          "the number of context switches and time spent waiting."));
+
 // The rest are different in that they directly control the server, not the
 // The rest are different in that they directly control the server, not the
 // client.
 // client.
 ConfigVariableBool pstats_scroll_mode
 ConfigVariableBool pstats_scroll_mode

+ 1 - 0
panda/src/pstatclient/config_pstatclient.h

@@ -38,6 +38,7 @@ extern EXPCL_PANDA_PSTATCLIENT ConfigVariableString pstats_host;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing;
+extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_thread_profiling;
 
 
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history;

+ 32 - 0
panda/src/pstatclient/pStatClient.cxx

@@ -905,6 +905,38 @@ stop(int collector_index, int thread_index, double as_of) {
   }
   }
 }
 }
 
 
+/**
+ * Adds a pair of start/stop times in the given collector.  Used in low-level
+ * code that knows there will not be any other collectors started and stopped
+ * in the meantime, and can be more efficient than a pair of start/stop times.
+ */
+void PStatClient::
+start_stop(int collector_index, int thread_index, double start, double stop) {
+  if (!client_is_connected()) {
+    return;
+  }
+
+#ifdef _DEBUG
+  nassertv(collector_index >= 0 && collector_index < get_num_collectors());
+  nassertv(thread_index >= 0 && thread_index < get_num_threads());
+#endif
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+
+  if (collector->is_active() && thread->_is_active) {
+    LightMutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
+      // This collector wasn't already started in this thread; record a new
+      // data point.
+      if (thread->_thread_active) {
+        thread->_frame_data.add_start(collector_index, start);
+        thread->_frame_data.add_stop(collector_index, stop);
+      }
+    }
+  }
+}
+
 /**
 /**
  * Removes the level value from the indicated collector.  The collector will
  * Removes the level value from the indicated collector.  The collector will
  * no longer be reported as having any particular level value.
  * no longer be reported as having any particular level value.

+ 4 - 0
panda/src/pstatclient/pStatClient.h

@@ -129,6 +129,7 @@ private:
   void start(int collector_index, int thread_index, double as_of);
   void start(int collector_index, int thread_index, double as_of);
   void stop(int collector_index, int thread_index);
   void stop(int collector_index, int thread_index);
   void stop(int collector_index, int thread_index, double as_of);
   void stop(int collector_index, int thread_index, double as_of);
+  void start_stop(int collector_index, int thread_index, double start, double stop);
 
 
   void clear_level(int collector_index, int thread_index);
   void clear_level(int collector_index, int thread_index);
   void set_level(int collector_index, int thread_index, double level);
   void set_level(int collector_index, int thread_index, double level);
@@ -216,6 +217,8 @@ private:
     bool _is_active;
     bool _is_active;
     int _frame_number;
     int _frame_number;
     double _next_packet;
     double _next_packet;
+    size_t _context_switches = 0;
+    size_t _involuntary_context_switches = 0;
 
 
     bool _thread_active;
     bool _thread_active;
 
 
@@ -321,6 +324,7 @@ private:
   void start(int collector_index, int thread_index, double as_of);
   void start(int collector_index, int thread_index, double as_of);
   void stop(int collector_index, int thread_index);
   void stop(int collector_index, int thread_index);
   void stop(int collector_index, int thread_index, double as_of);
   void stop(int collector_index, int thread_index, double as_of);
+  void start_stop(int collector_index, int thread_index, double start, double stop);
 
 
   void clear_level(int collector_index, int thread_index);
   void clear_level(int collector_index, int thread_index);
   void set_level(int collector_index, int thread_index, double level);
   void set_level(int collector_index, int thread_index, double level);

+ 159 - 0
panda/src/pstatclient/pStatClientImpl.cxx

@@ -24,6 +24,8 @@
 #include "config_pstatclient.h"
 #include "config_pstatclient.h"
 #include "pStatProperties.h"
 #include "pStatProperties.h"
 #include "cmath.h"
 #include "cmath.h"
+#include "conditionVarWin32Impl.h"
+#include "conditionVarPosixImpl.h"
 
 
 #include <algorithm>
 #include <algorithm>
 
 
@@ -32,6 +34,15 @@
 #include <windows.h>
 #include <windows.h>
 #endif
 #endif
 
 
+static PStatCollector _cswitch_pcollector("Context Switches");
+static PStatCollector _cswitch_sleep_pcollector("Context Switches:Sleep");
+static PStatCollector _cswitch_yield_pcollector("Context Switches:Yields");
+static PStatCollector _cswitch_cvar_pcollector("Context Switches:Condition Variable");
+static PStatCollector _cswitch_involuntary_pcollector("Context Switches:Involuntary");
+static PStatCollector _wait_sleep_pcollector("Wait:Sleep");
+static PStatCollector _wait_yield_pcollector("Wait:Yield");
+static PStatCollector _wait_cvar_pcollector("Wait:Condition Variable");
+
 /**
 /**
  *
  *
  */
  */
@@ -124,6 +135,106 @@ client_connect(std::string hostname, int port) {
   MutexDebug::increment_pstats();
   MutexDebug::increment_pstats();
 #endif // DEBUG_THREADS
 #endif // DEBUG_THREADS
 
 
+  if (pstats_thread_profiling) {
+    // Replace the condition variable wait function with one that does PStats
+    // statistics.
+    _thread_profiling = true;
+
+    Thread::_sleep_func = [] (double seconds) {
+      Thread *current_thread = Thread::get_current_thread();
+      int thread_index = current_thread->get_pstats_index();
+      if (thread_index >= 0) {
+        PStatClient *client = PStatClient::get_global_pstats();
+        double start = client->get_real_time();
+        ThreadImpl::sleep(seconds);
+        double stop = client->get_real_time();
+        client->start_stop(_wait_sleep_pcollector.get_index(), thread_index, start, stop);
+        client->add_level(_cswitch_sleep_pcollector.get_index(), thread_index, 1);
+      }
+      else {
+        ThreadImpl::sleep(seconds);
+      }
+    };
+
+    Thread::_yield_func = [] () {
+      Thread *current_thread = Thread::get_current_thread();
+      int thread_index = current_thread->get_pstats_index();
+      if (thread_index >= 0) {
+        PStatClient *client = PStatClient::get_global_pstats();
+        double start = client->get_real_time();
+        ThreadImpl::yield();
+        double stop = client->get_real_time();
+        client->start_stop(_wait_yield_pcollector.get_index(), thread_index, start, stop);
+        client->add_level(_cswitch_yield_pcollector.get_index(), thread_index, 1);
+      }
+      else {
+        ThreadImpl::yield();
+      }
+    };
+
+#ifdef _WIN32
+    ConditionVarWin32Impl::_wait_func =
+      [] (PCONDITION_VARIABLE cvar, PSRWLOCK lock, DWORD time, ULONG flags) {
+        Thread *current_thread = Thread::get_current_thread();
+        int thread_index = current_thread->get_pstats_index();
+        BOOL result;
+        if (thread_index >= 0) {
+          PStatClient *client = PStatClient::get_global_pstats();
+          double start = client->get_real_time();
+          result = SleepConditionVariableSRW(cvar, lock, time, flags);
+          double stop = client->get_real_time();
+          client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
+          client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
+        }
+        else {
+          result = SleepConditionVariableSRW(cvar, lock, time, flags);
+        }
+        return result;
+      };
+#endif
+
+#ifdef HAVE_POSIX_THREADS
+    ConditionVarPosixImpl::_wait_func =
+      [] (pthread_cond_t *cvar, pthread_mutex_t *lock) {
+        Thread *current_thread = Thread::get_current_thread();
+        int thread_index = current_thread->get_pstats_index();
+        int result;
+        if (thread_index >= 0) {
+          PStatClient *client = PStatClient::get_global_pstats();
+          double start = client->get_real_time();
+          result = pthread_cond_wait(cvar, lock);
+          double stop = client->get_real_time();
+          client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
+          client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
+        }
+        else {
+          result = pthread_cond_wait(cvar, lock);
+        }
+        return result;
+      };
+
+    ConditionVarPosixImpl::_timedwait_func =
+      [] (pthread_cond_t *cvar, pthread_mutex_t *lock,
+          const struct timespec *ts) {
+        Thread *current_thread = Thread::get_current_thread();
+        int thread_index = current_thread->get_pstats_index();
+        int result;
+        if (thread_index >= 0) {
+          PStatClient *client = PStatClient::get_global_pstats();
+          double start = client->get_real_time();
+          result = pthread_cond_timedwait(cvar, lock, ts);
+          double stop = client->get_real_time();
+          client->start_stop(_wait_cvar_pcollector.get_index(), thread_index, start, stop);
+          client->add_level(_cswitch_cvar_pcollector.get_index(), thread_index, 1);
+        }
+        else {
+          result = pthread_cond_timedwait(cvar, lock, ts);
+        }
+        return result;
+      };
+#endif
+  }
+
   // Wait for the server hello.
   // Wait for the server hello.
   while (!_got_udp_port) {
   while (!_got_udp_port) {
     transmit_control_data();
     transmit_control_data();
@@ -137,6 +248,23 @@ client_connect(std::string hostname, int port) {
  */
  */
 void PStatClientImpl::
 void PStatClientImpl::
 client_disconnect() {
 client_disconnect() {
+  if (_thread_profiling) {
+    // Switch the functions back to what they were.
+    Thread::_sleep_func = &ThreadImpl::sleep;
+    Thread::_yield_func = &ThreadImpl::yield;
+
+#ifdef _WIN32
+    ConditionVarWin32Impl::_wait_func = &SleepConditionVariableSRW;
+#endif
+
+#ifdef HAVE_POSIX_THREADS
+    ConditionVarPosixImpl::_wait_func = &pthread_cond_wait;
+    ConditionVarPosixImpl::_timedwait_func = &pthread_cond_timedwait;
+#endif
+
+    _thread_profiling = false;
+  }
+
   if (_is_connected) {
   if (_is_connected) {
 #ifdef DEBUG_THREADS
 #ifdef DEBUG_THREADS
     MutexDebug::decrement_pstats();
     MutexDebug::decrement_pstats();
@@ -207,6 +335,37 @@ new_frame(int thread_index, int frame_number) {
     if (frame_number == -1) {
     if (frame_number == -1) {
       frame_number = pthread->_frame_number;
       frame_number = pthread->_frame_number;
     }
     }
+
+    // Record the number of context switches on this thread.
+    if (_thread_profiling) {
+      size_t total, involuntary;
+      PT(Thread) thread = pthread->_thread.lock();
+      if (thread != nullptr && thread->get_context_switches(total, involuntary)) {
+        size_t total_this_frame = total - pthread->_context_switches;
+        pthread->_context_switches = total;
+        frame_data.add_level(_cswitch_pcollector.get_index(), total_this_frame);
+
+        if (involuntary != 0) {
+          size_t involuntary_this_frame = involuntary - pthread->_involuntary_context_switches;
+          pthread->_involuntary_context_switches = involuntary;
+          frame_data.add_level(_cswitch_involuntary_pcollector.get_index(), involuntary_this_frame);
+        }
+      }
+      _client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index);
+      _client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index);
+      _client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index);
+    }
+  }
+  else if (_thread_profiling) {
+    // Record the initial number of context switches.
+    PT(Thread) thread = pthread->_thread.lock();
+    if (thread != nullptr) {
+      thread->get_context_switches(pthread->_context_switches,
+                                   pthread->_involuntary_context_switches);
+    }
+    _client->clear_level(_cswitch_sleep_pcollector.get_index(), thread_index);
+    _client->clear_level(_cswitch_yield_pcollector.get_index(), thread_index);
+    _client->clear_level(_cswitch_cvar_pcollector.get_index(), thread_index);
   }
   }
 
 
   pthread->_frame_data.clear();
   pthread->_frame_data.clear();

+ 2 - 0
panda/src/pstatclient/pStatClientImpl.h

@@ -111,6 +111,8 @@ private:
   double _udp_count_factor;
   double _udp_count_factor;
   unsigned int _tcp_count;
   unsigned int _tcp_count;
   unsigned int _udp_count;
   unsigned int _udp_count;
+
+  bool _thread_profiling = false;
 };
 };
 
 
 #include "pStatClientImpl.I"
 #include "pStatClientImpl.I"