Browse Source

pstats: Integrate Python profiler with PStats

Enabled with `pstats-python-profiler 1`, requires recent version of PStats server.

Currently, it is limited to the main thread only.  Support for other threads may be added at a later date.
rdb 3 years ago
parent
commit
50cfdebd9c

+ 6 - 0
dtool/src/interrogatedb/py_compat.h

@@ -213,6 +213,12 @@ INLINE PyObject *_PyLong_Lshift(PyObject *a, size_t shiftby) {
 }
 #endif
 
+/* Python 3.9 */
+
+/* Fallback used when the Python headers do not already provide a
+   PyCFunction_CheckExact macro: true only if the type of op is exactly
+   PyCFunction_Type. */
+#ifndef PyCFunction_CheckExact
+#  define PyCFunction_CheckExact(op) (Py_TYPE(op) == &PyCFunction_Type)
+#endif
+
 #if PY_VERSION_HEX < 0x03090000
 INLINE EXPCL_PYPANDA PyObject *PyObject_CallNoArgs(PyObject *func) {
   return _PyObject_CallNoArg(func);

+ 3 - 0
dtool/src/parser-inc/Python.h

@@ -25,6 +25,9 @@ typedef _object PyObject;
 struct _typeobject;
 typedef _typeobject PyTypeObject;
 
+// Opaque declaration of the frame struct, so that the name PyFrameObject can
+// be used in declarations without the full definition.
+struct _frame;
+typedef _frame PyFrameObject;
+
 typedef struct {} PyStringObject;
 typedef struct {} PyUnicodeObject;
 

+ 2 - 0
makepanda/makepanda.py

@@ -4228,6 +4228,7 @@ if (not RUNTIME):
   IGATEFILES.remove("config_pstats.h")
   TargetAdd('libp3pstatclient.in', opts=OPTS, input=IGATEFILES)
   TargetAdd('libp3pstatclient.in', opts=['IMOD:panda3d.core', 'ILIB:libp3pstatclient', 'SRCDIR:panda/src/pstatclient'])
+  PyTargetAdd('p3pstatclient_pStatClient_ext.obj', opts=OPTS, input='pStatClient_ext.cxx')
 
 #
 # DIRECTORY: panda/src/gobj/
@@ -4707,6 +4708,7 @@ if (not RUNTIME):
   PyTargetAdd('core.pyd', input='p3pnmimage_pfmFile_ext.obj')
   PyTargetAdd('core.pyd', input='p3event_asyncFuture_ext.obj')
   PyTargetAdd('core.pyd', input='p3event_pythonTask.obj')
+  PyTargetAdd('core.pyd', input='p3pstatclient_pStatClient_ext.obj')
   PyTargetAdd('core.pyd', input='p3gobj_ext_composite.obj')
   PyTargetAdd('core.pyd', input='p3pgraph_ext_composite.obj')
   PyTargetAdd('core.pyd', input='p3display_ext_composite.obj')

+ 10 - 0
panda/src/pstatclient/config_pstatclient.cxx

@@ -82,6 +82,16 @@ ConfigVariableBool pstats_gpu_timing
           "is not usually an accurate reflectino of how long the actual "
           "operation takes on the video card."));
 
+// Defaults to false.  Checked when sending the hello message and when
+// connecting from Python to decide whether to hook into the Python profiler.
+ConfigVariableBool pstats_python_profiler
+("pstats-python-profiler", false,
+ PRC_DESC("Set this true to integrate with the Python profiler to show "
+          "detailed information about individual Python functions in "
+          "PStats, similar to the information offered by Python's built-in "
+          "profiler.  This can be really useful to find bottlenecks in a "
+          "Python program, but enabling this will slow down the application "
+          "somewhat, and requires a recent version of the PStats server, so "
+          "it is not enabled by default."));
+
 // The rest are different in that they directly control the server, not the
 // client.
 ConfigVariableBool pstats_scroll_mode

+ 1 - 0
panda/src/pstatclient/config_pstatclient.h

@@ -38,6 +38,7 @@ extern EXPCL_PANDA_PSTATCLIENT ConfigVariableString pstats_host;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing;
+extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_python_profiler;
 
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history;

+ 7 - 4
panda/src/pstatclient/pStatClient.h

@@ -29,6 +29,7 @@
 #include "atomicAdjust.h"
 #include "numeric_types.h"
 #include "bitArray.h"
+#include "extension.h"
 
 class PStatClientImpl;
 class PStatCollector;
@@ -88,8 +89,8 @@ PUBLISHED:
   MAKE_PROPERTY(current_thread, get_current_thread);
   MAKE_PROPERTY(real_time, get_real_time);
 
-  INLINE static bool connect(const std::string &hostname = std::string(), int port = -1);
-  INLINE static void disconnect();
+  EXTEND INLINE static bool connect(const std::string &hostname = std::string(), int port = -1);
+  EXTEND INLINE static void disconnect();
   INLINE static bool is_connected();
 
   INLINE static void resume_after_pause();
@@ -99,8 +100,8 @@ PUBLISHED:
 
   void client_main_tick();
   void client_thread_tick(const std::string &sync_name);
-  bool client_connect(std::string hostname, int port);
-  void client_disconnect();
+  EXTEND bool client_connect(std::string hostname, int port);
+  EXTEND void client_disconnect();
   bool client_is_connected() const;
 
   void client_resume_after_pause();
@@ -254,6 +255,8 @@ private:
   friend class PStatThread;
   friend class PStatClientImpl;
   friend class GraphicsStateGuardian;
+
+  friend class Extension<PStatClient>;
 };
 
 #include "pStatClient.I"

+ 7 - 0
panda/src/pstatclient/pStatClientImpl.cxx

@@ -399,6 +399,13 @@ send_hello() {
   message._major_version = get_current_pstat_major_version();
   message._minor_version = get_current_pstat_minor_version();
 
+  // The Python profiling feature may send nested start/stop pairs, so requires
+  // a server version capable of dealing with this.
+  if (pstats_python_profiler && message._major_version <= 3) {
+    message._major_version = 3;
+    message._minor_version = std::max(message._minor_version, 1);
+  }
+
   Datagram datagram;
   message.encode(datagram);
   _writer.send(datagram, _tcp_connection, true);

+ 31 - 0
panda/src/pstatclient/pStatClient_ext.I

@@ -0,0 +1,31 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file pStatClient_ext.I
+ * @author rdb
+ * @date 2022-11-29
+ */
+
+/**
+ * Connects the global PStatClient to the indicated PStatServer by going
+ * through the client_connect() extension method.  The result of that call is
+ * passed through: true if the connection succeeded, false if it failed.
+ */
+INLINE bool Extension<PStatClient>::
+connect(const std::string &hostname, int port) {
+  return invoke_extension<PStatClient>(PStatClient::get_global_pstats())
+    .client_connect(hostname, port);
+}
+
+/**
+ * Closes the connection of the global PStatClient by going through the
+ * client_disconnect() extension method.
+ */
+INLINE void Extension<PStatClient>::
+disconnect() {
+  invoke_extension<PStatClient>(PStatClient::get_global_pstats())
+    .client_disconnect();
+}

+ 339 - 0
panda/src/pstatclient/pStatClient_ext.cxx

@@ -0,0 +1,339 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file pStatClient_ext.cxx
+ * @author rdb
+ * @date 2022-11-23
+ */
+
+#include "pStatClient_ext.h"
+#include "pStatCollector.h"
+#include "config_pstatclient.h"
+
+#include "frameobject.h"
+
+#if PY_VERSION_HEX >= 0x03060000
+static bool _python_profiler_enabled = false;
+
+// Used to cache stuff onto PyCodeObjects.
+static Py_ssize_t _extra_index = -1;
+
+// Stores a mapping between C method definitions and collector indices.
+static pmap<PyMethodDef *, int> _c_method_collectors;
+
+// Parent collector for all Python profiling collectors.
+static PStatCollector code_collector("App:Python");
+
+/**
+ * Determines which class in the hierarchy of cls defines the method with the
+ * given name and code object.
+ *
+ * Returns false if looking up name on cls does not yield a Python function
+ * backed by this code object.  Otherwise returns true, and cls is updated to
+ * point to the first base (searched recursively) that also provides the same
+ * function; if no base does, cls is left unchanged.
+ */
+static bool
+find_method(PyTypeObject *&cls, PyObject *name, PyCodeObject *code) {
+  PyObject *candidate = _PyType_Lookup(cls, name);
+  if (candidate == nullptr) {
+    return false;
+  }
+  if (!PyFunction_Check(candidate)) {
+    return false;
+  }
+  if (PyFunction_GET_CODE(candidate) != (PyObject *)code) {
+    return false;
+  }
+
+  // The method is visible from this class; see whether one of the bases is
+  // the one that actually provides it.
+  PyObject *bases = cls->tp_bases;
+  const Py_ssize_t num_bases = (bases != nullptr) ? PyTuple_GET_SIZE(bases) : 0;
+  for (Py_ssize_t bi = 0; bi < num_bases; ++bi) {
+    PyTypeObject *origin = (PyTypeObject *)PyTuple_GET_ITEM(bases, bi);
+    if (!find_method(origin, name, code)) {
+      continue;
+    }
+    // The recursive call may have moved origin further up the hierarchy.
+    cls = origin;
+    return true;
+  }
+
+  // None of the bases has it, so this class is where it is defined.
+  return true;
+}
+
+/**
+ * Creates the collector for a Python frame and returns its index.
+ *
+ * The collector is created below the App:Python collector, with a name built
+ * from the module name (dots replaced by colons), optionally the class name,
+ * and the function name, followed by "()" unless the name ends in '>'.  If an
+ * extra index has been allocated, the collector index is also stored on the
+ * code object so that it can be looked up again later.
+ *
+ * Names that do not fit in the internal buffer are truncated.
+ */
+static int
+#ifdef __GNUC__
+__attribute__ ((noinline))
+#elif defined(_MSC_VER)
+__declspec(noinline)
+#endif
+make_python_frame_collector(PyFrameObject *frame, PyCodeObject *code) {
+  char buffer[1024];
+  buffer[0] = '\0';
+
+  // snprintf returns the length the complete string would have had (or a
+  // negative value on error), which may exceed what fits.  This converts that
+  // result into the number of characters actually stored in a buffer with
+  // the given capacity, so that it is safe to use as an index.
+  auto stored_length = [](int written, size_t capacity) -> size_t {
+    if (written < 0) {
+      return 0;
+    }
+    return ((size_t)written < capacity) ? (size_t)written : capacity - 1;
+  };
+
+#if PY_VERSION_HEX >= 0x030B0000 // 3.11
+  // Fetch the module name out of the frame's global scope.
+  PyObject *globals = PyFrame_GetGlobals(frame);
+  PyObject *py_mod_name = PyDict_GetItemString(globals, "__name__");
+  Py_DECREF(globals);
+
+  const char *mod_name = py_mod_name ? PyUnicode_AsUTF8(py_mod_name) : "<unknown>";
+  const char *meth_name = PyUnicode_AsUTF8(code->co_qualname);
+  if (mod_name == nullptr || meth_name == nullptr) {
+    // The conversion failed; don't leave the error set or pass a null pointer
+    // to snprintf.
+    PyErr_Clear();
+    if (mod_name == nullptr) {
+      mod_name = "<unknown>";
+    }
+    if (meth_name == nullptr) {
+      meth_name = "<unknown>";
+    }
+  }
+  size_t len = stored_length(snprintf(buffer, sizeof(buffer), "%s:%s", mod_name, meth_name), sizeof(buffer));
+  buffer[len] = '\0';
+  for (size_t i = 0; i + 1 < len; ++i) {
+    if (buffer[i] == '.') {
+      buffer[i] = ':';
+    }
+  }
+#else
+  // Try to figure out the type name.  There's no obvious way to do this.
+  // It's possible that the first argument passed to this function is the
+  // self instance or the current type (for a classmethod), but we have to
+  // double-check that to make sure.
+  PyTypeObject *cls = nullptr;
+  if (code->co_argcount >= 1) {
+    PyFrame_FastToLocals(frame);
+
+    // PyDict_GetItem returns null if the name is not present in the locals,
+    // so the result has to be checked before looking at its type.
+    PyObject *first_arg = nullptr;
+    if (frame->f_locals != nullptr) {
+      first_arg = PyDict_GetItem(frame->f_locals, PyTuple_GET_ITEM(code->co_varnames, 0));
+    }
+    if (first_arg != nullptr) {
+      cls = PyType_Check(first_arg) ? (PyTypeObject *)first_arg : Py_TYPE(first_arg);
+      if ((cls->tp_flags & Py_TPFLAGS_HEAPTYPE) != 0) {
+        // Mangling scheme for methods starting (but not ending) with "__"
+        PyObject *meth_name = code->co_name;
+        Py_ssize_t name_len = PyUnicode_GET_LENGTH(meth_name);
+        if (name_len >= 2 && PyUnicode_READ_CHAR(meth_name, 0) == '_' && PyUnicode_READ_CHAR(meth_name, 1) == '_' &&
+            (name_len < 4 || PyUnicode_READ_CHAR(meth_name, name_len - 1) != '_' || PyUnicode_READ_CHAR(meth_name, name_len - 2) != '_')) {
+          const char *cls_name = cls->tp_name;
+          while (cls_name[0] == '_') {
+            ++cls_name;
+          }
+          meth_name = PyUnicode_FromFormat("_%s%S", cls_name, meth_name);
+        } else {
+          Py_INCREF(meth_name);
+        }
+        if (meth_name == nullptr) {
+          // Could not build the mangled name; carry on without a class name.
+          PyErr_Clear();
+          cls = nullptr;
+        } else {
+          if (!find_method(cls, meth_name, code)) {
+            // Not a matching method object, it's something else.  Forget it.
+            cls = nullptr;
+          }
+          Py_DECREF(meth_name);
+        }
+      } else {
+        cls = nullptr;
+      }
+    }
+  }
+
+  // Fetch the module name out of the frame's global scope.
+  PyObject *py_mod_name = PyDict_GetItemString(frame->f_globals, "__name__");
+  if (py_mod_name == nullptr && cls != nullptr) {
+    py_mod_name = PyDict_GetItemString(cls->tp_dict, "__module__");
+  }
+
+  const char *mod_name = py_mod_name ? PyUnicode_AsUTF8(py_mod_name) : "<unknown>";
+  if (mod_name == nullptr) {
+    PyErr_Clear();
+    mod_name = "<unknown>";
+  }
+  size_t len = stored_length(snprintf(buffer, sizeof(buffer), "%s:", mod_name), sizeof(buffer));
+  buffer[len] = '\0';
+  for (size_t i = 0; i + 1 < len; ++i) {
+    if (buffer[i] == '.') {
+      buffer[i] = ':';
+    }
+  }
+
+  const char *meth_name = PyUnicode_AsUTF8(code->co_name);
+  if (meth_name == nullptr) {
+    PyErr_Clear();
+    meth_name = "<unknown>";
+  }
+  // len is at most sizeof(buffer) - 1 here, so there is always room for at
+  // least the terminating null in the remaining space.
+  const size_t remaining = sizeof(buffer) - len;
+  if (cls != nullptr) {
+    len += stored_length(snprintf(buffer + len, remaining, "%s:%s", cls->tp_name, meth_name), remaining);
+  } else {
+    len += stored_length(snprintf(buffer + len, remaining, "%s", meth_name), remaining);
+  }
+  buffer[len] = '\0';
+#endif
+
+  // Add parentheses, unless it's something special like <listcomp>
+  if (len > 0 && len < sizeof(buffer) - 2 && buffer[len - 1] != '>') {
+    buffer[len++] = '(';
+    buffer[len++] = ')';
+    buffer[len] = '\0';
+  }
+
+  PStatCollector collector(code_collector, buffer);
+  intptr_t collector_index = collector.get_index();
+  if (_extra_index != -1) {
+    // Cache the index on the code object for subsequent lookups.
+    _PyCode_SetExtra((PyObject *)code, _extra_index, (void *)collector_index);
+  }
+  return collector_index;
+}
+
+/**
+ * Creates a collector for a C function and returns its index.
+ *
+ * The collector is created below the App:Python collector.  Its name is made
+ * up of the module name (dots replaced by colons), the type name if the
+ * function is bound to something other than a module, and the name from the
+ * method definition, followed by "()".  The index is also recorded in
+ * _c_method_collectors under the function's PyMethodDef.
+ *
+ * Names that do not fit in the internal buffer are truncated.
+ */
+static int
+#ifdef __GNUC__
+__attribute__ ((noinline))
+#elif defined(_MSC_VER)
+__declspec(noinline)
+#endif
+make_c_function_collector(PyCFunctionObject *meth) {
+  char buffer[1024];
+  buffer[0] = '\0';
+
+  // Result of snprintf: the length the complete string would have had.
+  int written;
+  // Number of leading characters in which dots are replaced by colons.
+  size_t len;
+
+  if (meth->m_self != nullptr && !PyModule_Check(meth->m_self)) {
+    PyTypeObject *cls = PyType_Check(meth->m_self) ? (PyTypeObject *)meth->m_self : Py_TYPE(meth->m_self);
+
+    const char *dot = strrchr(cls->tp_name, '.');
+    if (dot != nullptr) {
+      // The module name is included in the type name.
+      written = snprintf(buffer, sizeof(buffer), "%s:%s()", cls->tp_name, meth->m_ml->ml_name);
+      len = (dot - cls->tp_name) + 1;
+    } else {
+      // If there's no module name, we need to get it from __module__.
+      PyObject *py_mod_name = cls->tp_dict ? PyDict_GetItemString(cls->tp_dict, "__module__") : nullptr;
+      const char *mod_name;
+      if (py_mod_name != nullptr) {
+        mod_name = PyUnicode_AsUTF8(py_mod_name);
+        if (mod_name == nullptr) {
+          // Conversion failed; don't pass a null pointer to snprintf.
+          PyErr_Clear();
+          mod_name = "<unknown>";
+        }
+      } else {
+        // Is it a built-in, like int or dict?
+        PyObject *builtins = PyEval_GetBuiltins();
+        if (PyDict_GetItemString(builtins, cls->tp_name) == (PyObject *)cls) {
+          mod_name = "builtins";
+        } else {
+          mod_name = "<unknown>";
+        }
+      }
+      written = snprintf(buffer, sizeof(buffer), "%s:%s:%s()", mod_name, cls->tp_name, meth->m_ml->ml_name);
+      // Everything but the trailing "()".
+      len = (written > 2) ? (size_t)(written - 2) : 0;
+    }
+  }
+  else if (meth->m_self != nullptr) {
+    const char *mod_name = PyModule_GetName(meth->m_self);
+    if (mod_name == nullptr) {
+      // The module has no usable name; don't pass a null pointer to snprintf.
+      PyErr_Clear();
+      mod_name = "<unknown>";
+    }
+    written = snprintf(buffer, sizeof(buffer), "%s:%s()", mod_name, meth->m_ml->ml_name);
+    // Everything but the trailing "()".
+    len = (written > 2) ? (size_t)(written - 2) : 0;
+  }
+  else {
+    written = snprintf(buffer, sizeof(buffer), "%s()", meth->m_ml->ml_name);
+    len = 0;
+  }
+
+  if (written < 0) {
+    // Formatting failed, so the buffer contents cannot be relied upon.
+    buffer[0] = '\0';
+    len = 0;
+  }
+  // The name may have been truncated to fit the buffer, in which case the
+  // computed length lies beyond what was stored.  Never walk past the end.
+  if (len > sizeof(buffer) - 1) {
+    len = sizeof(buffer) - 1;
+  }
+  for (size_t i = 0; i < len; ++i) {
+    if (buffer[i] == '.') {
+      buffer[i] = ':';
+    }
+  }
+
+  PStatCollector collector(code_collector, buffer);
+  int collector_index = collector.get_index();
+  // Remember the collector so the name is only built once per definition.
+  _c_method_collectors[meth->m_ml] = collector_index;
+  return collector_index;
+}
+#endif  // PY_VERSION_HEX
+
+/**
+ * Attempts to establish a connection to the indicated PStatServer.  Returns
+ * true if successful, false on failure.
+ *
+ * On Python 3.6 and above, if the connection succeeds and the
+ * pstats-python-profiler variable is set, this also installs trace_callback
+ * as the Python profile function, unless it was already installed.  If the
+ * connection fails while the profile function is installed, it is removed
+ * again.
+ */
+bool Extension<PStatClient>::
+client_connect(std::string hostname, int port) {
+#if PY_VERSION_HEX >= 0x03060000
+  extern struct Dtool_PyTypedObject Dtool_PStatThread;
+
+  if (_this->client_connect(std::move(hostname), port)) {
+    // Pass a PStatThread as argument.
+    if (!_python_profiler_enabled && pstats_python_profiler) {
+      PStatThread *thread = new PStatThread(_this->get_current_thread());
+      PyObject *arg = DTool_CreatePyInstance((void *)thread, Dtool_PStatThread, true, false);
+      // NOTE(review): if wrapping fails, it is not visible here who owns
+      // thread; the profiler is simply not installed in that case.
+      if (arg != nullptr) {
+        if (_extra_index == -1) {
+          _extra_index = _PyEval_RequestCodeExtraIndex(nullptr);
+        }
+        PyEval_SetProfile(&trace_callback, arg);
+        // The interpreter holds its own reference to the argument for as long
+        // as the profile function is installed, so release ours.
+        Py_DECREF(arg);
+        // Remember that the hook is installed, so that it is not installed
+        // twice and so that it gets removed again upon disconnecting.
+        _python_profiler_enabled = true;
+      }
+    }
+    return true;
+  }
+  else if (_python_profiler_enabled) {
+    PyEval_SetProfile(nullptr, nullptr);
+    _python_profiler_enabled = false;
+  }
+  return false;
+#else
+  return _this->client_connect(std::move(hostname), port);
+#endif
+}
+
+/**
+ * Closes the connection previously established.  On Python 3.6 and above,
+ * the Python profile function is removed as well if it is marked as enabled.
+ */
+void Extension<PStatClient>::
+client_disconnect() {
+  _this->client_disconnect();
+
+#if PY_VERSION_HEX >= 0x03060000
+  if (!_python_profiler_enabled) {
+    return;
+  }
+
+  PyEval_SetProfile(nullptr, nullptr);
+  _python_profiler_enabled = false;
+#endif
+}
+
+#if PY_VERSION_HEX >= 0x03060000
+/**
+ * Callback passed to PyEval_SetProfile.
+ *
+ * py_thread is the wrapped PStatThread that was passed along when the
+ * callback was installed.  For Python frame events, the collector index is
+ * looked up on (or created for) the frame's code object; for C function
+ * events, it is looked up by (or created for) the method definition of arg.
+ * A start is recorded for call events, and a stop for all other handled
+ * events.
+ *
+ * If the client turns out to be no longer connected, the profile function
+ * uninstalls itself.  Returns 0, except when a debug assertion fails.
+ */
+int Extension<PStatClient>::
+trace_callback(PyObject *py_thread, PyFrameObject *frame, int what, PyObject *arg) {
+  intptr_t collector_index;
+
+  if (what == PyTrace_CALL || what == PyTrace_RETURN || what == PyTrace_EXCEPTION) {
+    // Normal Python frame entry/exit.
+#if PY_VERSION_HEX >= 0x030B0000 // 3.11
+    PyCodeObject *code = PyFrame_GetCode(frame);
+#else
+    PyCodeObject *code = frame->f_code;
+#endif
+
+    // The index for this collector is cached on the code object.
+    if (_PyCode_GetExtra((PyObject *)code, _extra_index, (void **)&collector_index) != 0 || collector_index == 0) {
+      collector_index = make_python_frame_collector(frame, code);
+    }
+
+#if PY_VERSION_HEX >= 0x030B0000 // 3.11
+    // Balances the reference obtained from PyFrame_GetCode above.
+    Py_DECREF(code);
+#endif
+  } else if (what == PyTrace_C_CALL || what == PyTrace_C_RETURN || what == PyTrace_C_EXCEPTION) {
+    // Call to a C function or method, which has no frame of its own.
+    if (PyCFunction_CheckExact(arg)) {
+      PyCFunctionObject *meth = (PyCFunctionObject *)arg;
+      auto it = _c_method_collectors.find(meth->m_ml);
+      if (it != _c_method_collectors.end()) {
+        collector_index = it->second;
+      } else {
+        collector_index = make_c_function_collector(meth);
+      }
+    } else {
+      return 0;
+    }
+  } else {
+    return 0;
+  }
+
+  if (collector_index <= 0) {
+    return 0;
+  }
+
+  PStatThread &pthread = *(PStatThread *)DtoolInstance_VOID_PTR(py_thread);
+  PStatClient *client = pthread.get_client();
+  if (!client->client_is_connected()) {
+    // Client was disconnected, disable Python profiling.
+    PyEval_SetProfile(nullptr, nullptr);
+    _python_profiler_enabled = false;
+    return 0;
+  }
+
+  int thread_index = pthread.get_index();
+
+#ifdef _DEBUG
+  // This is a static method of the extension class, so the counters have to
+  // be accessed through the client.
+  nassertr(collector_index >= 0 && collector_index < AtomicAdjust::get(client->_num_collectors), -1);
+  nassertr(thread_index >= 0 && thread_index < AtomicAdjust::get(client->_num_threads), -1);
+#endif
+
+  PStatClient::Collector *collector = client->get_collector_ptr(collector_index);
+  PStatClient::InternalThread *thread = client->get_thread_ptr(thread_index);
+
+  if (collector->is_active() && thread->_is_active) {
+    double as_of = client->get_real_time();
+
+    LightMutexHolder holder(thread->_thread_lock);
+    if (thread->_thread_active) {
+      if (what == PyTrace_CALL || what == PyTrace_C_CALL) {
+        thread->_frame_data.add_start(collector_index, as_of);
+      } else {
+        thread->_frame_data.add_stop(collector_index, as_of);
+      }
+    }
+  }
+
+  return 0;
+}
+#endif  // PY_VERSION_HEX

+ 49 - 0
panda/src/pstatclient/pStatClient_ext.h

@@ -0,0 +1,49 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file pStatClient_ext.h
+ * @author rdb
+ * @date 2022-11-23
+ */
+
+#ifndef PSTATCLIENT_EXT_H
+#define PSTATCLIENT_EXT_H
+
+#include "dtoolbase.h"
+
+#ifdef HAVE_PYTHON
+
+#include "extension.h"
+#include "pStatClient.h"
+#include "py_panda.h"
+
+typedef struct _frame PyFrameObject;
+
+/**
+ * This class defines the extension methods for PStatClient, which are called
+ * instead of any C++ methods with the same prototype.
+ */
+template<>
+class Extension<PStatClient> : public ExtensionBase<PStatClient> {
+public:
+  // Static conveniences that forward to client_connect() and
+  // client_disconnect() on the global PStatClient.
+  INLINE static bool connect(const std::string &hostname = std::string(), int port = -1);
+  INLINE static void disconnect();
+
+  // Wrap the PStatClient methods of the same name, additionally installing
+  // or removing the Python profile callback where supported.
+  bool client_connect(std::string hostname, int port);
+  void client_disconnect();
+
+private:
+  // Profile function handed to PyEval_SetProfile by client_connect().
+  static int trace_callback(PyObject *py_thread, PyFrameObject *frame,
+                            int what, PyObject *arg);
+};
+
+#include "pStatClient_ext.I"
+
+#endif  // HAVE_PYTHON
+
+#endif  // PSTATCLIENT_EXT_H

+ 1 - 3
panda/src/pstatclient/pStatCollector.h

@@ -43,11 +43,9 @@ class Thread;
 class EXPCL_PANDA_PSTATCLIENT PStatCollector {
 #ifdef DO_PSTATS
 
-private:
-  INLINE PStatCollector(PStatClient *client, int index);
-
 public:
   PStatCollector() = default;
+  INLINE PStatCollector(PStatClient *client, int index);
 
 PUBLISHED:
   INLINE explicit PStatCollector(const std::string &name,

+ 8 - 0
panda/src/pstatclient/pStatThread.I

@@ -82,3 +82,11 @@ INLINE int PStatThread::
 get_index() const {
   return _index;
 }
+
+/**
+ * Returns the PStatClient pointer stored in this PStatThread.
+ */
+INLINE PStatClient *PStatThread::
+get_client() const {
+  return _client;
+}

+ 3 - 0
panda/src/pstatclient/pStatThread.h

@@ -45,6 +45,9 @@ PUBLISHED:
   MAKE_PROPERTY(thread, get_thread);
   MAKE_PROPERTY(index, get_index);
 
+public:
+  PStatClient *get_client() const;
+
 private:
   PStatClient *_client;
   int _index;