瀏覽代碼

make PStats more thread-friendly

David Rose 19 年之前
父節點
當前提交
9e986fce40

+ 2 - 0
panda/src/collide/collisionLevelState.h

@@ -103,6 +103,8 @@ private:
   BoundingVolumes _parent_bounds;
 
   static PStatCollector _node_volume_pcollector;
+
+  friend class CollisionTraverser;
 };
 
 #include "collisionLevelState.I"

+ 15 - 4
panda/src/collide/collisionPlane.I

@@ -51,9 +51,20 @@ CollisionPlane(const CollisionPlane &copy) :
 {
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: CollisionPlane::flush_level
+//       Access: Public, Static
+//  Description: Flushes this class's volume and test PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void CollisionPlane::
+flush_level() {  // invoked at the end of CollisionTraverser::traverse()
+  _volume_pcollector.flush_level();
+  _test_pcollector.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionPlane::get_normal
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE LVector3f CollisionPlane::
@@ -63,7 +74,7 @@ get_normal() const {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionPlane::dist_to_plane
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE float CollisionPlane::
@@ -73,7 +84,7 @@ dist_to_plane(const LPoint3f &point) const {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionPlane::set_plane
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionPlane::
@@ -85,7 +96,7 @@ set_plane(const Planef &plane) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionPlane::get_plane
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE const Planef &CollisionPlane::

+ 2 - 0
panda/src/collide/collisionPlane.h

@@ -49,6 +49,8 @@ public:
 
   virtual void output(ostream &out) const;
 
+  INLINE static void flush_level();
+
 PUBLISHED:
   INLINE LVector3f get_normal() const;
   INLINE float dist_to_plane(const LPoint3f &point) const;

+ 11 - 0
panda/src/collide/collisionPolygon.I

@@ -104,6 +104,17 @@ verify_points(const LPoint3f &a, const LPoint3f &b,
   return verify_points(array, array + 4);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: CollisionPolygon::flush_level
+//       Access: Public, Static
+//  Description: Flushes this class's volume and test PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void CollisionPolygon::
+flush_level() {  // invoked at the end of CollisionTraverser::traverse()
+  _volume_pcollector.flush_level();
+  _test_pcollector.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionPolygon::to_2d
 //       Access: Private

+ 2 - 0
panda/src/collide/collisionPolygon.h

@@ -71,6 +71,8 @@ public:
   virtual void output(ostream &out) const;
   virtual void write(ostream &out, int indent_level = 0) const;
 
+  INLINE static void flush_level();
+
 protected:
   virtual PT(BoundingVolume) compute_internal_bounds() const;
 

+ 16 - 5
panda/src/collide/collisionSphere.I

@@ -64,9 +64,20 @@ CollisionSphere(const CollisionSphere &copy) :
 {
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: CollisionSphere::flush_level
+//       Access: Public, Static
+//  Description: Flushes this class's volume and test PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void CollisionSphere::
+flush_level() {  // invoked at the end of CollisionTraverser::traverse()
+  _volume_pcollector.flush_level();
+  _test_pcollector.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionSphere::set_center
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionSphere::
@@ -78,7 +89,7 @@ set_center(const LPoint3f &center) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionSphere::set_center
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionSphere::
@@ -88,7 +99,7 @@ set_center(float x, float y, float z) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionSphere::get_center
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE const LPoint3f &CollisionSphere::
@@ -98,7 +109,7 @@ get_center() const {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionSphere::set_radius
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionSphere::
@@ -111,7 +122,7 @@ set_radius(float radius) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionSphere::get_radius
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE float CollisionSphere::

+ 2 - 0
panda/src/collide/collisionSphere.h

@@ -50,6 +50,8 @@ public:
 
   virtual void output(ostream &out) const;
 
+  INLINE static void flush_level();
+
 PUBLISHED:
   INLINE void set_center(const LPoint3f &center);
   INLINE void set_center(float x, float y, float z);

+ 14 - 0
panda/src/collide/collisionTraverser.cxx

@@ -23,6 +23,10 @@
 #include "collisionGeom.h"
 #include "collisionRecorder.h"
 #include "collisionVisualizer.h"
+#include "collisionSphere.h"
+#include "collisionTube.h"
+#include "collisionPolygon.h"
+#include "collisionPlane.h"
 #include "config_collide.h"
 #include "boundingSphere.h"
 #include "transformState.h"
@@ -294,6 +298,16 @@ traverse(const NodePath &root) {
     get_recorder()->end_traversal();
   }
   #endif  // DO_COLLISION_RECORDING
+
+  CollisionLevelState::_node_volume_pcollector.flush_level();
+  _cnode_volume_pcollector.flush_level();
+  _gnode_volume_pcollector.flush_level();
+  _geom_volume_pcollector.flush_level();
+
+  CollisionSphere::flush_level();
+  CollisionTube::flush_level();
+  CollisionPolygon::flush_level();
+  CollisionPlane::flush_level();
 }
 
 ////////////////////////////////////////////////////////////////////

+ 19 - 8
panda/src/collide/collisionTube.I

@@ -70,9 +70,20 @@ CollisionTube(const CollisionTube &copy) :
   recalc_internals();
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: CollisionTube::flush_level
+//       Access: Public, Static
+//  Description: Flushes this class's volume and test PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void CollisionTube::
+flush_level() {  // invoked at the end of CollisionTraverser::traverse()
+  _volume_pcollector.flush_level();
+  _test_pcollector.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::set_point_a
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionTube::
@@ -83,7 +94,7 @@ set_point_a(const LPoint3f &a) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::set_point_a
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionTube::
@@ -93,7 +104,7 @@ set_point_a(float x, float y, float z) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::get_point_a
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE const LPoint3f &CollisionTube::
@@ -103,7 +114,7 @@ get_point_a() const {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::set_point_b
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionTube::
@@ -114,7 +125,7 @@ set_point_b(const LPoint3f &b) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::set_point_b
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionTube::
@@ -124,7 +135,7 @@ set_point_b(float x, float y, float z) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::get_point_b
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE const LPoint3f &CollisionTube::
@@ -134,7 +145,7 @@ get_point_b() const {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::set_radius
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE void CollisionTube::
@@ -150,7 +161,7 @@ set_radius(float radius) {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CollisionTube::get_radius
-//       Access: Public
+//       Access: Published
 //  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE float CollisionTube::

+ 2 - 0
panda/src/collide/collisionTube.h

@@ -54,6 +54,8 @@ public:
 
   virtual void output(ostream &out) const;
 
+  INLINE static void flush_level();
+
 PUBLISHED:
   INLINE void set_point_a(const LPoint3f &a);
   INLINE void set_point_a(float x, float y, float z);

+ 3 - 3
panda/src/cull/cullBinOcclusionTest.cxx

@@ -247,9 +247,9 @@ draw() {
     _pending_nodes.pop_front();
   }
 
-  _occlusion_previous_pcollector.add_level(num_drawn_previous);
-  _occlusion_passed_pcollector.add_level(num_drawn);
-  _occlusion_failed_pcollector.add_level(_num_objects - (num_drawn_previous + num_drawn));
+  _occlusion_previous_pcollector.add_level_now(num_drawn_previous);
+  _occlusion_passed_pcollector.add_level_now(num_drawn);
+  _occlusion_failed_pcollector.add_level_now(_num_objects - (num_drawn_previous + num_drawn));
 
   // Now, store a list of the objects within OctreeNodes that passed
   // the occlusion test this frame, so we can ensure that they are

+ 7 - 0
panda/src/display/graphicsEngine.cxx

@@ -34,6 +34,8 @@
 #include "cullFaceAttrib.h"
 #include "string_utils.h"
 #include "geomCacheManager.h"
+#include "renderState.h"
+#include "transformState.h"
 #include "thread.h"
 #include "pipeline.h"
 #include "throw_event.h"
@@ -627,6 +629,11 @@ render_frame() {
 
 #endif  // THREADED_PIPELINE && DO_PSTATS
 
+  GeomCacheManager::flush_level();
+  CullTraverser::flush_level();
+  RenderState::flush_level();
+  TransformState::flush_level();
+
   // Now cycle the pipeline and officially begin the next frame.
 #ifdef THREADED_PIPELINE
   {

+ 18 - 2
panda/src/display/graphicsStateGuardian.cxx

@@ -61,7 +61,6 @@ PStatCollector GraphicsStateGuardian::_vertices_tristrip_pcollector("Vertices:Tr
 PStatCollector GraphicsStateGuardian::_vertices_trifan_pcollector("Vertices:Triangle fans");
 PStatCollector GraphicsStateGuardian::_vertices_tri_pcollector("Vertices:Triangles");
 PStatCollector GraphicsStateGuardian::_vertices_other_pcollector("Vertices:Other");
-PStatCollector GraphicsStateGuardian::_vertices_indexed_tristrip_pcollector("Vertices:Indexed triangle strips");
 PStatCollector GraphicsStateGuardian::_state_pcollector("State changes");
 PStatCollector GraphicsStateGuardian::_transform_state_pcollector("State changes:Transforms");
 PStatCollector GraphicsStateGuardian::_texture_state_pcollector("State changes:Textures");
@@ -1124,6 +1123,24 @@ end_scene() {
 void GraphicsStateGuardian::
 end_frame() {
   _prepared_objects->end_frame();
+
+  // Flush the base-class PStatCollectors; subclasses flush their own.
+  _data_transferred_pcollector.flush_level();
+
+  _primitive_batches_pcollector.flush_level();
+  _primitive_batches_tristrip_pcollector.flush_level();
+  _primitive_batches_trifan_pcollector.flush_level();
+  _primitive_batches_tri_pcollector.flush_level();
+  _primitive_batches_other_pcollector.flush_level();
+  _vertices_tristrip_pcollector.flush_level();
+  _vertices_trifan_pcollector.flush_level();
+  _vertices_tri_pcollector.flush_level();
+  _vertices_other_pcollector.flush_level();
+
+  _state_pcollector.flush_level();
+  _texture_state_pcollector.flush_level();
+  _transform_state_pcollector.flush_level();
+  _draw_primitive_pcollector.flush_level();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1951,7 +1968,6 @@ init_frame_pstats() {
     _vertices_trifan_pcollector.clear_level();
     _vertices_tri_pcollector.clear_level();
     _vertices_other_pcollector.clear_level();
-    _vertices_indexed_tristrip_pcollector.clear_level();
     
     _state_pcollector.clear_level();
     _transform_state_pcollector.clear_level();

+ 5 - 0
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -1296,6 +1296,11 @@ end_frame() {
 
   GraphicsStateGuardian::end_frame();
 
+  // Flush any PStatCollectors specific to this kind of GSG.
+  _primitive_batches_display_list_pcollector.flush_level();
+  _vertices_display_list_pcollector.flush_level();
+  _vertices_immediate_pcollector.flush_level();
+
   // Now is a good time to delete any pending display lists.
   {
     MutexHolder holder(_lock);

+ 14 - 0
panda/src/gobj/geomCacheManager.I

@@ -58,3 +58,17 @@ INLINE int GeomCacheManager::
 get_total_size() const {
   return _total_size;
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: GeomCacheManager::flush_level
+//       Access: Public, Static
+//  Description: Flushes the five _geom_cache_* PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void GeomCacheManager::
+flush_level() {  // invoked from render_frame() in graphicsEngine.cxx
+  _geom_cache_size_pcollector.flush_level();
+  _geom_cache_active_pcollector.flush_level();
+  _geom_cache_record_pcollector.flush_level();
+  _geom_cache_erase_pcollector.flush_level();
+  _geom_cache_evict_pcollector.flush_level();
+}

+ 1 - 0
panda/src/gobj/geomCacheManager.h

@@ -59,6 +59,7 @@ PUBLISHED:
 
 public:
   void evict_old_entries();
+  INLINE static void flush_level();
 
 private:
   // This mutex protects all operations on this object, especially the

+ 13 - 1
panda/src/pgraph/cullTraverser.I

@@ -286,8 +286,20 @@ set_portal_clipper(PortalClipper *portal_clipper) {
 //       Access: Public
 //  Description: Returns the _portal_clipper pointer
 ////////////////////////////////////////////////////////////////////
-
 INLINE PortalClipper *CullTraverser::
 get_portal_clipper() const {
   return _portal_clipper;
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: CullTraverser::flush_level
+//       Access: Public, Static
+//  Description: Flushes the cull traversal statistics PStatCollectors.
+////////////////////////////////////////////////////////////////////
+INLINE void CullTraverser::
+flush_level() {  // invoked from render_frame() in graphicsEngine.cxx
+  _nodes_pcollector.flush_level();
+  _geom_nodes_pcollector.flush_level();
+  _geoms_pcollector.flush_level();
+  _geoms_occluded_pcollector.flush_level();
+}

+ 2 - 0
panda/src/pgraph/cullTraverser.h

@@ -88,6 +88,8 @@ public:
   void traverse(CullTraverserData &data);
   void traverse_below(CullTraverserData &data);
 
+  INLINE static void flush_level();
+
 public:
   // Statistics
   static PStatCollector _nodes_pcollector;

+ 11 - 0
panda/src/pgraph/renderState.I

@@ -580,6 +580,17 @@ set_shader_expansion(ShaderExpansion *exp) {
   _shader_expansion = exp;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::flush_level
+//       Access: Public, Static
+//  Description: Flushes the _node_counter and _cache_counter collectors.
+////////////////////////////////////////////////////////////////////
+INLINE void RenderState::
+flush_level() {  // invoked from render_frame() in graphicsEngine.cxx
+  _node_counter.flush_level();
+  _cache_counter.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::CompositionCycleDescEntry::Constructor
 //       Access: Public

+ 2 - 0
panda/src/pgraph/renderState.h

@@ -155,6 +155,8 @@ public:
   INLINE ShaderExpansion *get_shader_expansion() const;
   INLINE void set_shader_expansion(ShaderExpansion *exp);
 
+  INLINE static void flush_level();
+
 private:
   class CompositionCycleDescEntry {
   public:

+ 11 - 0
panda/src/pgraph/transformState.I

@@ -740,6 +740,17 @@ node_unref() const {
 #endif  // DO_PSTATS
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: TransformState::flush_level
+//       Access: Public, Static
+//  Description: Flushes the _node_counter and _cache_counter collectors.
+////////////////////////////////////////////////////////////////////
+INLINE void TransformState::
+flush_level() {  // invoked from render_frame() in graphicsEngine.cxx
+  _node_counter.flush_level();
+  _cache_counter.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: TransformState::check_singular
 //       Access: Private

+ 2 - 0
panda/src/pgraph/transformState.h

@@ -190,6 +190,8 @@ PUBLISHED:
 public:
   static void init_states();
 
+  INLINE static void flush_level();
+
 private:
   class CompositionCycleDescEntry {
   public:

+ 36 - 16
panda/src/pstatclient/pStatClient.I

@@ -80,7 +80,7 @@ get_max_rate() const {
 INLINE int PStatClient::
 get_num_collectors() const {
   ReMutexHolder holder(_lock);
-  return _collectors.size();
+  return _num_collectors;  // NOTE(review): still read under _lock, unlike the lock-free accessors -- confirm intended
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -90,10 +90,9 @@ get_num_collectors() const {
 ////////////////////////////////////////////////////////////////////
 INLINE PStatCollectorDef *PStatClient::
 get_collector_def(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_collectors.size(), NULL);
+  // _lock is no longer held here, so read the count through
+  // AtomicAdjust, as the other lock-free accessors do.
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_collectors), NULL);
 
-  return _collectors[index].get_def(this, index);
+  return get_collector_ptr(index)->get_def(this, index);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -105,7 +104,7 @@ get_collector_def(int index) const {
 INLINE int PStatClient::
 get_num_threads() const {
   ReMutexHolder holder(_lock);
-  return _threads.size();
+  return _num_threads;  // NOTE(review): still read under _lock, unlike the lock-free accessors -- confirm intended
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -115,9 +114,8 @@ get_num_threads() const {
 ////////////////////////////////////////////////////////////////////
 INLINE string PStatClient::
 get_thread_name(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_threads.size(), string());
-  return _threads[index]._name;
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_threads), string());
+  return get_thread_ptr(index)->_name;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -127,9 +125,8 @@ get_thread_name(int index) const {
 ////////////////////////////////////////////////////////////////////
 INLINE string PStatClient::
 get_thread_sync_name(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_threads.size(), string());
-  return _threads[index]._sync_name;
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_threads), string());
+  return get_thread_ptr(index)->_sync_name;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -140,12 +137,12 @@ get_thread_sync_name(int index) const {
 ////////////////////////////////////////////////////////////////////
 INLINE Thread *PStatClient::
 get_thread_object(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_threads.size(), NULL);
-  if (_threads[index]._thread.was_deleted()) {
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_threads), NULL);
+  InternalThread *thread = get_thread_ptr(index);  // fetched once, without taking _lock
+  if (thread->_thread.was_deleted()) {  // Thread already deleted: nothing to return
     return NULL;
   }
-  return _threads[index]._thread;
+  return thread->_thread;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -227,7 +224,6 @@ client_connect(string hostname, int port) {
 ////////////////////////////////////////////////////////////////////
 INLINE bool PStatClient::
 client_is_connected() const {
-  ReMutexHolder holder(_lock);
   return has_impl() && _impl->client_is_connected();
 }
 
@@ -286,6 +282,30 @@ get_impl() const {
   return ((PStatClient *)this)->get_impl();
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: PStatClient::get_collector_ptr
+//       Access: Private
+//  Description: Returns the nth collector without requiring _lock; the
+//               array is read via AtomicAdjust::get_ptr().  No range check.
+////////////////////////////////////////////////////////////////////
+INLINE PStatClient::Collector *PStatClient::
+get_collector_ptr(int collector_index) const {
+  CollectorPointer *collectors = (CollectorPointer *)AtomicAdjust::get_ptr(_collectors);
+  return collectors[collector_index];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatClient::get_thread_ptr
+//       Access: Private
+//  Description: Returns the nth thread without requiring _lock; the
+//               array is read via AtomicAdjust::get_ptr().  No range check.
+////////////////////////////////////////////////////////////////////
+INLINE PStatClient::InternalThread *PStatClient::
+get_thread_ptr(int thread_index) const {
+  ThreadPointer *threads = (ThreadPointer *)AtomicAdjust::get_ptr(_threads);
+  return threads[thread_index];
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatClient::Collector::Constructor
 //       Access: Public

+ 247 - 219
panda/src/pstatclient/pStatClient.cxx

@@ -30,13 +30,11 @@
 #include "config_pstats.h"
 #include "pStatProperties.h"
 #include "thread.h"
-#include "mutexDebug.h"
 
 PStatCollector PStatClient::_total_size_pcollector("Memory usage");
 PStatCollector PStatClient::_cpp_size_pcollector("Memory usage:C++");
 PStatCollector PStatClient::_interpreter_size_pcollector("Memory usage:Interpreter");
 PStatCollector PStatClient::_pstats_pcollector("*:PStats");
-PStatCollector PStatClient::_mutex_wait_pcollector("Wait:Mutex block");
 
 PStatClient *PStatClient::_global_pstats = NULL;
 
@@ -62,22 +60,25 @@ PStatClient::
 PStatClient() :
   _impl(NULL)
 {
+  _collectors = NULL;
+  _collectors_size = 0;
+  _num_collectors = 0;
+
+  _threads = NULL;
+  _threads_size = 0;
+  _num_threads = 0;
+
   // We always have a collector at index 0 named "Frame".  This tracks
   // the total frame time and is the root of all other collectors.  We
   // have to make this one by hand since it's the root.
-  Collector collector(0, "Frame");
-  //collector._def = new PStatCollectorDef(0, "Frame");
-  //collector._def->_parent_index = 0;
-  //collector._def->_suggested_color.set(0.5, 0.5, 0.5);
-  _collectors.push_back(collector);
+  Collector *collector = new Collector(0, "Frame");
+  //collector->_def = new PStatCollectorDef(0, "Frame");
+  //collector->_def->_parent_index = 0;
+  //collector->_def->_suggested_color.set(0.5, 0.5, 0.5);
+  add_collector(collector);
 
   // The main thread is always at index 0.
   make_thread(Thread::get_main_thread());
-
-  // Assign the hacky callback pointers into MutexDebug.
-#ifdef DEBUG_THREADS
-  MutexDebug::set_pstats_callbacks(&mutex_wait_start, &mutex_wait_stop);
-#endif  // DEBUG_THREADS
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -101,8 +102,7 @@ PStatClient::
 ////////////////////////////////////////////////////////////////////
 PStatCollector PStatClient::
 get_collector(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_collectors.size(), PStatCollector());
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_collectors), PStatCollector());
   return PStatCollector((PStatClient *)this, index);
 }
 
@@ -113,10 +113,9 @@ get_collector(int index) const {
 ////////////////////////////////////////////////////////////////////
 string PStatClient::
 get_collector_name(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_collectors.size(), string());
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_collectors), string());
 
-  return _collectors[index].get_name();
+  return get_collector_ptr(index)->get_name();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -129,15 +128,15 @@ get_collector_name(int index) const {
 ////////////////////////////////////////////////////////////////////
 string PStatClient::
 get_collector_fullname(int index) const {
-  ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_collectors.size(), string());
+  nassertr(index >= 0 && index < AtomicAdjust::get(_num_collectors), string());
 
-  int parent_index = _collectors[index].get_parent_index();
+  Collector *collector = get_collector_ptr(index);
+  int parent_index = collector->get_parent_index();  // index 0 is the root "Frame" collector
   if (parent_index == 0) {
-    return _collectors[index].get_name();
+    return collector->get_name();  // direct child of the root: no prefix
   } else {
     return get_collector_fullname(parent_index) + ":" + 
-      _collectors[index].get_name();
+      collector->get_name();  // otherwise prepend the parent's full name, recursively
   }
 }
 
@@ -149,7 +148,7 @@ get_collector_fullname(int index) const {
 PStatThread PStatClient::
 get_thread(int index) const {
   ReMutexHolder holder(_lock);
-  nassertr(index >= 0 && index < (int)_threads.size(), PStatThread());
+  nassertr(index >= 0 && index < _num_threads, PStatThread());  // plain read; _lock is still held here
   return PStatThread((PStatClient *)this, index);
 }
 
@@ -286,19 +285,21 @@ client_disconnect() {
     _impl->client_disconnect();
   }
 
-  Threads::iterator ti;
-  for (ti = _threads.begin(); ti != _threads.end(); ++ti) {
-    (*ti)._frame_number = 0;
-    (*ti)._is_active = false;
-    (*ti)._next_packet = 0.0;
-    (*ti)._frame_data.clear();
+  ThreadPointer *threads = (ThreadPointer *)_threads;
+  for (int ti = 0; ti < _num_threads; ++ti) {
+    InternalThread *thread = threads[ti];
+    thread->_frame_number = 0;
+    thread->_is_active = false;
+    thread->_next_packet = 0.0;
+    thread->_frame_data.clear();
   }
 
-  Collectors::iterator ci;
-  for (ci = _collectors.begin(); ci != _collectors.end(); ++ci) {
+  CollectorPointer *collectors = (CollectorPointer *)_collectors;
+  for (int ci = 0; ci < _num_collectors; ++ci) {
+    Collector *collector = collectors[ci];
     PerThread::iterator ii;
-    for (ii = (*ci)._per_thread.begin();
-         ii != (*ci)._per_thread.end();
+    for (ii = collector->_per_thread.begin();
+         ii != collector->_per_thread.end();
          ++ii) {
       (*ii)._nested_count = 0;
     }
@@ -376,44 +377,41 @@ PStatCollector PStatClient::
 make_collector_with_name(int parent_index, const string &name) {
   ReMutexHolder holder(_lock);
 
-  nassertr(parent_index >= 0 && parent_index < (int)_collectors.size(),
+  nassertr(parent_index >= 0 && parent_index < _num_collectors,
            PStatCollector());
 
-  Collector &parent = _collectors[parent_index];
+  Collector *parent = get_collector_ptr(parent_index);
 
   // A special case: if we asked for a child the same name as its
   // parent, we really meant the parent.  That is, "Frame:Frame" is
   // really the same collector as "Frame".
-  if (parent.get_name() == name) {
+  if (parent->get_name() == name) {
     return PStatCollector(this, parent_index);
   }
 
-  ThingsByName::const_iterator ni = parent._children.find(name);
+  ThingsByName::const_iterator ni = parent->_children.find(name);
 
-  if (ni != parent._children.end()) {
+  if (ni != parent->_children.end()) {
     // We already had a collector by this name; return it.
     int index = (*ni).second;
-    nassertr(index >= 0 && index < (int)_collectors.size(), PStatCollector());
+    nassertr(index >= 0 && index < _num_collectors, PStatCollector());
     return PStatCollector(this, (*ni).second);
   }
 
   // Create a new collector for this name.
-  int new_index = _collectors.size();
-  parent._children.insert(ThingsByName::value_type(name, new_index));
-
-  // Extending the vector invalidates the parent reference, above.
-  _collectors.push_back(Collector(parent_index, name));
+  int new_index = _num_collectors;
+  parent->_children.insert(ThingsByName::value_type(name, new_index));
 
-  Collector &collector = _collectors.back();
-
-  // collector._def = new PStatCollectorDef(new_index, name);
-  // collector._def->set_parent(*_collectors[parent_index]._def);
-  // initialize_collector_def(this, collector._def);
+  Collector *collector = new Collector(parent_index, name);
+  // collector->_def = new PStatCollectorDef(new_index, name);
+  // collector->_def->set_parent(*_collectors[parent_index]._def);
+  // initialize_collector_def(this, collector->_def);
 
   // We need one PerThreadData for each thread.
-  while (collector._per_thread.size() < _threads.size()) {
-    collector._per_thread.push_back(PerThreadData());
+  while (collector->_per_thread.size() < _num_threads) {
+    collector->_per_thread.push_back(PerThreadData());
   }
+  add_collector(collector);
 
   return PStatCollector(this, new_index);
 }
@@ -473,11 +471,12 @@ do_make_thread(Thread *thread) {
          vi != indices.end(); 
          ++vi) {
       int index = (*vi);
-      nassertr(index >= 0 && index < (int)_threads.size(), PStatThread());
-      if (_threads[index]._thread.was_deleted() &&
-          _threads[index]._sync_name == thread->get_sync_name()) {
+      nassertr(index >= 0 && index < _num_threads, PStatThread());
+      ThreadPointer *threads = (ThreadPointer *)_threads;
+      if (threads[index]->_thread.was_deleted() &&
+          threads[index]->_sync_name == thread->get_sync_name()) {
         // Yes, re-use this one.
-        _threads[index]._thread = thread;
+        threads[index]->_thread = thread;
         thread->set_pstats_index(index);
         return PStatThread(this, index);
       }
@@ -485,27 +484,28 @@ do_make_thread(Thread *thread) {
   }
 
   // Create a new PStatsThread for this thread pointer.
-  int new_index = _threads.size();
+  int new_index = _num_threads;
   thread->set_pstats_index(new_index);
   _threads_by_name[thread->get_name()].push_back(new_index);
   _threads_by_sync_name[thread->get_sync_name()].push_back(new_index);
         
-  InternalThread pthread;
-  pthread._thread = thread;
-  pthread._name = thread->get_name();
-  pthread._sync_name = thread->get_sync_name();
-  pthread._is_active = false;
-  pthread._next_packet = 0.0;
-  pthread._frame_number = 0;
+  InternalThread *pthread = new InternalThread;
+  pthread->_thread = thread;
+  pthread->_name = thread->get_name();
+  pthread->_sync_name = thread->get_sync_name();
+  pthread->_is_active = false;
+  pthread->_next_packet = 0.0;
+  pthread->_frame_number = 0;
 
-  _threads.push_back(pthread);
+  add_thread(pthread);
 
   // We need an additional PerThreadData for this thread in all of the
   // collectors.
-  Collectors::iterator ci;
-  for (ci = _collectors.begin(); ci != _collectors.end(); ++ci) {
-    (*ci)._per_thread.push_back(PerThreadData());
-    nassertr((*ci)._per_thread.size() == _threads.size(), PStatThread());
+  CollectorPointer *collectors = (CollectorPointer *)_collectors;
+  for (int ci = 0; ci < _num_collectors; ++ci) {
+    Collector *collector = collectors[ci];
+    collector->_per_thread.push_back(PerThreadData());
+    nassertr(collector->_per_thread.size() == _num_threads, PStatThread());
   }
 
   return PStatThread(this, new_index);
@@ -523,14 +523,12 @@ do_make_thread(Thread *thread) {
 ////////////////////////////////////////////////////////////////////
 bool PStatClient::
 is_active(int collector_index, int thread_index) const {
-  ReMutexHolder holder(_lock);
-
-  nassertr(collector_index >= 0 && collector_index < (int)_collectors.size(), false);
-  nassertr(thread_index >= 0 && thread_index < (int)_threads.size(), false);
+  nassertr(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors), false);
+  nassertr(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads), false);
 
   return (client_is_connected() &&
-          _collectors[collector_index].is_active() &&
-          _threads[thread_index]._is_active);
+          get_collector_ptr(collector_index)->is_active() &&
+          get_thread_ptr(thread_index)->_is_active);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -544,14 +542,24 @@ is_active(int collector_index, int thread_index) const {
 ////////////////////////////////////////////////////////////////////
 bool PStatClient::
 is_started(int collector_index, int thread_index) const {
-  ReMutexHolder holder(_lock);
+  nassertr(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors), false);
+  nassertr(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads), false);
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
 
-  nassertr(collector_index >= 0 && collector_index < (int)_collectors.size(), false);
-  nassertr(thread_index >= 0 && thread_index < (int)_threads.size(), false);
+  if (client_is_connected() && collector->is_active() && thread->_is_active) {
+    MutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
+      // Not started.
+      return false;
+    }
+    // Started.
+    return true;
+  }
 
-  return (_collectors[collector_index].is_active() &&
-          _threads[thread_index]._is_active &&
-          _collectors[collector_index]._per_thread[thread_index]._nested_count != 0);
+  // Not even connected.
+  return false;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -563,23 +571,23 @@ is_started(int collector_index, int thread_index) const {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 start(int collector_index, int thread_index) {
-  ReMutexHolder holder(_lock);
-
 #ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)_threads.size());
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
 #endif
 
-  if (client_is_connected() && 
-      _collectors[collector_index].is_active() &&
-      _threads[thread_index]._is_active) {
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+
+  if (client_is_connected() && collector->is_active() && thread->_is_active) {
+    MutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       // This collector wasn't already started in this thread; record
       // a new data point.
-      _threads[thread_index]._frame_data.add_start(collector_index, 
-                                                   get_clock().get_real_time());
+      thread->_frame_data.add_start(collector_index, 
+                                    get_clock().get_real_time());
     }
-    _collectors[collector_index]._per_thread[thread_index]._nested_count++;
+    collector->_per_thread[thread_index]._nested_count++;
   }
 }
 
@@ -592,22 +600,22 @@ start(int collector_index, int thread_index) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 start(int collector_index, int thread_index, float as_of) {
-  ReMutexHolder holder(_lock);
-
 #ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)_threads.size());
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
 #endif
 
-  if (client_is_connected() && 
-      _collectors[collector_index].is_active() &&
-      _threads[thread_index]._is_active) {
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+
+  if (client_is_connected() && collector->is_active() && thread->_is_active) {
+    MutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       // This collector wasn't already started in this thread; record
       // a new data point.
-      _threads[thread_index]._frame_data.add_start(collector_index, as_of);
+      thread->_frame_data.add_start(collector_index, as_of);
     }
-    _collectors[collector_index]._per_thread[thread_index]._nested_count++;
+    collector->_per_thread[thread_index]._nested_count++;
   }
 }
 
@@ -620,17 +628,17 @@ start(int collector_index, int thread_index, float as_of) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 stop(int collector_index, int thread_index) {
-  ReMutexHolder holder(_lock);
-
 #ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)_threads.size());
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
 #endif
 
-  if (client_is_connected() && 
-      _collectors[collector_index].is_active() &&
-      _threads[thread_index]._is_active) {
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+
+  if (client_is_connected() && collector->is_active() && thread->_is_active) {
+    MutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       if (pstats_cat.is_debug()) {
         pstats_cat.debug()
           << "Collector " << get_collector_fullname(collector_index)
@@ -640,13 +648,13 @@ stop(int collector_index, int thread_index) {
       return;
     }
 
-    _collectors[collector_index]._per_thread[thread_index]._nested_count--;
+    collector->_per_thread[thread_index]._nested_count--;
 
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       // This collector has now been completely stopped; record a new
       // data point.
-      _threads[thread_index]._frame_data.add_stop(collector_index,
-                                                  get_clock().get_real_time());
+      thread->_frame_data.add_stop(collector_index,
+                                   get_clock().get_real_time());
     }
   }
 }
@@ -660,17 +668,17 @@ stop(int collector_index, int thread_index) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 stop(int collector_index, int thread_index, float as_of) {
-  ReMutexHolder holder(_lock);
-
 #ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)_threads.size());
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
 #endif
 
-  if (client_is_connected() &&
-      _collectors[collector_index].is_active() &&
-      _threads[thread_index]._is_active) {
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+
+  if (client_is_connected() && collector->is_active() && thread->_is_active) {
+    MutexHolder holder(thread->_thread_lock);
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       if (pstats_cat.is_debug()) {
         pstats_cat.debug()
           << "Collector " << get_collector_fullname(collector_index)
@@ -680,12 +688,12 @@ stop(int collector_index, int thread_index, float as_of) {
       return;
     }
 
-    _collectors[collector_index]._per_thread[thread_index]._nested_count--;
+    collector->_per_thread[thread_index]._nested_count--;
 
-    if (_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
+    if (collector->_per_thread[thread_index]._nested_count == 0) {
       // This collector has now been completely stopped; record a new
       // data point.
-      _threads[thread_index]._frame_data.add_stop(collector_index, as_of);
+      thread->_frame_data.add_stop(collector_index, as_of);
     }
   }
 }
@@ -702,10 +710,17 @@ stop(int collector_index, int thread_index, float as_of) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 clear_level(int collector_index, int thread_index) {
-  ReMutexHolder holder(_lock);
+#ifdef _DEBUG
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
+#endif
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+  MutexHolder holder(thread->_thread_lock);  // per-thread lock guards this thread's _per_thread slot
 
-  _collectors[collector_index]._per_thread[thread_index]._has_level = false;
-  _collectors[collector_index]._per_thread[thread_index]._level = 0.0;
+  collector->_per_thread[thread_index]._has_level = false;  // cleared: new_frame() only reports collectors with _has_level set
+  collector->_per_thread[thread_index]._level = 0.0;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -719,15 +734,24 @@ clear_level(int collector_index, int thread_index) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 set_level(int collector_index, int thread_index, float level) {
-  ReMutexHolder holder(_lock);
+#ifdef _DEBUG
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
+#endif
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
 
   // We don't want to condition this on whether the client is already
   // connected or the collector is already active, since we might
   // connect the client later, and we will want to have an accurate
   // value at that time.
-  level *= get_collector_def(collector_index)->_factor;
-  _collectors[collector_index]._per_thread[thread_index]._has_level = true;
-  _collectors[collector_index]._per_thread[thread_index]._level = level;
+  MutexHolder holder(thread->_thread_lock);
+
+  level *= collector->get_def(this, collector_index)->_factor;
+
+  collector->_per_thread[thread_index]._has_level = true;
+  collector->_per_thread[thread_index]._level = level;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -743,11 +767,19 @@ set_level(int collector_index, int thread_index, float level) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 add_level(int collector_index, int thread_index, float increment) {
-  ReMutexHolder holder(_lock);
+#ifdef _DEBUG
+  nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
+  nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
+#endif
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+  MutexHolder holder(thread->_thread_lock);
+
+  increment *= collector->get_def(this, collector_index)->_factor;
 
-  increment *= get_collector_def(collector_index)->_factor;
-  _collectors[collector_index]._per_thread[thread_index]._has_level = true;
-  _collectors[collector_index]._per_thread[thread_index]._level += increment;
+  collector->_per_thread[thread_index]._has_level = true;
+  collector->_per_thread[thread_index]._level += increment;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -760,99 +792,92 @@ add_level(int collector_index, int thread_index, float increment) {
 ////////////////////////////////////////////////////////////////////
 float PStatClient::
 get_level(int collector_index, int thread_index) const {
-  ReMutexHolder holder(_lock);
+#ifdef _DEBUG
+  nassertr(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors), 0.0f);  // nassertr: this function returns a float
+  nassertr(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads), 0.0f);
+#endif
+
+  Collector *collector = get_collector_ptr(collector_index);
+  InternalThread *thread = get_thread_ptr(thread_index);
+  MutexHolder holder(thread->_thread_lock);  // per-thread lock guards this thread's _per_thread slot
 
-  return _collectors[collector_index]._per_thread[thread_index]._level /
-    get_collector_def(collector_index)->_factor;
+  float factor = collector->get_def(this, collector_index)->_factor;  // set_level()/add_level() store the level pre-multiplied by _factor
+
+  return collector->_per_thread[thread_index]._level / factor;  // undo the scaling to return the caller's units
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: PStatClient::mutex_wait_start
-//       Access: Private, Static
-//  Description: A special-purpose callback function we use to hack a
-//               PStatTimer into the MutexDebug system.  This will be
-//               called (when DEBUG_THREADS is set) when the thread
-//               goes to sleep on a mutex.
+//     Function: PStatClient::add_collector
+//       Access: Private
+//  Description: Adds a new Collector entry to the _collectors array,
+//               in a thread-safe manner.  Assumes _lock is already
+//               held.
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
-mutex_wait_start() {
-#ifdef DEBUG_THREADS
-  // We replicate the code from start(), except we do not grab the
-  // mutex.  We can't, because that would be recursive (this function
-  // is called from deep with Mutex::lock()).  However, we don't need
-  // to, because this code is only called when the global mutex is
-  // held (the global mutex only exists in the case of MUTEX_DEBUG).
-  PStatClient *self = get_global_pstats();
-  int collector_index = _mutex_wait_pcollector._index;
-  int thread_index = self->do_get_current_thread()._index;
-
-#ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)self->_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)self->_threads.size());
-#endif
+add_collector(PStatClient::Collector *collector) {
+  if (_num_collectors >= _collectors_size) {
+    // We need to grow the array.  We have to be careful here, because
+    // there might be clients accessing the array right now who are
+    // not protected by the lock.
+    int new_collectors_size = (_collectors_size == 0) ? 128 : _collectors_size * 2;
+    CollectorPointer *new_collectors = new CollectorPointer[new_collectors_size];
+    memcpy(new_collectors, _collectors, _num_collectors * sizeof(CollectorPointer));
+    AtomicAdjust::set_ptr(_collectors, new_collectors);
+    AtomicAdjust::set(_collectors_size, new_collectors_size);
+
+    // Now, we still have the old array, which we allow to leak.  We
+    // should delete it, but there might be a thread out there that's
+    // still trying to access it, so we can't safely delete it; and it
+    // doesn't really matter much, since it's not a big leak.  (We
+    // will only reallocate the array so many times in an application,
+    // and then no more.)
+
+    new_collectors[_num_collectors] = collector;
+    AtomicAdjust::inc(_num_collectors);
 
-  bool is_connected = self->has_impl() && self->_impl->client_is_connected();
-  if (is_connected && 
-      self->_collectors[collector_index].is_active() &&
-      self->_threads[thread_index]._is_active) {
-    if (self->_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
-      // This collector wasn't already started in this thread; record
-      // a new data point.
-      self->_threads[thread_index]._frame_data.add_start(collector_index, 
-                                                         self->_impl->get_clock().get_real_time());
-    }
-    self->_collectors[collector_index]._per_thread[thread_index]._nested_count++;
+  } else {
+    CollectorPointer *collectors = (CollectorPointer *)_collectors;
+    collectors[_num_collectors] = collector;
+    AtomicAdjust::inc(_num_collectors);
   }
-#endif  // DEBUG_THREADS
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: PStatClient::mutex_wait_stop
-//       Access: Private, Static
-//  Description: A special-purpose callback function we use to hack a
-//               PStatTimer into the MutexDebug system.  This will be
-//               called (when DEBUG_THREADS is set) when the thread
-//               wakes up after blocking on a mutex.
+//     Function: PStatClient::add_thread
+//       Access: Private
+//  Description: Adds a new InternalThread entry to the _threads
+//               array, in a thread-safe manner.  Assumes _lock is
+//               already held.
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
-mutex_wait_stop() {
-#ifdef DEBUG_THREADS
-  // As above, we replicate the code from stop(), except we do not
-  // grab the mutex.
-
-  PStatClient *self = get_global_pstats();
-  int collector_index = _mutex_wait_pcollector._index;
-  int thread_index = self->do_get_current_thread()._index;
-
-#ifdef _DEBUG
-  nassertv(collector_index >= 0 && collector_index < (int)self->_collectors.size());
-  nassertv(thread_index >= 0 && thread_index < (int)self->_threads.size());
-#endif
-
-  bool is_connected = self->has_impl() && self->_impl->client_is_connected();
-  if (is_connected && 
-      self->_collectors[collector_index].is_active() &&
-      self->_threads[thread_index]._is_active) {
-    if (self->_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
-      if (pstats_cat.is_debug()) {
-        pstats_cat.debug()
-          << "Collector " << self->get_collector_fullname(collector_index)
-          << " was already stopped in thread " << self->get_thread_name(thread_index)
-          << "!\n";
-      }
-      return;
-    }
+add_thread(PStatClient::InternalThread *thread) {
+  if (_num_threads >= _threads_size) {
+    // We need to grow the array.  We have to be careful here, because
+    // there might be clients accessing the array right now who are
+    // not protected by the lock.
+    int new_threads_size = (_threads_size == 0) ? 128 : _threads_size * 2;
+    ThreadPointer *new_threads = new ThreadPointer[new_threads_size];
+    memcpy(new_threads, _threads, _num_threads * sizeof(ThreadPointer));
+    // We assume that assignment to a pointer and to an int are each
+    // atomic.
+    AtomicAdjust::set_ptr(_threads, new_threads);
+    AtomicAdjust::set(_threads_size, new_threads_size);
+
+    // Now, we still have the old array, which we allow to leak.  We
+    // should delete it, but there might be a thread out there that's
+    // still trying to access it, so we can't safely delete it; and it
+    // doesn't really matter much, since it's not a big leak.  (We
+    // will only reallocate the array so many times in an application,
+    // and then no more.)
+
+    new_threads[_num_threads] = thread;
+    AtomicAdjust::inc(_num_threads);
 
-    self->_collectors[collector_index]._per_thread[thread_index]._nested_count--;
-    if (self->_collectors[collector_index]._per_thread[thread_index]._nested_count == 0) {
-      // This collector has now been completely stopped; record a new
-      // data point.
-      self->_threads[thread_index]._frame_data.add_stop(collector_index,
-                                                        self->_impl->get_clock().get_real_time());
-    }
+  } else {
+    ThreadPointer *threads = (ThreadPointer *)_threads;
+    threads[_num_threads] = thread;
+    AtomicAdjust::inc(_num_threads);
   }
-
-#endif // DEBUG_THREADS
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -862,13 +887,16 @@ mutex_wait_stop() {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::Collector::
 make_def(const PStatClient *client, int this_index) {
-  _def = new PStatCollectorDef(this_index, _name);
-  if (_parent_index != this_index) {
-    const PStatCollectorDef *parent_def = 
-      client->_collectors[_parent_index].get_def(client, _parent_index);
-    _def->set_parent(*parent_def);
+  ReMutexHolder holder(client->_lock);
+  if (_def == (PStatCollectorDef *)NULL) {
+    _def = new PStatCollectorDef(this_index, _name);
+    if (_parent_index != this_index) {
+      const PStatCollectorDef *parent_def = 
+        client->get_collector_def(_parent_index);
+      _def->set_parent(*parent_def);
+    }
+    initialize_collector_def(client, _def);
   }
-  initialize_collector_def(client, _def);
 }
 
 #endif // DO_PSTATS

+ 25 - 7
panda/src/pstatclient/pStatClient.h

@@ -25,11 +25,15 @@
 #include "pStatClientImpl.h"
 #include "pStatCollectorDef.h"
 #include "reMutex.h"
+#include "pmutex.h"
 #include "reMutexHolder.h"
+#include "mutexHolder.h"
 #include "pmap.h"
 #include "thread.h"
 #include "weakPointerTo.h"
 #include "vector_int.h"
+#include "atomicAdjust.h"
+#include "numeric_types.h"
 
 class PStatCollector;
 class PStatCollectorDef;
@@ -124,8 +128,13 @@ private:
   void add_level(int collector_index, int thread_index, float increment);
   float get_level(int collector_index, int thread_index) const;
 
-  static void mutex_wait_start();
-  static void mutex_wait_stop();
+  class Collector;
+  class InternalThread;
+  void add_collector(Collector *collector);
+  void add_thread(InternalThread *thread);
+
+  INLINE Collector *get_collector_ptr(int collector_index) const;
+  INLINE InternalThread *get_thread_ptr(int thread_index) const;
 
 private:
   // This mutex protects everything in this class.
@@ -175,8 +184,10 @@ private:
     ThingsByName _children;
     PerThread _per_thread;
   };
-  typedef pvector<Collector> Collectors;
-  Collectors _collectors;
+  typedef Collector *CollectorPointer;
+  void *_collectors;  // CollectorPointer *_collectors;
+  PN_int32 _collectors_size;  // size of the allocated array
+  PN_int32 _num_collectors;   // number of in-use elements within the array
 
   // This defines a single thread, i.e. a separate chain of execution,
   // independent of all other threads.  Timing and level data are
@@ -190,9 +201,16 @@ private:
     bool _is_active;
     int _frame_number;
     float _next_packet;
+
+    // This mutex is used to protect writes to _frame_data for this
+    // particular thread, as well as writes to the _per_thread data
+    // for this particular thread in the Collector class, above.
+    Mutex _thread_lock;
   };
-  typedef pvector<InternalThread> Threads;
-  Threads _threads;
+  typedef InternalThread *ThreadPointer;
+  void *_threads;  // ThreadPointer *_threads;
+  PN_int32 _threads_size;  // size of the allocated array
+  PN_int32 _num_threads;   // number of in-use elements within the array
 
   PStatClientImpl *_impl;
 
@@ -200,10 +218,10 @@ private:
   static PStatCollector _cpp_size_pcollector;
   static PStatCollector _interpreter_size_pcollector;
   static PStatCollector _pstats_pcollector;
-  static PStatCollector _mutex_wait_pcollector;
 
   static PStatClient *_global_pstats;
 
+  friend class Collector;
   friend class PStatCollector;
   friend class PStatThread;
   friend class PStatClientImpl;

+ 27 - 22
panda/src/pstatclient/pStatClientImpl.cxx

@@ -165,9 +165,9 @@ client_disconnect() {
 ////////////////////////////////////////////////////////////////////
 void PStatClientImpl::
 new_frame(int thread_index) {
-  nassertv(thread_index >= 0 && thread_index < (int)_client->_threads.size());
+  nassertv(thread_index >= 0 && thread_index < _client->_num_threads);
 
-  PStatClient::InternalThread &pthread = _client->_threads[thread_index];
+  PStatClient::InternalThread *pthread = _client->get_thread_ptr(thread_index);
 
   // If we're the main thread, we should exchange control packets with
   // the server.
@@ -178,34 +178,36 @@ new_frame(int thread_index) {
   // If we've got the UDP port by the time the frame starts, it's
   // time to become active and start actually tracking data.
   if (_got_udp_port) {
-    pthread._is_active = true;
+    pthread->_is_active = true;
   }
 
-  if (!pthread._is_active) {
+  if (!pthread->_is_active) {
     return;
   }
 
   float frame_start = _clock.get_real_time();
 
-  if (!pthread._frame_data.is_empty()) {
+  if (!pthread->_frame_data.is_empty()) {
     // Collector 0 is the whole frame.
     _client->stop(0, thread_index, frame_start);
 
     // Fill up the level data for all the collectors who have level
     // data for this pthread.
-    int num_collectors = _client->_collectors.size();
+    int num_collectors = _client->_num_collectors;
+    PStatClient::CollectorPointer *collectors = 
+      (PStatClient::CollectorPointer *)_client->_collectors;
     for (int i = 0; i < num_collectors; i++) {
       const PStatClient::PerThreadData &ptd = 
-        _client->_collectors[i]._per_thread[thread_index];
+        collectors[i]->_per_thread[thread_index];
       if (ptd._has_level) {
-        pthread._frame_data.add_level(i, ptd._level);
+        pthread->_frame_data.add_level(i, ptd._level);
       }
     }
     transmit_frame_data(thread_index);
   }
 
-  pthread._frame_data.clear();
-  pthread._frame_number++;
+  pthread->_frame_data.clear();
+  pthread->_frame_number++;
   _client->start(0, thread_index, frame_start);
 
   // Also record the time for the PStats operation itself.
@@ -222,15 +224,16 @@ new_frame(int thread_index) {
 ////////////////////////////////////////////////////////////////////
 void PStatClientImpl::
 transmit_frame_data(int thread_index) {
-  nassertv(thread_index >= 0 && thread_index < (int)_client->_threads.size());
-  if (_is_connected && _client->_threads[thread_index]._is_active) {
+  nassertv(thread_index >= 0 && thread_index < _client->_num_threads);
+  PStatClient::InternalThread *thread = _client->get_thread_ptr(thread_index);
+  if (_is_connected && thread->_is_active) {
 
     // We don't want to send too many packets in a hurry and flood the
     // server.  Check that enough time has elapsed for us to send a
     // new packet.  If not, we'll drop this packet on the floor and
     // send a new one next time around.
     float now = _clock.get_real_time();
-    if (now >= _client->_threads[thread_index]._next_packet) {
+    if (now >= thread->_next_packet) {
       // We don't want to send more than _max_rate UDP-size packets
       // per second, per thread.
       float packet_delay = 1.0 / _max_rate;
@@ -242,8 +245,8 @@ transmit_frame_data(int thread_index) {
       datagram.add_uint8(0);
 
       datagram.add_uint16(thread_index);
-      datagram.add_uint32(_client->_threads[thread_index]._frame_number);
-      _client->_threads[thread_index]._frame_data.write_datagram(datagram);
+      datagram.add_uint32(thread->_frame_number);
+      thread->_frame_data.write_datagram(datagram);
 
       if (_writer.is_valid_for_udp(datagram)) {
         if (_udp_count * _udp_count_factor < _tcp_count * _tcp_count_factor) {
@@ -280,7 +283,7 @@ transmit_frame_data(int thread_index) {
         packet_delay *= (float)packet_ratio;
       }
 
-      _client->_threads[thread_index]._next_packet = now + packet_delay;
+      thread->_next_packet = now + packet_delay;
     }
   }
 }
@@ -365,18 +368,18 @@ void PStatClientImpl::
 report_new_collectors() {
   nassertv(_is_connected);
 
-  if (_collectors_reported < (int)_client->_collectors.size()) {
+  if (_collectors_reported < _client->_num_collectors) {
     // Empirically, we determined that you can't send more than about
     // 1400 collectors at once without exceeding the 64K limit on a
     // single datagram.  So we limit ourselves here to sending only
     // half that many.
     static const int max_collectors_at_once = 700;
 
-    while (_collectors_reported < (int)_client->_collectors.size()) {
+    while (_collectors_reported < _client->_num_collectors) {
       PStatClientControlMessage message;
       message._type = PStatClientControlMessage::T_define_collectors;
       int i = 0;
-      while (_collectors_reported < (int)_client->_collectors.size() &&
+      while (_collectors_reported < _client->_num_collectors &&
              i < max_collectors_at_once) {
         message._collectors.push_back(_client->get_collector_def(_collectors_reported));
         _collectors_reported++;
@@ -400,12 +403,14 @@ void PStatClientImpl::
 report_new_threads() {
   nassertv(_is_connected);
 
-  if (_threads_reported < (int)_client->_threads.size()) {
+  if (_threads_reported < _client->_num_threads) {
     PStatClientControlMessage message;
     message._type = PStatClientControlMessage::T_define_threads;
     message._first_thread_index = _threads_reported;
-    while (_threads_reported < (int)_client->_threads.size()) {
-      message._names.push_back(_client->_threads[_threads_reported]._name);
+    PStatClient::ThreadPointer *threads = 
+      (PStatClient::ThreadPointer *)_client->_threads;
+    while (_threads_reported < _client->_num_threads) {
+      message._names.push_back(threads[_threads_reported]->_name);
       _threads_reported++;
     }
 

+ 68 - 10
panda/src/pstatclient/pStatCollector.I

@@ -27,7 +27,11 @@
 //               create your own Collector.
 ////////////////////////////////////////////////////////////////////
 INLINE PStatCollector::
-PStatCollector() {
+PStatCollector() : 
+  _client(NULL),
+  _index(0),
+  _level(0.0f)
+{
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -40,7 +44,8 @@ PStatCollector() {
 INLINE PStatCollector::
 PStatCollector(PStatClient *client, int index) :
   _client(client),
-  _index(index)
+  _index(index),
+  _level(0.0f)
 {
 }
 
@@ -65,7 +70,9 @@ PStatCollector(PStatClient *client, int index) :
 //               otherwise, the global client is used.
 ////////////////////////////////////////////////////////////////////
 INLINE PStatCollector::
-PStatCollector(const string &name, PStatClient *client) {
+PStatCollector(const string &name, PStatClient *client) :
+  _level(0.0f)
+{
   if (client == (PStatClient *)NULL) {
     client = PStatClient::get_global_pstats();
   }
@@ -96,7 +103,9 @@ PStatCollector(const string &name, PStatClient *client) {
 //               as its parent.
 ////////////////////////////////////////////////////////////////////
 INLINE PStatCollector::
-PStatCollector(const PStatCollector &parent, const string &name) {
+PStatCollector(const PStatCollector &parent, const string &name) :
+  _level(0.0f)
+{
   nassertv(parent._client != (PStatClient *)NULL);
   (*this) =
     parent._client->make_collector_with_relname(parent._index, name);
@@ -110,7 +119,8 @@ PStatCollector(const PStatCollector &parent, const string &name) {
 INLINE PStatCollector::
 PStatCollector(const PStatCollector &copy) :
   _client(copy._client),
-  _index(copy._index)
+  _index(copy._index),
+  _level(0.0f)
 {
 }
 
@@ -192,11 +202,12 @@ stop() {
 //  Description: Removes the level setting associated with this
 //               collector for the main thread.  The collector
 //               will no longer show up on any level graphs in the
-//               main thread.
+//               main thread.  Any unflushed level change is discarded.
 ////////////////////////////////////////////////////////////////////
 INLINE void PStatCollector::
 clear_level() {
   _client->clear_level(_index, 0);
+  _level = 0.0f;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -204,11 +215,12 @@ clear_level() {
 //       Access: Published
 //  Description: Sets the level setting associated with this
 //               collector for the main thread to the indicated
-//               value.
+//               value.  Any unflushed level change is discarded.
 ////////////////////////////////////////////////////////////////////
 INLINE void PStatCollector::
 set_level(float level) {
   _client->set_level(_index, 0, level);
+  _level = 0.0f;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -219,10 +231,14 @@ set_level(float level) {
 //               for the main thread.  If the collector did not
 //               already have a level setting for the main thread, it
 //               is initialized to 0.
+//
+//               As an optimization, the data is not immediately sent
+//               to the PStatClient.  It will be sent the next time
+//               flush_level() is called.
 ////////////////////////////////////////////////////////////////////
 INLINE void PStatCollector::
 add_level(float increment) {
-  _client->add_level(_index, 0, increment);
+  _level += increment;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -233,20 +249,62 @@ add_level(float increment) {
 //               collector for the main thread.  If the collector did
 //               not already have a level setting for the main thread,
 //               it is initialized to 0.
+//
+//               As an optimization, the data is not immediately sent
+//               to the PStatClient.  It will be sent the next time
+//               flush_level() is called.
 ////////////////////////////////////////////////////////////////////
 INLINE void PStatCollector::
 sub_level(float decrement) {
-  _client->add_level(_index, 0, -decrement);
+  _level -= decrement;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatCollector::add_level_now
+//       Access: Published
+//  Description: Calls add_level() and immediately calls flush_level().
+////////////////////////////////////////////////////////////////////
+INLINE void PStatCollector::
+add_level_now(float increment) {
+  add_level(increment);
+  flush_level();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatCollector::sub_level_now
+//       Access: Published
+//  Description: Calls sub_level() and immediately calls flush_level().
+////////////////////////////////////////////////////////////////////
+INLINE void PStatCollector::
+sub_level_now(float decrement) {
+  sub_level(decrement);
+  flush_level();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatCollector::flush_level
+//       Access: Published
+//  Description: Updates the PStatClient with the recent results from
+//               add_level() and sub_level().
+////////////////////////////////////////////////////////////////////
+INLINE void PStatCollector::
+flush_level() {
+  if (_level != 0.0f) {
+    _client->add_level(_index, 0, _level);
+    _level = 0.0f;
+  }
 }
 
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatCollector::get_level
 //       Access: Published
 //  Description: Returns the current level value of the given
-//               collector in the main thread.
+//               collector in the main thread.  This implicitly calls
+//               flush_level().
 ////////////////////////////////////////////////////////////////////
 INLINE float PStatCollector::
 get_level() {
+  flush_level();
   return _client->get_level(_index, 0);
 }
 

+ 4 - 0
panda/src/pstatclient/pStatCollector.h

@@ -73,6 +73,9 @@ PUBLISHED:
   INLINE void set_level(float level);
   INLINE void add_level(float increment);
   INLINE void sub_level(float decrement);
+  INLINE void add_level_now(float increment);
+  INLINE void sub_level_now(float decrement);
+  INLINE void flush_level();
   INLINE float get_level();
 
   INLINE void clear_thread_level();
@@ -99,6 +102,7 @@ PUBLISHED:
 private:
   PStatClient *_client;
   int _index;
+  float _level;
 
 friend class PStatClient;
 

+ 4 - 3
panda/src/pstatclient/pStatProperties.cxx

@@ -107,10 +107,11 @@ struct LevelCollectorProperties {
 };
 
 static TimeCollectorProperties time_properties[] = {
+  { 1, "Frame",                            { 0.95, 1.0, 0.35 } },
   { 1, "Wait",                             { 0.6, 0.6, 0.6 } },
   { 0, "Wait:Mutex block",                 { 0.5, 0.0, 1.0 } },
   { 1, "Wait:Thread sync",                 { 0.0, 1.0, 0.5 } },
-  { 1, "App",                              { 0.8, 0.0, 0.4 },  1.0 / 30.0 },
+  { 1, "App",                              { 0.0, 0.4, 0.8 },  1.0 / 30.0 },
   { 1, "App:Collisions",                   { 1.0, 0.5, 0.0 } },
   { 1, "App:Collisions:Reset",             { 0.0, 0.0, 0.5 } },
   { 0, "App:Data graph",                   { 0.5, 0.8, 0.4 } },
@@ -122,7 +123,7 @@ static TimeCollectorProperties time_properties[] = {
   { 0, "App:Show code:Nametags:3d",        { 1.0, 0.0, 0.0 } },
   { 0, "App:Show code:Nametags:3d:Contents", { 0.0, 0.5, 0.0 } },
   { 0, "App:Show code:Nametags:3d:Adjust",   { 0.5, 0.0, 0.5 } },
-  { 1, "Cull",                             { 0.0, 1.0, 0.0 },  1.0 / 30.0 },
+  { 1, "Cull",                             { 0.21, 0.68, 0.37 },  1.0 / 30.0 },
   { 1, "Cull:Setup",                       { 0.7, 0.4, 0.5 } },
   { 1, "Cull:Sort",                        { 0.3, 0.3, 0.6 } },
   { 1, "*:Show fps",                       { 0.5, 0.8, 1.0 } },
@@ -135,7 +136,7 @@ static TimeCollectorProperties time_properties[] = {
   { 1, "*:Animation",                      { 1.0, 0.0, 1.0 } },
   { 0, "*:Flatten",                        { 0.0, 0.7, 0.4 } },
   { 0, "*:State Cache",                    { 0.4, 0.7, 0.7 } },
-  { 1, "Draw",                             { 1.0, 0.0, 0.0 },  1.0 / 30.0 },
+  { 1, "Draw",                             { 0.83, 0.02, 0.01 },  1.0 / 30.0 },
   { 1, "Draw:Make current",                { 0.4, 0.2, 0.6 } },
   { 1, "Draw:Copy texture",                { 0.2, 0.6, 0.4 } },
   { 1, "Draw:Clear",                       { 0.0, 0.8, 0.6 } },