Browse Source

better performance analysis

David Rose 18 years ago
parent
commit
1287bcf8fa

+ 3 - 0
panda/src/pgraph/Sources.pp

@@ -23,6 +23,7 @@
     auxSceneData.I auxSceneData.h \
     bamFile.I bamFile.h \
     billboardEffect.I billboardEffect.h \
+    cacheStats.I cacheStats.h \
     camera.I camera.h \
     clipPlaneAttrib.I clipPlaneAttrib.h \
     colorAttrib.I colorAttrib.h \
@@ -134,6 +135,7 @@
     auxSceneData.cxx \
     bamFile.cxx \
     billboardEffect.cxx \
+    cacheStats.cxx \
     camera.cxx \
     clipPlaneAttrib.cxx \
     colorAttrib.cxx \
@@ -240,6 +242,7 @@
     auxSceneData.I auxSceneData.h \
     bamFile.I bamFile.h \
     billboardEffect.I billboardEffect.h \
+    cacheStats.I cacheStats.h \
     camera.I camera.h \
     clipPlaneAttrib.I clipPlaneAttrib.h \
     colorAttrib.I colorAttrib.h \

+ 119 - 0
panda/src/pgraph/cacheStats.I

@@ -0,0 +1,119 @@
+// Filename: cacheStats.I
+// Created by:  drose (24Jul07)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::maybe_report
+//       Access: Public
+//  Description: Outputs a report if enough time has elapsed.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+maybe_report(const char *name) {
+#ifndef NDEBUG
+  if (_cache_report) {
+    double now = ClockObject::get_global_clock()->get_real_time();
+    if (now - _last_reset < _cache_report_interval) {
+      return;
+    }
+    write(Notify::out(), name);
+    reset(now);
+  }
+#endif  // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::inc_hits
+//       Access: Public
+//  Description: Increments by 1 the count of cache hits.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+inc_hits() {
+#ifndef NDEBUG
+  ++_cache_hits;
+#endif // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::inc_misses
+//       Access: Public
+//  Description: Increments by 1 the count of cache misses.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+inc_misses() {
+#ifndef NDEBUG
+  ++_cache_misses;
+#endif // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::inc_adds
+//       Access: Public
+//  Description: Increments by 1 the count of elements added to the
+//               cache.  If is_new is true, the element was added to a
+//               previously empty hashtable.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+inc_adds(bool is_new) {
+#ifndef NDEBUG
+  if (is_new) {
+    ++_cache_new_adds;
+  }
+  ++_cache_adds;
+#endif // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::inc_dels
+//       Access: Public
+//  Description: Increments by 1 the count of elements removed from
+//               the cache.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+inc_dels() {
+#ifndef NDEBUG
+  ++_cache_dels;
+#endif // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::add_total_size
+//       Access: Public
+//  Description: Adds the indicated count (positive or negative) to
+//               the total number of entries for the cache
+//               (net occupied size of all the hashtables).
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+add_total_size(int count) {
+#ifndef NDEBUG
+  _total_cache_size += count;
+#endif  // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::add_num_states
+//       Access: Public
+//  Description: Adds the indicated count (positive or negative) to
+//               the total count of individual RenderState or
+//               TransformState objects.
+////////////////////////////////////////////////////////////////////
+INLINE void CacheStats::
+add_num_states(int count) {
+#ifndef NDEBUG
+  _num_states += count;
+#endif  // NDEBUG
+}

+ 74 - 0
panda/src/pgraph/cacheStats.cxx

@@ -0,0 +1,74 @@
+// Filename: cacheStats.cxx
+// Created by:  drose (24Jul07)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+#include "cacheStats.h"
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::init
+//       Access: Public
+//  Description: Initializes the CacheStats for the first time.  We
+//               don't use the constructor for this, since we can't
+//               guarantee ordering of static constructors.
+////////////////////////////////////////////////////////////////////
+void CacheStats::
+init() {
+#ifndef NDEBUG
+  reset(ClockObject::get_global_clock()->get_real_time());
+  _total_cache_size = 0;
+  _num_states = 0;
+
+  _cache_report = ConfigVariableBool("cache-report", false);
+  _cache_report_interval = ConfigVariableDouble("cache-report-interval", 5.0);
+#endif  // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::reset
+//       Access: Public
+//  Description: Reinitializes just those parts of the CacheStats that
+//               should be reset between each reporting interval.
+////////////////////////////////////////////////////////////////////
+void CacheStats::
+reset(double now) {
+#ifndef NDEBUG
+  _cache_hits = 0;
+  _cache_misses = 0;
+  _cache_adds = 0;
+  _cache_new_adds = 0;
+  _cache_dels = 0;
+  _last_reset = now;
+#endif  // NDEBUG
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CacheStats::write
+//       Access: Public
+//  Description: 
+////////////////////////////////////////////////////////////////////
+void CacheStats::
+write(ostream &out, const char *name) const {
+#ifndef NDEBUG
+  out << name << " cache: " << _cache_hits << " hits, " 
+      << _cache_misses << " misses\n"
+      << _cache_adds + _cache_new_adds << "(" << _cache_new_adds << ") adds(new), "
+      << _cache_dels << " dels, "
+      << _total_cache_size << " / " << _num_states << " = "
+      << (double)_total_cache_size / (double)_num_states 
+      << " average cache size\n";
+#endif  // NDEBUG
+}

+ 64 - 0
panda/src/pgraph/cacheStats.h

@@ -0,0 +1,64 @@
+// Filename: cacheStats.h
+// Created by:  drose (24Jul07)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// [email protected] .
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef CACHESTATS_H
+#define CACHESTATS_H
+
+#include "pandabase.h"
+#include "clockObject.h"
+#include "pnotify.h"
+
+////////////////////////////////////////////////////////////////////
+//       Class : CacheStats
+// Description : This is used to track the utilization of the
+//               TransformState and RenderState caches, for low-level
+//               performance tuning information.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDA_PGRAPH CacheStats {
+public:
+  void init();
+  void reset(double now);
+  void write(ostream &out, const char *name) const;
+  INLINE void maybe_report(const char *name);
+
+  INLINE void inc_hits();
+  INLINE void inc_misses();
+  INLINE void inc_adds(bool is_new);
+  INLINE void inc_dels();
+  INLINE void add_total_size(int count);
+  INLINE void add_num_states(int count);
+
+private:
+#ifndef NDEBUG
+  int _cache_hits;
+  int _cache_misses;
+  int _cache_adds;
+  int _cache_new_adds;
+  int _cache_dels;
+  int _total_cache_size;
+  int _num_states;
+  double _last_reset;
+
+  bool _cache_report;
+  double _cache_report_interval;
+#endif  // NDEBUG
+};
+
+#include "cacheStats.I"
+
+#endif

+ 1 - 0
panda/src/pgraph/pgraph_composite1.cxx

@@ -7,6 +7,7 @@
 #include "attribSlots.cxx"
 #include "bamFile.cxx"
 #include "billboardEffect.cxx"
+#include "cacheStats.cxx"
 #include "camera.cxx"
 #include "clipPlaneAttrib.cxx"
 #include "colorAttrib.cxx"

+ 147 - 95
panda/src/pgraph/renderState.cxx

@@ -51,6 +51,8 @@ PStatCollector RenderState::_state_invert_pcollector("*:State Cache:Invert State
 PStatCollector RenderState::_node_counter("RenderStates:On nodes");
 PStatCollector RenderState::_cache_counter("RenderStates:Cached");
 
+CacheStats RenderState::_cache_stats;
+
 TypeHandle RenderState::_type_handle;
 
 
@@ -69,6 +71,7 @@ RenderState() : _lock("RenderState") {
   _saved_entry = _states->end();
   _flags = 0;
   _last_mi = _mungers.end();
+  _cache_stats.add_num_states(1);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -107,11 +110,12 @@ RenderState::
 
   // unref() should have cleared these.
   nassertv(_saved_entry == _states->end());
-  nassertv(_composition_cache.empty() && _invert_composition_cache.empty());
+  nassertv(_composition_cache.is_empty() && _invert_composition_cache.is_empty());
 
   // If this was true at the beginning of the destructor, but is no
   // longer true now, probably we've been double-deleted.
   nassertv(get_ref_count() == 0);
+  _cache_stats.add_num_states(-1);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -346,15 +350,15 @@ compose(const RenderState *other) const {
   ReMutexHolder holder(*_states_lock);
 
   // Is this composition already cached?
-  CompositionCache::const_iterator ci = _composition_cache.find(other);
-  if (ci != _composition_cache.end()) {
-    const Composition &comp = (*ci).second;
+  int index = _composition_cache.find(other);
+  if (index != -1) {
+    Composition &comp = ((RenderState *)this)->_composition_cache.modify_data(index);
     if (comp._result == (const RenderState *)NULL) {
       // Well, it wasn't cached already, but we already had an entry
       // (probably created for the reverse direction), so use the same
       // entry to store the new result.
       CPT(RenderState) result = do_compose(other);
-      ((Composition &)comp)._result = result;
+      comp._result = result;
 
       if (result != (const RenderState *)this) {
         // See the comments below about the need to up the reference
@@ -363,8 +367,10 @@ compose(const RenderState *other) const {
       }
     }
     // Here's the cache!
+    _cache_stats.inc_hits();
     return comp._result;
   }
+  _cache_stats.inc_misses();
 
   // We need to make a new cache entry, both in this object and in the
   // other object.  We make both records so the other RenderState
@@ -375,10 +381,17 @@ compose(const RenderState *other) const {
   // result; the other will be NULL for now.
   CPT(RenderState) result = do_compose(other);
 
-  // Order is important here, in case other == this.
-  ((RenderState *)other)->_composition_cache[this]._result = NULL;
+  _cache_stats.add_total_size(1);
+  _cache_stats.inc_adds(_composition_cache.get_size() == 0);
+
   ((RenderState *)this)->_composition_cache[other]._result = result;
 
+  if (other != this) {
+    _cache_stats.add_total_size(1);
+    _cache_stats.inc_adds(other->_composition_cache.get_size() == 0);
+    ((RenderState *)other)->_composition_cache[this]._result = NULL;
+  }
+
   if (result != (const RenderState *)this) {
     // If the result of compose() is something other than this,
     // explicitly increment the reference count.  We have to be sure
@@ -391,6 +404,8 @@ compose(const RenderState *other) const {
     // that would be a self-referential leak.)
   }
 
+  _cache_stats.maybe_report("RenderState");
+
   return result;
 }
 
@@ -432,15 +447,15 @@ invert_compose(const RenderState *other) const {
   ReMutexHolder holder(*_states_lock);
 
   // Is this composition already cached?
-  CompositionCache::const_iterator ci = _invert_composition_cache.find(other);
-  if (ci != _invert_composition_cache.end()) {
-    const Composition &comp = (*ci).second;
+  int index = _invert_composition_cache.find(other);
+  if (index != -1) {
+    Composition &comp = ((RenderState *)this)->_invert_composition_cache.modify_data(index);
     if (comp._result == (const RenderState *)NULL) {
       // Well, it wasn't cached already, but we already had an entry
       // (probably created for the reverse direction), so use the same
       // entry to store the new result.
       CPT(RenderState) result = do_invert_compose(other);
-      ((Composition &)comp)._result = result;
+      comp._result = result;
 
       if (result != (const RenderState *)this) {
         // See the comments below about the need to up the reference
@@ -449,8 +464,10 @@ invert_compose(const RenderState *other) const {
       }
     }
     // Here's the cache!
+    _cache_stats.inc_hits();
     return comp._result;
   }
+  _cache_stats.inc_misses();
 
   // We need to make a new cache entry, both in this object and in the
   // other object.  We make both records so the other RenderState
@@ -461,9 +478,16 @@ invert_compose(const RenderState *other) const {
   // result; the other will be NULL for now.
   CPT(RenderState) result = do_invert_compose(other);
 
-  ((RenderState *)other)->_invert_composition_cache[this]._result = NULL;
+  _cache_stats.add_total_size(1);
+  _cache_stats.inc_adds(_invert_composition_cache.get_size() == 0);
   ((RenderState *)this)->_invert_composition_cache[other]._result = result;
 
+  if (other != this) {
+    _cache_stats.add_total_size(1);
+    _cache_stats.inc_adds(other->_invert_composition_cache.get_size() == 0);
+    ((RenderState *)other)->_invert_composition_cache[this]._result = NULL;
+  }
+
   if (result != (const RenderState *)this) {
     // If the result of compose() is something other than this,
     // explicitly increment the reference count.  We have to be sure
@@ -700,6 +724,8 @@ write(ostream &out, int indent_level) const {
     const Attribute &attribute = (*ai);
     attribute._attrib->write(out, indent_level + 2);
   }
+  indent(out, indent_level + 2) << _composition_cache << "\n";
+  indent(out, indent_level + 2) << _invert_composition_cache << "\n";
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -767,32 +793,34 @@ get_num_unused_states() {
   for (si = _states->begin(); si != _states->end(); ++si) {
     const RenderState *state = (*si);
 
-    CompositionCache::const_iterator ci;
-    for (ci = state->_composition_cache.begin();
-         ci != state->_composition_cache.end();
-         ++ci) {
-      const RenderState *result = (*ci).second._result;
-      if (result != (const RenderState *)NULL && result != state) {
-        // Here's a RenderState that's recorded in the cache.
-        // Count it.
-        pair<StateCount::iterator, bool> ir =
-          state_count.insert(StateCount::value_type(result, 1));
-        if (!ir.second) {
-          // If the above insert operation fails, then it's already in
-          // the cache; increment its value.
-          (*(ir.first)).second++;
+    int i;
+    int cache_size = state->_composition_cache.get_size();
+    for (i = 0; i < cache_size; ++i) {
+      if (state->_composition_cache.has_element(i)) {
+        const RenderState *result = state->_composition_cache.get_data(i)._result;
+        if (result != (const RenderState *)NULL && result != state) {
+          // Here's a RenderState that's recorded in the cache.
+          // Count it.
+          pair<StateCount::iterator, bool> ir =
+            state_count.insert(StateCount::value_type(result, 1));
+          if (!ir.second) {
+            // If the above insert operation fails, then it's already in
+            // the cache; increment its value.
+            (*(ir.first)).second++;
+          }
         }
       }
     }
-    for (ci = state->_invert_composition_cache.begin();
-         ci != state->_invert_composition_cache.end();
-         ++ci) {
-      const RenderState *result = (*ci).second._result;
-      if (result != (const RenderState *)NULL && result != state) {
-        pair<StateCount::iterator, bool> ir =
-          state_count.insert(StateCount::value_type(result, 1));
-        if (!ir.second) {
-          (*(ir.first)).second++;
+    cache_size = state->_invert_composition_cache.get_size();
+    for (i = 0; i < cache_size; ++i) {
+      if (state->_invert_composition_cache.has_element(i)) {
+        const RenderState *result = state->_invert_composition_cache.get_data(i)._result;
+        if (result != (const RenderState *)NULL && result != state) {
+          pair<StateCount::iterator, bool> ir =
+            state_count.insert(StateCount::value_type(result, 1));
+          if (!ir.second) {
+            (*(ir.first)).second++;
+          }
         }
       }
     }
@@ -873,27 +901,31 @@ clear_cache() {
     for (ti = temp_states.begin(); ti != temp_states.end(); ++ti) {
       RenderState *state = (RenderState *)(*ti).p();
 
-      CompositionCache::const_iterator ci;
-      for (ci = state->_composition_cache.begin();
-           ci != state->_composition_cache.end();
-           ++ci) {
-        const RenderState *result = (*ci).second._result;
-        if (result != (const RenderState *)NULL && result != state) {
-          result->cache_unref();
-          nassertr(result->get_ref_count() > 0, 0);
+      int i;
+      int cache_size = state->_composition_cache.get_size();
+      for (i = 0; i < cache_size; ++i) {
+        if (state->_composition_cache.has_element(i)) {
+          const RenderState *result = state->_composition_cache.get_data(i)._result;
+          if (result != (const RenderState *)NULL && result != state) {
+            result->cache_unref();
+            nassertr(result->get_ref_count() > 0, 0);
+          }
         }
       }
+      _cache_stats.add_total_size(-state->_composition_cache.get_num_entries());
       state->_composition_cache.clear();
 
-      for (ci = state->_invert_composition_cache.begin();
-           ci != state->_invert_composition_cache.end();
-           ++ci) {
-        const RenderState *result = (*ci).second._result;
-        if (result != (const RenderState *)NULL && result != state) {
-          result->cache_unref();
-          nassertr(result->get_ref_count() > 0, 0);
+      cache_size = state->_invert_composition_cache.get_size();
+      for (i = 0; i < cache_size; ++i) {
+        if (state->_invert_composition_cache.has_element(i)) {
+          const RenderState *result = state->_invert_composition_cache.get_data(i)._result;
+          if (result != (const RenderState *)NULL && result != state) {
+            result->cache_unref();
+            nassertr(result->get_ref_count() > 0, 0);
+          }
         }
       }
+      _cache_stats.add_total_size(-state->_invert_composition_cache.get_num_entries());
       state->_invert_composition_cache.clear();
     }
 
@@ -1338,37 +1370,41 @@ r_detect_cycles(const RenderState *start_state,
   }
   ((RenderState *)current_state)->_cycle_detect = this_seq;
     
-  CompositionCache::const_iterator ci;
-  for (ci = current_state->_composition_cache.begin();
-       ci != current_state->_composition_cache.end();
-       ++ci) {
-    const RenderState *result = (*ci).second._result;
-    if (result != (const RenderState *)NULL) {
-      if (r_detect_cycles(start_state, result, length + 1, 
-                          this_seq, cycle_desc)) {
-        // Cycle detected.
-        if (cycle_desc != (CompositionCycleDesc *)NULL) {
-          CompositionCycleDescEntry entry((*ci).first, result, false);
-          cycle_desc->push_back(entry);
+  int i;
+  int cache_size = current_state->_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_composition_cache.has_element(i)) {
+      const RenderState *result = current_state->_composition_cache.get_data(i)._result;
+      if (result != (const RenderState *)NULL) {
+        if (r_detect_cycles(start_state, result, length + 1, 
+                            this_seq, cycle_desc)) {
+          // Cycle detected.
+          if (cycle_desc != (CompositionCycleDesc *)NULL) {
+            const RenderState *other = current_state->_composition_cache.get_key(i);
+            CompositionCycleDescEntry entry(other, result, false);
+            cycle_desc->push_back(entry);
+          }
+          return true;
         }
-        return true;
       }
     }
   }
 
-  for (ci = current_state->_invert_composition_cache.begin();
-       ci != current_state->_invert_composition_cache.end();
-       ++ci) {
-    const RenderState *result = (*ci).second._result;
-    if (result != (const RenderState *)NULL) {
-      if (r_detect_cycles(start_state, result, length + 1,
-                          this_seq, cycle_desc)) {
-        // Cycle detected.
-        if (cycle_desc != (CompositionCycleDesc *)NULL) {
-          CompositionCycleDescEntry entry((*ci).first, result, true);
-          cycle_desc->push_back(entry);
+  cache_size = current_state->_invert_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_invert_composition_cache.has_element(i)) {
+      const RenderState *result = current_state->_invert_composition_cache.get_data(i)._result;
+      if (result != (const RenderState *)NULL) {
+        if (r_detect_cycles(start_state, result, length + 1,
+                            this_seq, cycle_desc)) {
+          // Cycle detected.
+          if (cycle_desc != (CompositionCycleDesc *)NULL) {
+            const RenderState *other = current_state->_invert_composition_cache.get_key(i);
+            CompositionCycleDescEntry entry(other, result, true);
+            cycle_desc->push_back(entry);
+          }
+          return true;
         }
-        return true;
       }
     }
   }
@@ -1428,7 +1464,7 @@ remove_cache_pointers() {
   // it.
 
 #ifdef DO_PSTATS
-  if (_composition_cache.empty() && _invert_composition_cache.empty()) {
+  if (_composition_cache.is_empty() && _invert_composition_cache.is_empty()) {
     return;
   }
   PStatTimer timer(_cache_update_pcollector);
@@ -1436,8 +1472,12 @@ remove_cache_pointers() {
 
   // There are lots of ways to do this loop wrong.  Be very careful if
   // you need to modify it for any reason.
-  while (!_composition_cache.empty()) {
-    CompositionCache::iterator ci = _composition_cache.begin();
+  int i = 0;
+  while (!_composition_cache.is_empty()) {
+    // Scan for the next used slot in the table.
+    while (!_composition_cache.has_element(i)) {
+      ++i;
+    }
 
     // It is possible that the "other" RenderState object is
     // currently within its own destructor.  We therefore can't use a
@@ -1446,29 +1486,33 @@ remove_cache_pointers() {
     // reference count to ensure it doesn't destruct while we process
     // this loop; as long as we ensure that no *other* RenderState
     // objects destruct, there will be no reason for that one to.
-    RenderState *other = (RenderState *)(*ci).first;
+    RenderState *other = (RenderState *)_composition_cache.get_key(i);
 
     // We hold a copy of the composition result so we can dereference
     // it later.
-    Composition comp = (*ci).second;
+    Composition comp = _composition_cache.get_data(i);
 
     // Now we can remove the element from our cache.  We do this now,
     // rather than later, before any other RenderState objects have
     // had a chance to destruct, so we are confident that our iterator
     // is still valid.
-    _composition_cache.erase(ci);
+    _composition_cache.remove_element(i);
+    _cache_stats.add_total_size(-1);
+    _cache_stats.inc_dels();
 
     if (other != this) {
-      CompositionCache::iterator oci = other->_composition_cache.find(this);
+      int oi = other->_composition_cache.find(this);
 
       // We may or may not still be listed in the other's cache (it
       // might be halfway through pulling entries out, from within its
       // own destructor).
-      if (oci != other->_composition_cache.end()) {
+      if (oi != -1) {
         // Hold a copy of the other composition result, too.
-        Composition ocomp = (*oci).second;
+        Composition ocomp = other->_composition_cache.get_data(oi);
         
-        other->_composition_cache.erase(oci);
+        other->_composition_cache.remove_element(oi);
+        _cache_stats.add_total_size(-1);
+        _cache_stats.inc_dels();
         
         // It's finally safe to let our held pointers go away.  This may
         // have cascading effects as other RenderState objects are
@@ -1488,18 +1532,25 @@ remove_cache_pointers() {
   }
 
   // A similar bit of code for the invert cache.
-  while (!_invert_composition_cache.empty()) {
-    CompositionCache::iterator ci = _invert_composition_cache.begin();
-    RenderState *other = (RenderState *)(*ci).first;
+  i = 0;
+  while (!_invert_composition_cache.is_empty()) {
+    while (!_invert_composition_cache.has_element(i)) {
+      ++i;
+    }
+
+    RenderState *other = (RenderState *)_invert_composition_cache.get_key(i);
     nassertv(other != this);
-    Composition comp = (*ci).second;
-    _invert_composition_cache.erase(ci);
+    Composition comp = _invert_composition_cache.get_data(i);
+    _invert_composition_cache.remove_element(i);
+    _cache_stats.add_total_size(-1);
+    _cache_stats.inc_dels();
     if (other != this) {
-      CompositionCache::iterator oci = 
-        other->_invert_composition_cache.find(this);
-      if (oci != other->_invert_composition_cache.end()) {
-        Composition ocomp = (*oci).second;
-        other->_invert_composition_cache.erase(oci);
+      int oi = other->_invert_composition_cache.find(this);
+      if (oi != -1) {
+        Composition ocomp = other->_invert_composition_cache.get_data(oi);
+        other->_invert_composition_cache.remove_element(oi);
+        _cache_stats.add_total_size(-1);
+        _cache_stats.inc_dels();
         if (ocomp._result != (const RenderState *)NULL && ocomp._result != other) {
           cache_unref_delete(ocomp._result);
         }
@@ -1894,6 +1945,7 @@ init_states() {
   // called at static init time, presumably when there is still only
   // one thread in the world.
   _states_lock = new ReMutex("RenderState::_states_lock");
+  _cache_stats.init();
   nassertv(Thread::get_current_thread() == Thread::get_main_thread());
 }
 

+ 5 - 1
panda/src/pgraph/renderState.h

@@ -35,6 +35,8 @@
 #include "reMutex.h"
 #include "pmutex.h"
 #include "deletedChain.h"
+#include "simpleHashMap.h"
+#include "cacheStats.h"
 
 class GraphicsStateGuardianBase;
 class FogAttrib;
@@ -248,7 +250,7 @@ private:
   // is not reference counted within this map; instead we store a
   // companion pointer in the other object, and remove the references
   // explicitly when either object destructs.
-  typedef phash_map<const RenderState *, Composition, pointer_hash> CompositionCache;
+  typedef SimpleHashMap<const RenderState *, Composition, pointer_hash> CompositionCache;
   CompositionCache _composition_cache;
   CompositionCache _invert_composition_cache;
 
@@ -341,6 +343,8 @@ private:
   // This mutex protects _flags, and all of the above computed values.
   Mutex _lock;
 
+  static CacheStats _cache_stats;
+
 public:
   static void register_with_read_factory();
   virtual void write_datagram(BamWriter *manager, Datagram &dg);

+ 50 - 5
panda/src/pgraph/sceneGraphAnalyzer.cxx

@@ -79,6 +79,7 @@ clear() {
   _num_geoms = 0;
   _num_geom_vertex_datas = 0;
   _vertex_data_size = 0;
+  _prim_data_size = 0;
 
   _num_vertices = 0;
   _num_normals = 0;
@@ -152,7 +153,11 @@ write(ostream &out, int indent_level) const {
   }
 
   indent(out, indent_level)
-    << "vertices occupy " << (_vertex_data_size + 1023) / 1024 
+    << "GeomVertexData arrays occupy " << (_vertex_data_size + 1023) / 1024 
+    << "K memory.\n";
+
+  indent(out, indent_level)
+    << "GeomPrimitive arrays occupy " << (_prim_data_size + 1023) / 1024 
     << "K memory.\n";
 
   int unreferenced_vertices = 0;
@@ -189,7 +194,25 @@ write(ostream &out, int indent_level) const {
     indent(out, indent_level)
       << _vadatas.size() - _unique_vadatas.size()
       << " GeomVertexArrayDatas are redundant, wasting " 
-      << (wasted_bytes + 512) / 1024 << "K.\n";
+      << (wasted_bytes + 1023) / 1024 << "K.\n";
+  }
+  if (_unique_prim_vadatas.size() != _prim_vadatas.size()) {
+    int wasted_bytes = 0;
+
+    UniqueVADatas::const_iterator uvai;
+    for (uvai = _unique_prim_vadatas.begin();
+         uvai != _unique_prim_vadatas.end();
+         ++uvai) {
+      const GeomVertexArrayData *gvad = (*uvai).first;
+      int dup_count = (*uvai).second;
+      if (dup_count > 1) {
+        wasted_bytes += (dup_count - 1) * gvad->get_data_size_bytes();
+      }
+    }
+    indent(out, indent_level)
+      << _prim_vadatas.size() - _unique_prim_vadatas.size()
+      << " GeomPrimitive arrays are redundant, wasting " 
+      << (wasted_bytes + 1023) / 1024 << "K.\n";
   }
 
   indent(out, indent_level)
@@ -220,8 +243,10 @@ write(ostream &out, int indent_level) const {
     << _num_individual_tris
     << " of these are independent triangles.\n";
 
-  indent(out, indent_level)
-    << _num_lines << " lines, " << _num_points << " points.\n";
+  if (_num_lines != 0 || _num_points != 0) {
+    indent(out, indent_level)
+      << _num_lines << " lines, " << _num_points << " points.\n";
+  }
 
   indent(out, indent_level)
     << _textures.size() << " textures, estimated minimum "
@@ -384,7 +409,7 @@ collect_statistics(const Geom *geom) {
     }
 
     if (prim->is_indexed()) {
-      collect_statistics(prim->get_vertices());
+      collect_prim_statistics(prim->get_vertices());
       if (prim->is_composite()) {
         collect_statistics(prim->get_mins());
         collect_statistics(prim->get_maxs());
@@ -472,3 +497,23 @@ collect_statistics(const GeomVertexArrayData *vadata) {
     ++dup_count;
   }
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: SceneGraphAnalyzer::collect_prim_statistics
+//       Access: Private
+//  Description: Recursively visits each node, counting up the
+//               statistics.  This one records the vertex index array
+//               associated with a GeomPrimitive, as opposed to the
+//               vertex data array, component of a GeomVertexData.
+////////////////////////////////////////////////////////////////////
+void SceneGraphAnalyzer::
+collect_prim_statistics(const GeomVertexArrayData *vadata) {
+  nassertv(vadata != NULL);
+  bool inserted = _prim_vadatas.insert(vadata).second;
+  if (inserted) {
+    // This is the first time we've encountered this vertex array.
+    _prim_data_size += vadata->get_data_size_bytes();
+    int &dup_count = (*(_unique_prim_vadatas.insert(UniqueVADatas::value_type(vadata, 0)).first)).second;
+    ++dup_count;
+  }
+}

+ 4 - 0
panda/src/pgraph/sceneGraphAnalyzer.h

@@ -95,6 +95,7 @@ private:
   void collect_statistics(const Geom *geom);
   void collect_statistics(Texture *texture);
   void collect_statistics(const GeomVertexArrayData *vadata);
+  void collect_prim_statistics(const GeomVertexArrayData *vadata);
 
   class VDataTracker {
   public:
@@ -113,8 +114,10 @@ private:
   Nodes _nodes;
   VDatas _vdatas;
   VADatas _vadatas;
+  VADatas _prim_vadatas;
   UniqueVDatas _unique_vdatas;
   UniqueVADatas _unique_vadatas;
+  UniqueVADatas _unique_prim_vadatas;
   Textures _textures;
 
 private:
@@ -127,6 +130,7 @@ private:
   int _num_geoms;
   int _num_geom_vertex_datas;
   size_t _vertex_data_size;
+  size_t _prim_data_size;
 
   int _num_vertices;
   int _num_normals;

+ 27 - 114
panda/src/pgraph/transformState.cxx

@@ -28,7 +28,6 @@
 #include "reMutexHolder.h"
 #include "mutexHolder.h"
 #include "thread.h"
-#include "clockObject.h"
 
 ReMutex *TransformState::_states_lock = NULL;
 TransformState::States *TransformState::_states = NULL;
@@ -45,75 +44,9 @@ PStatCollector TransformState::_transform_hash_pcollector("*:State Cache:Calc Ha
 PStatCollector TransformState::_node_counter("TransformStates:On nodes");
 PStatCollector TransformState::_cache_counter("TransformStates:Cached");
 
-TypeHandle TransformState::_type_handle;
-
-class CacheStats {
-public:
-  void init();
-  void reset(double now);
-  void write(ostream &out) const;
-  void maybe_report();
-
-  int _cache_hits;
-  int _cache_semi_hits;
-  int _cache_misses;
-  int _cache_adds;
-  int _cache_new_adds;
-  int _cache_dels;
-  int _total_cache_size;
-  int _num_states;
-  double _last_reset;
-
-  bool _cache_report;
-  double _cache_report_interval;
-};
-static CacheStats _cache_stats;
-
-void CacheStats::
-init() {
-  reset(ClockObject::get_global_clock()->get_real_time());
-  _total_cache_size = 0;
-  _num_states = 0;
-
-  _cache_report = ConfigVariableBool("cache-report", false);
-  _cache_report_interval = ConfigVariableDouble("cache-report-interval", 5.0);
-}
-
-void CacheStats::
-reset(double now) {
-  _cache_hits = 0;
-  _cache_semi_hits = 0;
-  _cache_misses = 0;
-  _cache_adds = 0;
-  _cache_new_adds = 0;
-  _cache_dels = 0;
-  _last_reset = now;
-}
-
-void CacheStats::
-write(ostream &out) const {
-  out << "TransformState cache: " << _cache_hits << " hits, " 
-      << _cache_semi_hits << " semi-hits, "
-      << _cache_misses << " misses\n";
-  out << _cache_adds + _cache_new_adds << "(" << _cache_new_adds << ") adds(new), "
-      << _cache_dels << " dels, "
-      << (double)_total_cache_size / (double)_num_states 
-      << " average cache size\n";
-}
-
-void CacheStats::
-maybe_report() {
-  if (_cache_report) {
-    double now = ClockObject::get_global_clock()->get_real_time();
-    if (now - _last_reset < _cache_report_interval) {
-      return;
-    }
-    write(Notify::out());
-    reset(now);
-  }
-}
-
+CacheStats TransformState::_cache_stats;
 
+TypeHandle TransformState::_type_handle;
 
 ////////////////////////////////////////////////////////////////////
 //     Function: TransformState::Constructor
@@ -130,7 +63,7 @@ TransformState() : _lock("TransformState") {
   _saved_entry = _states->end();
   _flags = F_is_identity | F_singular_known | F_is_2d;
   _inv_mat = (LMatrix4f *)NULL;
-  _cache_stats._num_states++;
+  _cache_stats.add_num_states(1);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -180,7 +113,7 @@ TransformState::
   // If this was true at the beginning of the destructor, but is no
   // longer true now, probably we've been double-deleted.
   nassertv(get_ref_count() == 0);
-  _cache_stats._num_states--;
+  _cache_stats.add_num_states(-1);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -688,14 +621,12 @@ compose(const TransformState *other) const {
         // count only when the result is not the same as this.
         result->cache_ref();
       }
-      ++_cache_stats._cache_semi_hits;
-    } else {
-      ++_cache_stats._cache_hits;
     }
     // Here's the cache!
+    _cache_stats.inc_hits();
     return comp._result;
   }
-  ++_cache_stats._cache_misses;
+  _cache_stats.inc_misses();
 
   // We need to make a new cache entry, both in this object and in the
   // other object.  We make both records so the other TransformState
@@ -706,22 +637,14 @@ compose(const TransformState *other) const {
   // result; the other will be NULL for now.
   CPT(TransformState) result = do_compose(other);
 
-  ++_cache_stats._total_cache_size;
-  if (_composition_cache.get_size() == 0) {
-    ++_cache_stats._cache_new_adds;
-  } else {
-    ++_cache_stats._cache_adds;
-  }
+  _cache_stats.add_total_size(1);
+  _cache_stats.inc_adds(_composition_cache.get_size() == 0);
 
   ((TransformState *)this)->_composition_cache[other]._result = result;
 
   if (other != this) {
-    ++_cache_stats._total_cache_size;
-    if (other->_composition_cache.get_size() == 0) {
-      ++_cache_stats._cache_new_adds;
-    } else {
-      ++_cache_stats._cache_adds;
-    }
+    _cache_stats.add_total_size(1);
+    _cache_stats.inc_adds(other->_composition_cache.get_size() == 0);
     ((TransformState *)other)->_composition_cache[this]._result = NULL;
   }
 
@@ -737,7 +660,7 @@ compose(const TransformState *other) const {
     // that would be a self-referential leak.)
   }
 
-  _cache_stats.maybe_report();
+  _cache_stats.maybe_report("TransformState");
 
   return result;
 }
@@ -803,14 +726,12 @@ invert_compose(const TransformState *other) const {
         // count only when the result is not the same as this.
         result->cache_ref();
       }
-      ++_cache_stats._cache_semi_hits;
-    } else {
-      ++_cache_stats._cache_hits;
     }
     // Here's the cache!
+    _cache_stats.inc_hits();
     return comp._result;
   }
-  ++_cache_stats._cache_misses;
+  _cache_stats.inc_misses();
 
   // We need to make a new cache entry, both in this object and in the
   // other object.  We make both records so the other TransformState
@@ -821,21 +742,13 @@ invert_compose(const TransformState *other) const {
   // result; the other will be NULL for now.
   CPT(TransformState) result = do_invert_compose(other);
 
-  ++_cache_stats._total_cache_size;
-  if (_invert_composition_cache.get_size() == 0) {
-    ++_cache_stats._cache_new_adds;
-  } else {
-    ++_cache_stats._cache_adds;
-  }
+  _cache_stats.add_total_size(1);
+  _cache_stats.inc_adds(_invert_composition_cache.get_size() == 0);
   ((TransformState *)this)->_invert_composition_cache[other]._result = result;
 
   if (other != this) {
-    ++_cache_stats._total_cache_size;
-    if (other->_invert_composition_cache.get_size() == 0) {
-      ++_cache_stats._cache_new_adds;
-    } else {
-      ++_cache_stats._cache_adds;
-    }
+    _cache_stats.add_total_size(1);
+    _cache_stats.inc_adds(other->_invert_composition_cache.get_size() == 0);
     ((TransformState *)other)->_invert_composition_cache[this]._result = NULL;
   }
 
@@ -1190,7 +1103,7 @@ clear_cache() {
           }
         }
       }
-      _cache_stats._total_cache_size -= state->_composition_cache.get_num_entries();
+      _cache_stats.add_total_size(-state->_composition_cache.get_num_entries());
       state->_composition_cache.clear();
 
       cache_size = state->_invert_composition_cache.get_size();
@@ -1203,7 +1116,7 @@ clear_cache() {
           }
         }
       }
-      _cache_stats._total_cache_size -= state->_invert_composition_cache.get_num_entries();
+      _cache_stats.add_total_size(-state->_invert_composition_cache.get_num_entries());
       state->_invert_composition_cache.clear();
     }
 
@@ -1807,8 +1720,8 @@ remove_cache_pointers() {
     // had a chance to destruct, so we are confident that our iterator
     // is still valid.
     _composition_cache.remove_element(i);
-    --_cache_stats._total_cache_size;
-    ++_cache_stats._cache_dels;
+    _cache_stats.add_total_size(-1);
+    _cache_stats.inc_dels();
 
     if (other != this) {
       int oi = other->_composition_cache.find(this);
@@ -1821,8 +1734,8 @@ remove_cache_pointers() {
         Composition ocomp = other->_composition_cache.get_data(oi);
         
         other->_composition_cache.remove_element(oi);
-        --_cache_stats._total_cache_size;
-        ++_cache_stats._cache_dels;
+        _cache_stats.add_total_size(-1);
+        _cache_stats.inc_dels();
         
         // It's finally safe to let our held pointers go away.  This may
         // have cascading effects as other TransformState objects are
@@ -1852,15 +1765,15 @@ remove_cache_pointers() {
     nassertv(other != this);
     Composition comp = _invert_composition_cache.get_data(i);
     _invert_composition_cache.remove_element(i);
-    --_cache_stats._total_cache_size;
-    ++_cache_stats._cache_dels;
+    _cache_stats.add_total_size(-1);
+    _cache_stats.inc_dels();
     if (other != this) {
       int oi = other->_invert_composition_cache.find(this);
       if (oi != -1) {
         Composition ocomp = other->_invert_composition_cache.get_data(oi);
         other->_invert_composition_cache.remove_element(oi);
-        --_cache_stats._total_cache_size;
-        ++_cache_stats._cache_dels;
+        _cache_stats.add_total_size(-1);
+        _cache_stats.inc_dels();
         if (ocomp._result != (const TransformState *)NULL && ocomp._result != other) {
           cache_unref_delete(ocomp._result);
         }

+ 3 - 0
panda/src/pgraph/transformState.h

@@ -34,6 +34,7 @@
 #include "config_pgraph.h"
 #include "deletedChain.h"
 #include "simpleHashMap.h"
+#include "cacheStats.h"
 
 class GraphicsStateGuardianBase;
 class FactoryParams;
@@ -342,6 +343,8 @@ private:
   // This mutex protects _flags, and all of the above computed values.
   Mutex _lock;
 
+  static CacheStats _cache_stats;
+
 public:
   static void register_with_read_factory();
   virtual void write_datagram(BamWriter *manager, Datagram &dg);