Browse Source

fix TransformState leak

David Rose 17 years ago
parent
commit
e41d03482f

+ 12 - 12
panda/src/pgraph/accumulatedAttribs.cxx

@@ -304,7 +304,7 @@ collect(const RenderState *state, int attrib_types) {
 void AccumulatedAttribs::
 apply_to_node(PandaNode *node, int attrib_types) {
   if ((attrib_types & SceneGraphReducer::TT_transform) != 0) {
-    node->set_transform(_transform->compose(node->get_transform()));
+    node->set_transform(_transform->compose(node->get_transform())->get_unique());
     node->reset_prev_transform();
     _transform = TransformState::make_identity();
   }
@@ -314,9 +314,9 @@ apply_to_node(PandaNode *node, int attrib_types) {
       const RenderAttrib *node_attrib =
         node->get_attrib(ColorAttrib::get_class_slot());
       if (node_attrib != (RenderAttrib *)NULL) {
-        node->set_attrib(_color->compose(node_attrib));
+        node->set_attrib(_color->compose(node_attrib)->get_unique());
       } else {
-        node->set_attrib(_color);
+        node->set_attrib(_color->get_unique());
       }
       _color = (RenderAttrib *)NULL;
     }
@@ -327,9 +327,9 @@ apply_to_node(PandaNode *node, int attrib_types) {
       const RenderAttrib *node_attrib =
         node->get_attrib(ColorScaleAttrib::get_class_slot());
       if (node_attrib != (RenderAttrib *)NULL) {
-        node->set_attrib(_color_scale->compose(node_attrib));
+        node->set_attrib(_color_scale->compose(node_attrib)->get_unique());
       } else {
-        node->set_attrib(_color_scale);
+        node->set_attrib(_color_scale->get_unique());
       }
       _color_scale = (RenderAttrib *)NULL;
     }
@@ -340,9 +340,9 @@ apply_to_node(PandaNode *node, int attrib_types) {
       const RenderAttrib *node_attrib =
         node->get_attrib(TexMatrixAttrib::get_class_slot());
       if (node_attrib != (RenderAttrib *)NULL) {
-        node->set_attrib(_tex_matrix->compose(node_attrib));
+        node->set_attrib(_tex_matrix->compose(node_attrib)->get_unique());
       } else {
-        node->set_attrib(_tex_matrix);
+        node->set_attrib(_tex_matrix->get_unique());
       }
       _tex_matrix = (RenderAttrib *)NULL;
     }
@@ -353,9 +353,9 @@ apply_to_node(PandaNode *node, int attrib_types) {
       const RenderAttrib *node_attrib =
         node->get_attrib(ClipPlaneAttrib::get_class_slot());
       if (node_attrib != (RenderAttrib *)NULL) {
-        node->set_attrib(_clip_plane->compose(node_attrib));
+        node->set_attrib(_clip_plane->compose(node_attrib)->get_unique());
       } else {
-        node->set_attrib(_clip_plane);
+        node->set_attrib(_clip_plane->get_unique());
       }
       _clip_plane = (RenderAttrib *)NULL;
     }
@@ -366,16 +366,16 @@ apply_to_node(PandaNode *node, int attrib_types) {
       const RenderAttrib *node_attrib =
         node->get_attrib(CullFaceAttrib::get_class_slot());
       if (node_attrib != (RenderAttrib *)NULL) {
-        node->set_attrib(_cull_face->compose(node_attrib));
+        node->set_attrib(_cull_face->compose(node_attrib)->get_unique());
       } else {
-        node->set_attrib(_cull_face);
+        node->set_attrib(_cull_face->get_unique());
       }
       _cull_face = (RenderAttrib *)NULL;
     }
   }
 
   if ((attrib_types & SceneGraphReducer::TT_other) != 0) {
-    node->set_state(_other->compose(node->get_state()));
+    node->set_state(_other->compose(node->get_state())->get_unique());
     _other = RenderState::make_empty();
   }
 }

+ 24 - 0
panda/src/pgraph/config_pgraph.cxx

@@ -194,6 +194,30 @@ ConfigVariableBool state_cache
           "similar to the TransformState cache controlled via "
           "transform-cache."));
 
+ConfigVariableBool uniquify_transforms
+("uniquify-transforms", false,
+ PRC_DESC("Set this true to ensure that equivalent TransformStates "
+          "are pointerwise equal.  This may improve caching performance, "
+          "but also adds additional overhead to maintain the cache, "
+          "including the need to check for a composition cycle in "
+          "the cache."));
+
+ConfigVariableBool uniquify_states
+("uniquify-states", true,
+ PRC_DESC("Set this true to ensure that equivalent RenderStates "
+          "are pointerwise equal.  This may improve caching performance, "
+          "but also adds additional overhead to maintain the cache, "
+          "including the need to check for a composition cycle in "
+          "the cache."));
+
+ConfigVariableBool uniquify_attribs
+("uniquify-attribs", true,
+ PRC_DESC("Set this true to ensure that equivalent RenderAttribs "
+          "are pointerwise equal.  This may improve caching performance, "
+          "but also adds additional overhead to maintain the cache, "
+          "including the need to check for a composition cycle in "
+          "the cache."));
+
 ConfigVariableBool retransform_sprites
 ("retransform-sprites", true,
  PRC_DESC("To render sprite-based particle effects, Panda must convert "

+ 3 - 0
panda/src/pgraph/config_pgraph.h

@@ -45,6 +45,9 @@ extern ConfigVariableBool paranoid_const;
 extern ConfigVariableBool auto_break_cycles;
 extern ConfigVariableBool transform_cache;
 extern ConfigVariableBool state_cache;
+extern ConfigVariableBool uniquify_transforms;
+extern ConfigVariableBool uniquify_states;
+extern ConfigVariableBool uniquify_attribs;
 extern ConfigVariableBool retransform_sprites;
 extern ConfigVariableBool support_fade_lod;
 extern ConfigVariableBool depth_offset_decals;

+ 15 - 0
panda/src/pgraph/renderAttrib.I

@@ -89,6 +89,21 @@ compare_to(const RenderAttrib &other) const {
   return compare_to_impl(&other);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: RenderAttrib::get_unique
+//       Access: Published
+//  Description: Returns the pointer to the unique RenderAttrib in
+//               the cache that is equivalent to this one.  This may
+//               be the same pointer as this object, or it may be a
+//               different pointer; but it will be an equivalent
+//               object, and it will be a shared pointer.  This may be
+//               called from time to time to improve cache benefits.
+////////////////////////////////////////////////////////////////////
+INLINE CPT(RenderAttrib) RenderAttrib::
+get_unique() const {
+  return return_unique((RenderAttrib *)this);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderAttrib::register_slot
 //       Access: Public, Static

+ 31 - 12
panda/src/pgraph/renderAttrib.cxx

@@ -258,26 +258,37 @@ validate_attribs() {
 //               to share a common RenderAttrib pointer for all
 //               equivalent RenderAttrib objects.
 //
-//               The make() function of the derived type should create
-//               a new RenderAttrib and pass it through return_new(),
-//               which will either save the pointer and return it
-//               unchanged (if this is the first similar such object)
-//               or delete it and return an equivalent pointer (if
-//               there was already a similar object saved).
+//               This is different from return_unique() in that it
+//               does not actually guarantee a unique pointer, unless
+//               uniquify-attribs is set.
 ////////////////////////////////////////////////////////////////////
 CPT(RenderAttrib) RenderAttrib::
 return_new(RenderAttrib *attrib) {
   nassertr(attrib != (RenderAttrib *)NULL, attrib);
-  static ConfigVariableBool uniquify_attribs("uniquify-attribs", true);
   if (!uniquify_attribs) {
     return attrib;
   }
 
-  LightReMutexHolder holder(*_attribs_lock);
+  return return_unique(attrib);
+}
 
-  // This should be a newly allocated pointer, not one that was used
-  // for anything else.
-  nassertr(attrib->_saved_entry == _attribs->end(), attrib);
+////////////////////////////////////////////////////////////////////
+//     Function: RenderAttrib::return_unique
+//       Access: Protected, Static
+//  Description: This function is used by derived RenderAttrib types
+//               to share a common RenderAttrib pointer for all
+//               equivalent RenderAttrib objects.
+//
+//               The make() function of the derived type should create
+//               a new RenderAttrib and pass it through return_new(),
+//               which will either save the pointer and return it
+//               unchanged (if this is the first similar such object)
+//               or delete it and return an equivalent pointer (if
+//               there was already a similar object saved).
+////////////////////////////////////////////////////////////////////
+CPT(RenderAttrib) RenderAttrib::
+return_unique(RenderAttrib *attrib) {
+  nassertr(attrib != (RenderAttrib *)NULL, attrib);
 
 #ifndef NDEBUG
   if (!state_cache) {
@@ -291,6 +302,14 @@ return_new(RenderAttrib *attrib) {
   }
 #endif
 
+  LightReMutexHolder holder(*_attribs_lock);
+
+  if (attrib->_saved_entry != _attribs->end()) {
+    // This attrib is already in the cache.
+    nassertr(_attribs->find(attrib) == attrib->_saved_entry, attrib);
+    return attrib;
+  }
+
   // Save the attrib in a local PointerTo so that it will be freed at
   // the end of this function if no one else uses it.
   CPT(RenderAttrib) pt_attrib = attrib;
@@ -509,7 +528,7 @@ TypedWritable *RenderAttrib::
 change_this(TypedWritable *old_ptr, BamReader *manager) {
   // First, uniquify the pointer.
   RenderAttrib *attrib = DCAST(RenderAttrib, old_ptr);
-  CPT(RenderAttrib) pointer = return_new(attrib);
+  CPT(RenderAttrib) pointer = return_unique(attrib);
 
   // But now we have a problem, since we have to hold the reference
   // count and there's no way to return a TypedWritable while still

+ 2 - 0
panda/src/pgraph/renderAttrib.h

@@ -76,6 +76,7 @@ public:
 
 PUBLISHED:
   INLINE int compare_to(const RenderAttrib &other) const;
+  INLINE CPT(RenderAttrib) get_unique() const;
 
   bool unref() const;
 
@@ -174,6 +175,7 @@ PUBLISHED:
 
 protected:
   static CPT(RenderAttrib) return_new(RenderAttrib *attrib);
+  static CPT(RenderAttrib) return_unique(RenderAttrib *attrib);
   virtual int compare_to_impl(const RenderAttrib *other) const;
   virtual CPT(RenderAttrib) compose_impl(const RenderAttrib *other) const;
   virtual CPT(RenderAttrib) invert_compose_impl(const RenderAttrib *other) const;

+ 168 - 0
panda/src/pgraph/renderState.I

@@ -143,6 +143,21 @@ get_override(int slot) const {
   return _attributes[slot]._override;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_unique
+//       Access: Published
+//  Description: Returns the pointer to the unique RenderState in
+//               the cache that is equivalent to this one.  This may
+//               be the same pointer as this object, or it may be a
+//               different pointer; but it will be an equivalent
+//               object, and it will be a shared pointer.  This may be
+//               called from time to time to improve cache benefits.
+////////////////////////////////////////////////////////////////////
+INLINE CPT(RenderState) RenderState::
+get_unique() const {
+  return return_unique((RenderState *)this);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::cache_ref
 //       Access: Published
@@ -209,6 +224,159 @@ node_unref() const {
 #endif  // DO_PSTATS
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_composition_cache_num_entries
+//       Access: Published
+//  Description: Returns the number of entries in the composition
+//               cache for this RenderState.  This is the number of
+//               other RenderStates whose composition with this one
+//               has been cached.  This number is not useful for any
+//               practical reason other than performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE int RenderState::
+get_composition_cache_num_entries() const {
+  LightReMutexHolder holder(*_states_lock);
+  return _composition_cache.get_num_entries();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_invert_composition_cache_num_entries
+//       Access: Published
+//  Description: Returns the number of entries in the
+//               invert_composition cache for this RenderState.
+//               This is similar to the composition cache, but it
+//               records cache entries for the invert_compose()
+//               operation.  See get_composition_cache_num_entries().
+////////////////////////////////////////////////////////////////////
+INLINE int RenderState::
+get_invert_composition_cache_num_entries() const {
+  LightReMutexHolder holder(*_states_lock);
+  return _invert_composition_cache.get_num_entries();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_composition_cache_size
+//       Access: Published
+//  Description: Returns the number of slots in the composition
+//               cache for this RenderState.  You may use this as
+//               an upper bound when walking through all of the
+//               composition cache results via
+//               get_composition_cache_source() or result().
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE int RenderState::
+get_composition_cache_size() const {
+  LightReMutexHolder holder(*_states_lock);
+  return _composition_cache.get_size();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_composition_cache_source
+//       Access: Published
+//  Description: Returns the source RenderState of the nth element
+//               in the composition cache.  Returns NULL if there
+//               doesn't happen to be an entry in the nth element.
+//               See get_composition_cache_result().
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE const RenderState *RenderState::
+get_composition_cache_source(int n) const {
+  LightReMutexHolder holder(*_states_lock);
+  if (!_composition_cache.has_element(n)) {
+    return NULL;
+  }
+  return _composition_cache.get_key(n);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_composition_cache_result
+//       Access: Published
+//  Description: Returns the result RenderState of the nth element
+//               in the composition cache.  Returns NULL if there
+//               doesn't happen to be an entry in the nth element.
+//
+//               In general,
+//               a->compose(a->get_composition_cache_source(n)) ==
+//               a->get_composition_cache_result(n).
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE const RenderState *RenderState::
+get_composition_cache_result(int n) const {
+  LightReMutexHolder holder(*_states_lock);
+  if (!_composition_cache.has_element(n)) {
+    return NULL;
+  }
+  return _composition_cache.get_data(n)._result;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_invert_composition_cache_size
+//       Access: Published
+//  Description: Returns the number of slots in the composition
+//               cache for this RenderState.  You may use this as
+//               an upper bound when walking through all of the
+//               composition cache results via
+//               get_invert_composition_cache_source() or result().
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE int RenderState::
+get_invert_composition_cache_size() const {
+  LightReMutexHolder holder(*_states_lock);
+  return _invert_composition_cache.get_size();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_invert_composition_cache_source
+//       Access: Published
+//  Description: Returns the source RenderState of the nth element
+//               in the invert composition cache.  Returns NULL if
+//               there doesn't happen to be an entry in the nth
+//               element.  See get_invert_composition_cache_result().
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE const RenderState *RenderState::
+get_invert_composition_cache_source(int n) const {
+  LightReMutexHolder holder(*_states_lock);
+  if (!_invert_composition_cache.has_element(n)) {
+    return NULL;
+  }
+  return _invert_composition_cache.get_key(n);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_invert_composition_cache_result
+//       Access: Published
+//  Description: Returns the result RenderState of the nth element
+//               in the invert composition cache.  Returns NULL if
+//               there doesn't happen to be an entry in the nth
+//               element.
+//
+//               In general,
+//               a->invert_compose(a->get_invert_composition_cache_source(n))
+//               == a->get_invert_composition_cache_result(n).
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+INLINE const RenderState *RenderState::
+get_invert_composition_cache_result(int n) const {
+  LightReMutexHolder holder(*_states_lock);
+  if (!_invert_composition_cache.has_element(n)) {
+    return NULL;
+  }
+  return _invert_composition_cache.get_data(n)._result;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::get_draw_order
 //       Access: Published

+ 313 - 13
panda/src/pgraph/renderState.cxx

@@ -37,6 +37,7 @@
 #include "thread.h"
 #include "shaderGeneratorBase.h"
 #include "renderAttribRegistry.h"
+#include "py_panda.h"
   
 LightReMutex *RenderState::_states_lock = NULL;
 RenderState::States *RenderState::_states = NULL;
@@ -278,7 +279,7 @@ make_empty() {
   // and store a pointer forever once we find it the first time.
   if (_empty_state == (RenderState *)NULL) {
     RenderState *state = new RenderState;
-    _empty_state = return_new(state);
+    _empty_state = return_unique(state);
   }
 
   return _empty_state;
@@ -297,7 +298,7 @@ make_full_default() {
   if (_full_default_state == (RenderState *)NULL) {
     RenderState *state = new RenderState;
     state->fill_default();
-    _full_default_state = return_new(state);
+    _full_default_state = return_unique(state);
   }
 
   return _full_default_state;
@@ -718,7 +719,7 @@ unref() const {
   // be holding it if we happen to drop the reference count to 0.
   LightReMutexHolder holder(*_states_lock);
 
-  if (auto_break_cycles) {
+  if (auto_break_cycles && uniquify_states) {
     if (get_cache_ref_count() > 0 &&
         get_ref_count() == get_cache_ref_count() + 1) {
       // If we are about to remove the one reference that is not in the
@@ -736,6 +737,16 @@ unref() const {
         }
         
         ((RenderState *)this)->remove_cache_pointers();
+      } else {
+        ++_last_cycle_detect;
+        if (r_detect_reverse_cycles(this, this, 1, _last_cycle_detect, NULL)) {
+          if (pgraph_cat.is_debug()) {
+            pgraph_cat.debug()
+              << "Breaking cycle involving " << (*this) << "\n";
+          }
+          
+          ((RenderState *)this)->remove_cache_pointers();
+        }
       }
     }
   }
@@ -754,6 +765,126 @@ unref() const {
   return false;
 }
 
+#ifdef HAVE_PYTHON
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_composition_cache
+//       Access: Published
+//  Description: Returns a list of 2-tuples that represents the
+//               composition cache.  For each tuple in the list, the
+//               first element is the source render, and the second
+//               is the result render.  If both are None, there is
+//               no entry in the cache at that slot.
+//
+//               In general, a->compose(source) == result.
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+PyObject *RenderState::
+get_composition_cache() const {
+  IMPORT_THIS struct Dtool_PyTypedObject Dtool_RenderState;
+  LightReMutexHolder holder(*_states_lock);
+  size_t cache_size = _composition_cache.get_size();
+  PyObject *list = PyList_New(cache_size);
+
+  for (size_t i = 0; i < cache_size; ++i) {
+    PyObject *tuple = PyTuple_New(2);
+    PyObject *a, *b;
+    if (!_composition_cache.has_element(i)) {
+      a = Py_None;
+      Py_INCREF(a);
+      b = Py_None;
+      Py_INCREF(b);
+    } else {
+      const RenderState *source = _composition_cache.get_key(i);
+      if (source == (RenderState *)NULL) {
+        a = Py_None;
+        Py_INCREF(a);
+      } else {
+        source->ref();
+        a = DTool_CreatePyInstanceTyped((void *)source, Dtool_RenderState, 
+                                        true, true, source->get_type_index());
+      }
+      const RenderState *result = _composition_cache.get_data(i)._result;
+      if (result == (RenderState *)NULL) {
+        b = Py_None;
+        Py_INCREF(b);
+      } else {
+        result->ref();
+        b = DTool_CreatePyInstanceTyped((void *)result, Dtool_RenderState, 
+                                        true, true, result->get_type_index());
+      }
+    }
+    PyTuple_SET_ITEM(tuple, 0, a);
+    PyTuple_SET_ITEM(tuple, 1, b);
+
+    PyList_SET_ITEM(list, i, tuple);
+  }
+
+  return list;
+}
+#endif  // HAVE_PYTHON
+
+#ifdef HAVE_PYTHON
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_invert_composition_cache
+//       Access: Published
+//  Description: Returns a list of 2-tuples that represents the
+//               invert_composition cache.  For each tuple in the list, the
+//               first element is the source render, and the second
+//               is the result render.  If both are None, there is
+//               no entry in the cache at that slot.
+//
+//               In general, a->invert_compose(source) == result.
+//
+//               This has no practical value other than for examining
+//               the cache for performance analysis.
+////////////////////////////////////////////////////////////////////
+PyObject *RenderState::
+get_invert_composition_cache() const {
+  IMPORT_THIS struct Dtool_PyTypedObject Dtool_RenderState;
+  LightReMutexHolder holder(*_states_lock);
+  size_t cache_size = _invert_composition_cache.get_size();
+  PyObject *list = PyList_New(cache_size);
+
+  for (size_t i = 0; i < cache_size; ++i) {
+    PyObject *tuple = PyTuple_New(2);
+    PyObject *a, *b;
+    if (!_invert_composition_cache.has_element(i)) {
+      a = Py_None;
+      Py_INCREF(a);
+      b = Py_None;
+      Py_INCREF(b);
+    } else {
+      const RenderState *source = _invert_composition_cache.get_key(i);
+      if (source == (RenderState *)NULL) {
+        a = Py_None;
+        Py_INCREF(a);
+      } else {
+        source->ref();
+        a = DTool_CreatePyInstanceTyped((void *)source, Dtool_RenderState, 
+                                        true, true, source->get_type_index());
+      }
+      const RenderState *result = _invert_composition_cache.get_data(i)._result;
+      if (result == (RenderState *)NULL) {
+        b = Py_None;
+        Py_INCREF(b);
+      } else {
+        result->ref();
+        b = DTool_CreatePyInstanceTyped((void *)result, Dtool_RenderState, 
+                                        true, true, result->get_type_index());
+      }
+    }
+    PyTuple_SET_ITEM(tuple, 0, a);
+    PyTuple_SET_ITEM(tuple, 1, b);
+
+    PyList_SET_ITEM(list, i, tuple);
+  }
+
+  return list;
+}
+#endif  // HAVE_PYTHON
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::output
 //       Access: Published, Virtual
@@ -1102,6 +1233,30 @@ list_cycles(ostream &out) {
         }
 
         cycle_desc.clear();
+      } else {
+        ++_last_cycle_detect;
+        if (r_detect_reverse_cycles(state, state, 1, _last_cycle_detect, &cycle_desc)) {
+          // This state begins a cycle.
+          CompositionCycleDesc::iterator csi;
+          
+          out << "\nReverse cycle detected of length " << cycle_desc.size() + 1 << ":\n"
+              << "state ";
+          for (csi = cycle_desc.begin(); csi != cycle_desc.end(); ++csi) {
+            const CompositionCycleDescEntry &entry = (*csi);
+            out << (const void *)entry._result << ":"
+                << entry._result->get_ref_count() << " =\n";
+            entry._result->write(out, 2);
+            out << (const void *)entry._obj << ":"
+                << entry._obj->get_ref_count() << " =\n";
+            entry._obj->write(out, 2);
+            visited.insert(entry._result);
+          }
+          out << (void *)state << ":"
+              << state->get_ref_count() << " =\n";
+          state->write(out, 2);
+          
+          cycle_desc.clear();
+        }
       }
     }
   }
@@ -1279,11 +1434,9 @@ validate_filled_slots() const {
 //  Description: This function is used to share a common RenderState
 //               pointer for all equivalent RenderState objects.
 //
-//               See the similar logic in RenderAttrib.  The idea is
-//               to create a new RenderState object and pass it
-//               through this function, which will share the pointer
-//               with a previously-created RenderState object if it is
-//               equivalent.
+//               This is different from return_unique() in that it
+//               does not actually guarantee a unique pointer, unless
+//               uniquify-states is set.
 ////////////////////////////////////////////////////////////////////
 CPT(RenderState) RenderState::
 return_new(RenderState *state) {
@@ -1316,11 +1469,29 @@ return_new(RenderState *state) {
   nassertr(state->validate_filled_slots(), state);
 #endif
 
-  static ConfigVariableBool uniquify_states("uniquify-states", true);
   if (!uniquify_states && !state->is_empty()) {
     return state;
   }
 
+  return return_unique(state);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::return_unique
+//       Access: Private, Static
+//  Description: This function is used to share a common RenderState
+//               pointer for all equivalent RenderState objects.
+//
+//               See the similar logic in RenderAttrib.  The idea is
+//               to create a new RenderState object and pass it
+//               through this function, which will share the pointer
+//               with a previously-created RenderState object if it is
+//               equivalent.
+////////////////////////////////////////////////////////////////////
+CPT(RenderState) RenderState::
+return_unique(RenderState *state) {
+  nassertr(state != (RenderState *)NULL, state);
+
 #ifndef NDEBUG
   if (!state_cache) {
     return state;
@@ -1335,14 +1506,30 @@ return_new(RenderState *state) {
 
   LightReMutexHolder holder(*_states_lock);
 
-  // This should be a newly allocated pointer, not one that was used
-  // for anything else.
-  nassertr(state->_saved_entry == _states->end(), state);
+  if (state->_saved_entry != _states->end()) {
+    // This state is already in the cache.
+    nassertr(_states->find(state) == state->_saved_entry, state);
+    return state;
+  }
 
   // Save the state in a local PointerTo so that it will be freed at
   // the end of this function if no one else uses it.
   CPT(RenderState) pt_state = state;
 
+  // Ensure each of the individual attrib pointers has been uniquified
+  // before we add the state to the cache.
+  if (!uniquify_attribs && !state->is_empty()) {
+    SlotMask mask = state->_filled_slots;
+    int slot = mask.get_lowest_on_bit();
+    while (slot >= 0) {
+      Attribute &attrib = state->_attributes[slot];
+      nassertr(attrib._attrib != (RenderAttrib *)NULL, state);
+      attrib._attrib = attrib._attrib->get_unique();
+      mask.clear_bit(slot);
+      slot = mask.get_lowest_on_bit();
+    }    
+  }
+
   pair<States::iterator, bool> result = _states->insert(state);
 
   if (result.second) {
@@ -1529,6 +1716,85 @@ r_detect_cycles(const RenderState *start_state,
   return false;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::r_detect_reverse_cycles
+//       Access: Private, Static
+//  Description: Works the same as r_detect_cycles, but checks for
+//               cycles in the reverse direction along the cache
+//               chain.  (A cycle may appear in either direction, and
+//               we must check both.)
+////////////////////////////////////////////////////////////////////
+bool RenderState::
+r_detect_reverse_cycles(const RenderState *start_state,
+                        const RenderState *current_state,
+                        int length, UpdateSeq this_seq,
+                        RenderState::CompositionCycleDesc *cycle_desc) {
+  if (current_state->_cycle_detect == this_seq) {
+    // We've already seen this state; therefore, we've found a cycle.
+
+    // However, we only care about cycles that return to the starting
+    // state and involve more than two steps.  If only one or two
+    // nodes are involved, it doesn't represent a memory leak, so no
+    // problem there.
+    return (current_state == start_state && length > 2);
+  }
+  ((RenderState *)current_state)->_cycle_detect = this_seq;
+
+  int i;
+  int cache_size = current_state->_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_composition_cache.has_element(i)) {
+      const RenderState *other = current_state->_composition_cache.get_key(i);
+      if (other != current_state) {
+        int oi = other->_composition_cache.find(current_state);
+        nassertr(oi != -1, false);
+
+        const RenderState *result = other->_composition_cache.get_data(oi)._result;
+        if (result != (const RenderState *)NULL) {
+          if (r_detect_reverse_cycles(start_state, result, length + 1, 
+                                      this_seq, cycle_desc)) {
+            // Cycle detected.
+            if (cycle_desc != (CompositionCycleDesc *)NULL) {
+              const RenderState *other = current_state->_composition_cache.get_key(i);
+              CompositionCycleDescEntry entry(other, result, false);
+              cycle_desc->push_back(entry);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  cache_size = current_state->_invert_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_invert_composition_cache.has_element(i)) {
+      const RenderState *other = current_state->_invert_composition_cache.get_key(i);
+      if (other != current_state) {
+        int oi = other->_invert_composition_cache.find(current_state);
+        nassertr(oi != -1, false);
+
+        const RenderState *result = other->_invert_composition_cache.get_data(oi)._result;
+        if (result != (const RenderState *)NULL) {
+          if (r_detect_reverse_cycles(start_state, result, length + 1, 
+                                      this_seq, cycle_desc)) {
+            // Cycle detected.
+            if (cycle_desc != (CompositionCycleDesc *)NULL) {
+              const RenderState *other = current_state->_invert_composition_cache.get_key(i);
+              CompositionCycleDescEntry entry(other, result, false);
+              cycle_desc->push_back(entry);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  // No cycle detected.
+  return false;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::release_new
 //       Access: Private
@@ -1801,6 +2067,40 @@ update_pstats(int old_referenced_bits, int new_referenced_bits) {
 #endif  // DO_PSTATS
 }
 
+#ifdef HAVE_PYTHON
+////////////////////////////////////////////////////////////////////
+//     Function: RenderState::get_states
+//       Access: Published, Static
+//  Description: Returns a list of all of the RenderState objects
+//               in the state cache.  The order of elements in this
+//               cache is arbitrary.
+////////////////////////////////////////////////////////////////////
+PyObject *RenderState::
+get_states() {
+  IMPORT_THIS struct Dtool_PyTypedObject Dtool_RenderState;
+  if (_states == (States *)NULL) {
+    return PyList_New(0);
+  }
+  LightReMutexHolder holder(*_states_lock);
+
+  size_t num_states = _states->size();
+  PyObject *list = PyList_New(num_states);
+  States::const_iterator si;
+  size_t i;
+  for (si = _states->begin(), i = 0; si != _states->end(); ++si, ++i) {
+    nassertr(i < num_states, list);
+    const RenderState *state = (*si);
+    state->ref();
+    PyObject *a = 
+      DTool_CreatePyInstanceTyped((void *)state, Dtool_RenderState, 
+                                  true, true, state->get_type_index());
+    PyList_SET_ITEM(list, i, a);
+  }
+  nassertr(i == num_states, list);
+  return list;
+}
+#endif  // HAVE_PYTHON
+
 ////////////////////////////////////////////////////////////////////
 //     Function: RenderState::init_states
 //       Access: Public, Static
@@ -1915,7 +2215,7 @@ TypedWritable *RenderState::
 change_this(TypedWritable *old_ptr, BamReader *manager) {
   // First, uniquify the pointer.
   RenderState *state = DCAST(RenderState, old_ptr);
-  CPT(RenderState) pointer = return_new(state);
+  CPT(RenderState) pointer = return_unique(state);
 
   // But now we have a problem, since we have to hold the reference
   // count and there's no way to return a TypedWritable while still

+ 24 - 0
panda/src/pgraph/renderState.h

@@ -108,6 +108,8 @@ PUBLISHED:
   INLINE int get_override(TypeHandle type) const;
   INLINE int get_override(int slot) const;
 
+  INLINE CPT(RenderState) get_unique() const;
+
   bool unref() const;
 
   INLINE void cache_ref() const;
@@ -115,6 +117,20 @@ PUBLISHED:
   INLINE void node_ref() const;
   INLINE bool node_unref() const;
 
+  INLINE int get_composition_cache_num_entries() const;
+  INLINE int get_invert_composition_cache_num_entries() const;
+
+  INLINE int get_composition_cache_size() const;
+  INLINE const RenderState *get_composition_cache_source(int n) const;
+  INLINE const RenderState *get_composition_cache_result(int n) const;
+  INLINE int get_invert_composition_cache_size() const;
+  INLINE const RenderState *get_invert_composition_cache_source(int n) const;
+  INLINE const RenderState *get_invert_composition_cache_result(int n) const;
+#ifdef HAVE_PYTHON
+  PyObject *get_composition_cache() const;
+  PyObject *get_invert_composition_cache() const;
+#endif  // HAVE_PYTHON
+
   void output(ostream &out) const;
   void write(ostream &out, int indent_level) const;
 
@@ -127,6 +143,9 @@ PUBLISHED:
   static void list_cycles(ostream &out);
   static void list_states(ostream &out);
   static bool validate_states();
+#ifdef HAVE_PYTHON
+  static PyObject *get_states();
+#endif  // HAVE_PYTHON
 
 PUBLISHED:
   // These methods are intended for use by low-level code, but they're
@@ -159,12 +178,17 @@ private:
   typedef pvector<CompositionCycleDescEntry> CompositionCycleDesc;
 
   static CPT(RenderState) return_new(RenderState *state);
+  static CPT(RenderState) return_unique(RenderState *state);
   CPT(RenderState) do_compose(const RenderState *other) const;
   CPT(RenderState) do_invert_compose(const RenderState *other) const;
   static bool r_detect_cycles(const RenderState *start_state,
                               const RenderState *current_state,
                               int length, UpdateSeq this_seq,
                               CompositionCycleDesc *cycle_desc);
+  static bool r_detect_reverse_cycles(const RenderState *start_state,
+                                      const RenderState *current_state,
+                                      int length, UpdateSeq this_seq,
+                                      CompositionCycleDesc *cycle_desc);
 
   void release_new();
   void remove_cache_pointers();

+ 15 - 0
panda/src/pgraph/transformState.I

@@ -683,6 +683,21 @@ get_inverse() const {
   return invert_compose(TransformState::make_identity());
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: TransformState::get_unique
+//       Access: Published
+//  Description: Returns the pointer to the unique TransformState in
+//               the cache that is equivalent to this one.  This may
+//               be the same pointer as this object, or it may be a
+//               different pointer; but it will be an equivalent
+//               object, and it will be a shared pointer.  This may be
+//               called from time to time to improve cache benefits.
+////////////////////////////////////////////////////////////////////
+INLINE CPT(TransformState) TransformState::
+get_unique() const {
+  return return_unique((TransformState *)this);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: TransformState::get_geom_rendering
 //       Access: Published

+ 150 - 14
panda/src/pgraph/transformState.cxx

@@ -29,6 +29,7 @@
 LightReMutex *TransformState::_states_lock = NULL;
 TransformState::States *TransformState::_states = NULL;
 CPT(TransformState) TransformState::_identity_state;
+CPT(TransformState) TransformState::_invalid_state;
 UpdateSeq TransformState::_last_cycle_detect;
 PStatCollector TransformState::_cache_update_pcollector("*:State Cache:Update");
 PStatCollector TransformState::_transform_compose_pcollector("*:State Cache:Compose Transform");
@@ -195,7 +196,7 @@ make_identity() {
   // and store a pointer forever once we find it the first time.
   if (_identity_state == (TransformState *)NULL) {
     TransformState *state = new TransformState;
-    _identity_state = return_new(state);
+    _identity_state = return_unique(state);
   }
 
   return _identity_state;
@@ -209,9 +210,13 @@ make_identity() {
 ////////////////////////////////////////////////////////////////////
 CPT(TransformState) TransformState::
 make_invalid() {
-  TransformState *state = new TransformState;
-  state->_flags = F_is_invalid | F_singular_known | F_is_singular | F_components_known | F_mat_known;
-  return return_new(state);
+  if (_invalid_state == (TransformState *)NULL) {
+    TransformState *state = new TransformState;
+    state->_flags = F_is_invalid | F_singular_known | F_is_singular | F_components_known | F_mat_known;
+    _invalid_state = return_unique(state);
+  }
+
+  return _invalid_state;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -787,7 +792,7 @@ unref() const {
   // be holding it if we happen to drop the reference count to 0.
   LightReMutexHolder holder(*_states_lock);
 
-  if (auto_break_cycles) {
+  if (auto_break_cycles && uniquify_transforms) {
     if (get_cache_ref_count() > 0 &&
         get_ref_count() == get_cache_ref_count() + 1) {
       // If we are about to remove the one reference that is not in the
@@ -807,6 +812,16 @@ unref() const {
         }
         
         ((TransformState *)this)->remove_cache_pointers();
+      } else {
+        ++_last_cycle_detect;
+        if (r_detect_reverse_cycles(this, this, 1, _last_cycle_detect, NULL)) {
+          if (pgraph_cat.is_debug()) {
+            pgraph_cat.debug()
+              << "Breaking cycle involving " << (*this) << "\n";
+          }
+          
+          ((TransformState *)this)->remove_cache_pointers();
+        }
       }
     }
   }
@@ -1317,6 +1332,30 @@ list_cycles(ostream &out) {
         }
 
         cycle_desc.clear();
+      } else {
+        ++_last_cycle_detect;
+        if (r_detect_reverse_cycles(state, state, 1, _last_cycle_detect, &cycle_desc)) {
+          // This state begins a cycle.
+          CompositionCycleDesc::iterator csi;
+          
+          out << "\nReverse cycle detected of length " << cycle_desc.size() + 1 << ":\n"
+              << "state ";
+          for (csi = cycle_desc.begin(); csi != cycle_desc.end(); ++csi) {
+            const CompositionCycleDescEntry &entry = (*csi);
+            out << (const void *)entry._result << ":"
+                << entry._result->get_ref_count() << " =\n";
+            entry._result->write(out, 2);
+            out << (const void *)entry._obj << ":"
+                << entry._obj->get_ref_count() << " =\n";
+            entry._obj->write(out, 2);
+            visited.insert(entry._result);
+          }
+          out << (void *)state << ":"
+              << state->get_ref_count() << " =\n";
+          state->write(out, 2);
+          
+          cycle_desc.clear();
+        }
       }
     }
   }
@@ -1464,6 +1503,26 @@ init_states() {
 //  Description: This function is used to share a common TransformState
 //               pointer for all equivalent TransformState objects.
 //
+//               This is different from return_unique() in that it
+//               does not actually guarantee a unique pointer, unless
+//               uniquify-transforms is set.
+////////////////////////////////////////////////////////////////////
+CPT(TransformState) TransformState::
+return_new(TransformState *state) {
+  nassertr(state != (TransformState *)NULL, state);
+  if (!uniquify_transforms && !state->is_identity()) {
+    return state;
+  }
+
+  return return_unique(state);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: TransformState::return_unique
+//       Access: Private, Static
+//  Description: This function is used to share a common TransformState
+//               pointer for all equivalent TransformState objects.
+//
 //               See the similar logic in RenderState.  The idea is to
 //               create a new TransformState object and pass it
 //               through this function, which will share the pointer
@@ -1471,12 +1530,8 @@ init_states() {
 //               is equivalent.
 ////////////////////////////////////////////////////////////////////
 CPT(TransformState) TransformState::
-return_new(TransformState *state) {
+return_unique(TransformState *state) {
   nassertr(state != (TransformState *)NULL, state);
-  static ConfigVariableBool uniquify_transforms("uniquify-transforms", true);
-  if (!uniquify_transforms && !state->is_identity()) {
-    return state;
-  }
 
 #ifndef NDEBUG
   if (!transform_cache) {
@@ -1494,9 +1549,11 @@ return_new(TransformState *state) {
 
   LightReMutexHolder holder(*_states_lock);
 
-  // This should be a newly allocated pointer, not one that was used
-  // for anything else.
-  nassertr(state->_saved_entry == _states->end(), state);
+  if (state->_saved_entry != _states->end()) {
+    // This state is already in the cache.
+    nassertr(_states->find(state) == state->_saved_entry, state);
+    return state;
+  }
 
   // Save the state in a local PointerTo so that it will be freed at
   // the end of this function if no one else uses it.
@@ -1797,6 +1854,85 @@ r_detect_cycles(const TransformState *start_state,
   return false;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: TransformState::r_detect_reverse_cycles
+//       Access: Private, Static
+//  Description: Works the same as r_detect_cycles, but checks for
+//               cycles in the reverse direction along the cache
+//               chain.  (A cycle may appear in either direction, and
+//               we must check both.)
+////////////////////////////////////////////////////////////////////
+bool TransformState::
+r_detect_reverse_cycles(const TransformState *start_state,
+                        const TransformState *current_state,
+                        int length, UpdateSeq this_seq,
+                        TransformState::CompositionCycleDesc *cycle_desc) {
+  if (current_state->_cycle_detect == this_seq) {
+    // We've already seen this state; therefore, we've found a cycle.
+
+    // However, we only care about cycles that return to the starting
+    // state and involve more than two steps.  If only one or two
+    // nodes are involved, it doesn't represent a memory leak, so no
+    // problem there.
+    return (current_state == start_state && length > 2);
+  }
+  ((TransformState *)current_state)->_cycle_detect = this_seq;
+
+  int i;
+  int cache_size = current_state->_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_composition_cache.has_element(i)) {
+      const TransformState *other = current_state->_composition_cache.get_key(i);
+      if (other != current_state) {
+        int oi = other->_composition_cache.find(current_state);
+        nassertr(oi != -1, false);
+
+        const TransformState *result = other->_composition_cache.get_data(oi)._result;
+        if (result != (const TransformState *)NULL) {
+          if (r_detect_reverse_cycles(start_state, result, length + 1, 
+                                      this_seq, cycle_desc)) {
+            // Cycle detected.
+            if (cycle_desc != (CompositionCycleDesc *)NULL) {
+              const TransformState *other = current_state->_composition_cache.get_key(i);
+              CompositionCycleDescEntry entry(other, result, false);
+              cycle_desc->push_back(entry);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  cache_size = current_state->_invert_composition_cache.get_size();
+  for (i = 0; i < cache_size; ++i) {
+    if (current_state->_invert_composition_cache.has_element(i)) {
+      const TransformState *other = current_state->_invert_composition_cache.get_key(i);
+      if (other != current_state) {
+        int oi = other->_invert_composition_cache.find(current_state);
+        nassertr(oi != -1, false);
+
+        const TransformState *result = other->_invert_composition_cache.get_data(oi)._result;
+        if (result != (const TransformState *)NULL) {
+          if (r_detect_reverse_cycles(start_state, result, length + 1, 
+                                      this_seq, cycle_desc)) {
+            // Cycle detected.
+            if (cycle_desc != (CompositionCycleDesc *)NULL) {
+              const TransformState *other = current_state->_invert_composition_cache.get_key(i);
+              CompositionCycleDescEntry entry(other, result, false);
+              cycle_desc->push_back(entry);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+
+  // No cycle detected.
+  return false;
+}
+
 
 ////////////////////////////////////////////////////////////////////
 //     Function: TransformState::release_new
@@ -2289,7 +2425,7 @@ TypedWritable *TransformState::
 change_this(TypedWritable *old_ptr, BamReader *manager) {
   // First, uniquify the pointer.
   TransformState *state = DCAST(TransformState, old_ptr);
-  CPT(TransformState) pointer = return_new(state);
+  CPT(TransformState) pointer = return_unique(state);
 
   // But now we have a problem, since we have to hold the reference
   // count and there's no way to return a TypedWritable while still

+ 8 - 0
panda/src/pgraph/transformState.h

@@ -169,6 +169,7 @@ PUBLISHED:
   CPT(TransformState) invert_compose(const TransformState *other) const;
 
   INLINE CPT(TransformState) get_inverse() const;
+  INLINE CPT(TransformState) get_unique() const;
 
   INLINE int get_geom_rendering(int geom_rendering) const;
 
@@ -230,12 +231,18 @@ private:
   typedef pvector<CompositionCycleDescEntry> CompositionCycleDesc;
 
   static CPT(TransformState) return_new(TransformState *state);
+  static CPT(TransformState) return_unique(TransformState *state);
+
   CPT(TransformState) do_compose(const TransformState *other) const;
   CPT(TransformState) do_invert_compose(const TransformState *other) const;
   static bool r_detect_cycles(const TransformState *start_state,
                               const TransformState *current_state,
                               int length, UpdateSeq this_seq,
                               CompositionCycleDesc *cycle_desc);
+  static bool r_detect_reverse_cycles(const TransformState *start_state,
+                                      const TransformState *current_state,
+                                      int length, UpdateSeq this_seq,
+                                      CompositionCycleDesc *cycle_desc);
 
   void release_new();
   void remove_cache_pointers();
@@ -248,6 +255,7 @@ private:
   typedef phash_set<const TransformState *, indirect_less_hash<const TransformState *> > States;
   static States *_states;
   static CPT(TransformState) _identity_state;
+  static CPT(TransformState) _invalid_state;
 
   // This iterator records the entry corresponding to this TransformState
   // object in the above global set.  We keep the iterator around so