Explorar o código

faster sprite particles

David Rose hai 19 anos
pai
achega
63edecf517
Modificáronse 30 ficheiros con 681 adicións e 369 borrados
  1. 4 0
      panda/src/display/graphicsEngine.cxx
  2. 1 0
      panda/src/display/graphicsEngine.h
  3. 18 2
      panda/src/express/pointerToArray.I
  4. 1 0
      panda/src/express/pointerToArray.h
  5. 12 0
      panda/src/express/pointerToArrayBase.I
  6. 1 0
      panda/src/express/pointerToArrayBase.h
  7. 4 3
      panda/src/gobj/geomPrimitive.I
  8. 88 17
      panda/src/gobj/geomPrimitive.cxx
  9. 6 2
      panda/src/gobj/geomPrimitive.h
  10. 20 0
      panda/src/gobj/geomVertexArrayData.I
  11. 26 0
      panda/src/gobj/geomVertexArrayData.cxx
  12. 2 0
      panda/src/gobj/geomVertexArrayData.h
  13. 27 0
      panda/src/gobj/geomVertexData.I
  14. 37 0
      panda/src/gobj/geomVertexData.cxx
  15. 2 0
      panda/src/gobj/geomVertexData.h
  16. 5 0
      panda/src/particlesystem/geomParticleRenderer.cxx
  17. 3 2
      panda/src/particlesystem/geomParticleRenderer.h
  18. 4 0
      panda/src/particlesystem/lineParticleRenderer.cxx
  19. 3 1
      panda/src/particlesystem/lineParticleRenderer.h
  20. 4 0
      panda/src/particlesystem/pointParticleRenderer.cxx
  21. 3 0
      panda/src/particlesystem/pointParticleRenderer.h
  22. 4 0
      panda/src/particlesystem/sparkleParticleRenderer.cxx
  23. 3 1
      panda/src/particlesystem/sparkleParticleRenderer.h
  24. 93 97
      panda/src/particlesystem/spriteParticleRenderer.cxx
  25. 1 2
      panda/src/particlesystem/spriteParticleRenderer.h
  26. 10 0
      panda/src/pgraph/cullableObject.I
  27. 285 239
      panda/src/pgraph/cullableObject.cxx
  28. 10 1
      panda/src/pgraph/cullableObject.h
  29. 2 1
      panda/src/pstatclient/pStatProperties.cxx
  30. 2 1
      panda/src/putil/bam.h

+ 4 - 0
panda/src/display/graphicsEngine.cxx

@@ -42,6 +42,7 @@
 #include "throw_event.h"
 #include "objectDeletor.h"
 #include "bamCache.h"
+#include "cullableObject.h"
 
 #if defined(WIN32)
   #define WINDOWS_LEAN_AND_MEAN
@@ -94,6 +95,7 @@ PStatCollector GraphicsEngine::_volume_inv_sphere_pcollector("Collision Volumes:
 PStatCollector GraphicsEngine::_test_inv_sphere_pcollector("Collision Tests:CollisionInvSphere");
 PStatCollector GraphicsEngine::_volume_geom_pcollector("Collision Volumes:CollisionGeom");
 PStatCollector GraphicsEngine::_test_geom_pcollector("Collision Tests:CollisionGeom");
+PStatCollector GraphicsEngine::_sw_sprites_pcollector("SW Sprites");
 
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::Constructor
@@ -665,6 +667,7 @@ render_frame() {
   CullTraverser::flush_level();
   RenderState::flush_level();
   TransformState::flush_level();
+  CullableObject::flush_level();
 
   // Now cycle the pipeline and officially begin the next frame.
 #ifdef THREADED_PIPELINE
@@ -719,6 +722,7 @@ render_frame() {
   _test_inv_sphere_pcollector.clear_level();
   _volume_geom_pcollector.clear_level();
   _test_geom_pcollector.clear_level();
+  _sw_sprites_pcollector.clear_level();
 
 #endif  // DO_PSTATS
 

+ 1 - 0
panda/src/display/graphicsEngine.h

@@ -376,6 +376,7 @@ private:
   static PStatCollector _test_inv_sphere_pcollector;
   static PStatCollector _volume_geom_pcollector;
   static PStatCollector _test_geom_pcollector;
+  static PStatCollector _sw_sprites_pcollector;
 
   friend class WindowRenderer;
   friend class GraphicsOutput;

+ 18 - 2
panda/src/express/pointerToArray.I

@@ -43,8 +43,10 @@ template<class Element>
 INLINE PointerToArray<Element> 
 PointerToArray<Element>::empty_array(size_type n) {
   PointerToArray<Element> temp;
-  temp.reserve(n);
-  ((To *)temp._void_ptr)->insert(((To *)temp._void_ptr)->begin(), n, Element());
+  temp.reassign(new ReferenceCountedVector<Element>);
+
+  To new_array(n);
+  ((To *)(temp._void_ptr))->swap(new_array);
   return temp;
 }
 
@@ -213,6 +215,20 @@ reserve(TYPENAME PointerToArray<Element>::size_type n) {
   ((To *)(this->_void_ptr))->reserve(n);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: PointerToArray::resize
+//       Access: Public
+//  Description: Resizes the array to n elements, allocating it first if NULL.
+////////////////////////////////////////////////////////////////////
+template<class Element>
+INLINE void PointerToArray<Element>::
+resize(TYPENAME PointerToArray<Element>::size_type n) {
+  if ((this->_void_ptr) == NULL) {
+    reassign(new ReferenceCountedVector<Element>);
+  }
+  ((To *)(this->_void_ptr))->resize(n);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: PointerToArray::capacity
 //       Access: Public

+ 1 - 0
panda/src/express/pointerToArray.h

@@ -142,6 +142,7 @@ public:
 
   // Functions specific to vectors.
   INLINE void reserve(size_type n);
+  INLINE void resize(size_type n);
   INLINE size_type capacity() const;
   INLINE reference front() const;
   INLINE reference back() const;

+ 12 - 0
panda/src/express/pointerToArrayBase.I

@@ -43,6 +43,18 @@ ReferenceCountedVector(const ReferenceCountedVector<Element> &copy) :
 #endif  // DO_PSTATS
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: ReferenceCountedVector::empty_array constructor
+//       Access: Public
+//  Description: Creates an array of initial_size elements.
+////////////////////////////////////////////////////////////////////
+template<class Element>
+INLINE ReferenceCountedVector<Element>::
+ReferenceCountedVector(TYPENAME ReferenceCountedVector<Element>::size_type initial_size) :
+  pvector<Element>(initial_size)
+{
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: ReferenceCountedVector::Destructor
 //       Access: Public

+ 1 - 0
panda/src/express/pointerToArrayBase.h

@@ -51,6 +51,7 @@ public:
 
   INLINE ReferenceCountedVector();
   INLINE ReferenceCountedVector(const ReferenceCountedVector<Element> &copy);
+  INLINE ReferenceCountedVector(size_type initial_size);
   INLINE ~ReferenceCountedVector();
   ALLOC_DELETED_CHAIN(ReferenceCountedVector<Element>);
 

+ 4 - 3
panda/src/gobj/geomPrimitive.I

@@ -339,7 +339,7 @@ get_ends() const {
 //  Description: Returns a const pointer to the primitive mins
 //               array so application code can read it directly.  Do
 //               not attempt to modify the returned array; use
-//               modify_mins() or set_mins() for this.
+//               set_minmax() for this.
 //
 //               Note that simple primitive types, like triangles, do
 //               not have a mins array.
@@ -357,7 +357,7 @@ get_mins() const {
 //  Description: Returns a const pointer to the primitive maxs
 //               array so application code can read it directly.  Do
 //               not attempt to modify the returned array; use
-//               modify_maxs() or set_maxs() for this.
+//               set_minmax().
 //
 //               Note that simple primitive types, like triangles, do
 //               not have a maxs array.
@@ -606,9 +606,10 @@ is_indexed() const {
 ////////////////////////////////////////////////////////////////////
 INLINE int GeomPrimitivePipelineReader::
 get_num_vertices() const {
-  if (_cdata->_vertices == (GeomVertexArrayData *)NULL) {
+  if (_cdata->_num_vertices != -1) {
     return _cdata->_num_vertices;
   } else {
+    nassertr(_cdata->_vertices != (GeomVertexArrayData *)NULL, 0);
     return _vertices_reader->get_num_rows();
   }
 }

+ 88 - 17
panda/src/gobj/geomPrimitive.cxx

@@ -198,6 +198,7 @@ add_vertex(int vertex) {
   if (cdata->_vertices == (GeomVertexArrayData *)NULL) {
     // The nonindexed case.  We can keep the primitive nonindexed only
     // if the vertex number happens to be the next available vertex.
+    nassertv(cdata->_num_vertices != -1);
     if (cdata->_num_vertices == 0) {
       cdata->_first_vertex = vertex;
       cdata->_num_vertices = 1;
@@ -260,6 +261,7 @@ add_consecutive_vertices(int start, int num_vertices) {
   if (cdata->_vertices == (GeomVertexArrayData *)NULL) {
     // The nonindexed case.  We can keep the primitive nonindexed only
     // if the vertex number happens to be the next available vertex.
+    nassertv(cdata->_num_vertices != -1);
     if (cdata->_num_vertices == 0) {
       cdata->_first_vertex = start;
       cdata->_num_vertices = num_vertices;
@@ -848,14 +850,21 @@ write(ostream &out, int indent_level) const {
 //               If this is called on a nonindexed primitive, it will
 //               implicitly be converted to an indexed primitive.
 //
+//               If num_vertices is not -1, it specifies an artificial
+//               limit to the number of vertices in the array.
+//               Otherwise, all of the vertices in the array will be
+//               used.
+//
 //               Don't call this in a downstream thread unless you
 //               don't mind it blowing away other changes you might
 //               have recently made in an upstream thread.
 ////////////////////////////////////////////////////////////////////
 GeomVertexArrayData *GeomPrimitive::
-modify_vertices() {
+modify_vertices(int num_vertices) {
   CDWriter cdata(_cycler, true);
-  return do_modify_vertices(cdata);
+  GeomVertexArrayData *vertices = do_modify_vertices(cdata);
+  cdata->_num_vertices = num_vertices;
+  return vertices;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -865,14 +874,20 @@ modify_vertices() {
 //               table.  Chances are good that you should also replace
 //               the ends list with set_ends() at the same time.
 //
+//               If num_vertices is not -1, it specifies an artificial
+//               limit to the number of vertices in the array.
+//               Otherwise, all of the vertices in the array will be
+//               used.
+//
 //               Don't call this in a downstream thread unless you
 //               don't mind it blowing away other changes you might
 //               have recently made in an upstream thread.
 ////////////////////////////////////////////////////////////////////
 void GeomPrimitive::
-set_vertices(const GeomVertexArrayData *vertices) {
+set_vertices(const GeomVertexArrayData *vertices, int num_vertices) {
   CDWriter cdata(_cycler, true);
   cdata->_vertices = (GeomVertexArrayData *)vertices;
+  cdata->_num_vertices = num_vertices;
 
   cdata->_modified = Geom::get_next_modified();
   cdata->_got_minmax = false;
@@ -890,6 +905,7 @@ set_vertices(const GeomVertexArrayData *vertices) {
 ////////////////////////////////////////////////////////////////////
 void GeomPrimitive::
 set_nonindexed_vertices(int first_vertex, int num_vertices) {
+  nassertv(num_vertices != -1);
   CDWriter cdata(_cycler, true);
   cdata->_vertices = (GeomVertexArrayData *)NULL;
   cdata->_first_vertex = first_vertex;
@@ -952,6 +968,48 @@ set_ends(CPTA_int ends) {
   cdata->_got_minmax = false;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomPrimitive::set_minmax
+//       Access: Public
+//  Description: Explicitly specifies the minimum and maximum
+//               vertices, as well as the lists of per-component min
+//               and max.
+//
+//               Use this method with extreme caution.  It's generally
+//               better to let the GeomPrimitive compute these
+//               explicitly, unless for some reason you can do it
+//               faster and you absolutely need the speed improvement.
+//
+//               Note that any modification to the vertex array will
+//               normally cause this to be recomputed, unless you set
+//               it immediately again.
+////////////////////////////////////////////////////////////////////
+void GeomPrimitive::
+set_minmax(int min_vertex, int max_vertex,
+           GeomVertexArrayData *mins, GeomVertexArrayData *maxs) {
+  CDWriter cdata(_cycler, true);
+  cdata->_min_vertex = min_vertex;
+  cdata->_max_vertex = max_vertex;
+  cdata->_mins = mins;
+  cdata->_maxs = maxs;
+
+  cdata->_modified = Geom::get_next_modified();
+  cdata->_got_minmax = true;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GeomPrimitive::clear_minmax
+//       Access: Public
+//  Description: Undoes a previous call to set_minmax(), and allows
+//               the minimum and maximum values to be recomputed
+//               normally.
+////////////////////////////////////////////////////////////////////
+void GeomPrimitive::
+clear_minmax() {
+  CDWriter cdata(_cycler, true);
+  cdata->_got_minmax = false;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomPrimitive::get_num_vertices_per_primitive
 //       Access: Public, Virtual
@@ -1167,6 +1225,7 @@ calc_tight_bounds(LPoint3f &min_point, LPoint3f &max_point,
 
   if (cdata->_vertices == (GeomVertexArrayData *)NULL) {
     // Nonindexed case.
+    nassertv(cdata->_num_vertices != -1);
     if (got_mat) {
       for (int i = 0; i < cdata->_num_vertices; i++) {
         reader.set_row(cdata->_first_vertex + i);
@@ -1208,9 +1267,10 @@ calc_tight_bounds(LPoint3f &min_point, LPoint3f &max_point,
   } else {
     // Indexed case.
     GeomVertexReader index(cdata->_vertices, 0, current_thread);
+    int num_vertices = get_num_vertices();
 
     if (got_mat) {
-      while (!index.is_at_end()) {
+      for (int i = 0; i < num_vertices; ++i) {
         reader.set_row(index.get_data1i());
         LPoint3f vertex = mat.xform_point(reader.get_data3f());
         
@@ -1228,7 +1288,7 @@ calc_tight_bounds(LPoint3f &min_point, LPoint3f &max_point,
         }
       }
     } else {
-      while (!index.is_at_end()) {
+      for (int i = 0; i < num_vertices; ++i) {
         reader.set_row(index.get_data1i());
         const LVecBase3f &vertex = reader.get_data3f();
         
@@ -1318,6 +1378,7 @@ recompute_minmax(GeomPrimitive::CData *cdata) {
   if (cdata->_vertices == (GeomVertexArrayData *)NULL) {
     // In the nonindexed case, we don't need to do much (the
     // minmax is trivial).
+    nassertv(cdata->_num_vertices != -1);
     cdata->_min_vertex = cdata->_first_vertex;
     cdata->_max_vertex = cdata->_first_vertex + cdata->_num_vertices - 1;
     cdata->_mins.clear();
@@ -1342,16 +1403,16 @@ recompute_minmax(GeomPrimitive::CData *cdata) {
     GeomVertexWriter maxs(cdata->_maxs, 0);
 
     int pi = 0;
-    int vi = 0;
-    
+
     unsigned int vertex = index.get_data1i();
     cdata->_min_vertex = vertex;
     cdata->_max_vertex = vertex;
     unsigned int min_prim = vertex;
     unsigned int max_prim = vertex;
-    
-    ++vi;
-    while (!index.is_at_end()) {
+
+    int num_vertices = get_num_vertices();
+    for (int vi = 1; vi < num_vertices; ++vi) {
+      nassertv(!index.is_at_end());
       unsigned int vertex = index.get_data1i();
       cdata->_min_vertex = min(cdata->_min_vertex, vertex);
       cdata->_max_vertex = max(cdata->_max_vertex, vertex);
@@ -1367,8 +1428,6 @@ recompute_minmax(GeomPrimitive::CData *cdata) {
         min_prim = min(min_prim, vertex);
         max_prim = max(max_prim, vertex);
       }
-      
-      ++vi;
     }
     mins.add_data1i(min_prim);
     maxs.add_data1i(max_prim);
@@ -1386,7 +1445,9 @@ recompute_minmax(GeomPrimitive::CData *cdata) {
     cdata->_min_vertex = vertex;
     cdata->_max_vertex = vertex;
 
-    while (!index.is_at_end()) {
+    int num_vertices = get_num_vertices();
+    for (int vi = 1; vi < num_vertices; ++vi) {
+      nassertv(!index.is_at_end());
       unsigned int vertex = index.get_data1i();
       cdata->_min_vertex = min(cdata->_min_vertex, vertex);
       cdata->_max_vertex = max(cdata->_max_vertex, vertex);
@@ -1404,11 +1465,13 @@ recompute_minmax(GeomPrimitive::CData *cdata) {
 void GeomPrimitive::
 do_make_indexed(CData *cdata) {
   if (cdata->_vertices == (GeomVertexArrayData *)NULL) {
+    nassertv(cdata->_num_vertices != -1);
     cdata->_vertices = make_index_data();
     GeomVertexWriter index(cdata->_vertices, 0);
     for (int i = 0; i < cdata->_num_vertices; ++i) {
       index.add_data1i(i + cdata->_first_vertex);
     }
+    cdata->_num_vertices = -1;
   }
 }
 
@@ -1554,8 +1617,8 @@ make_copy() const {
 void GeomPrimitive::CData::
 write_datagram(BamWriter *manager, Datagram &dg) const {
   dg.add_uint8(_shade_model);
-  dg.add_uint32(_first_vertex);
-  dg.add_uint32(_num_vertices);
+  dg.add_int32(_first_vertex);
+  dg.add_int32(_num_vertices);
   dg.add_uint8(_index_type);
   dg.add_uint8(_usage_hint);
 
@@ -1576,6 +1639,14 @@ complete_pointers(TypedWritable **p_list, BamReader *manager) {
 
   _vertices = DCAST(GeomVertexArrayData, p_list[pi++]);    
 
+  if (manager->get_file_minor_ver() < 6 &&
+      _vertices != (GeomVertexArrayData *)NULL) {
+    // Older bam files might have a meaningless number in
+    // _num_vertices if the primitive is indexed.  Nowadays, this
+    // number is always considered meaningful unless it is -1.
+    _num_vertices = -1;
+  }
+
   return pi;
 }
 
@@ -1589,8 +1660,8 @@ complete_pointers(TypedWritable **p_list, BamReader *manager) {
 void GeomPrimitive::CData::
 fillin(DatagramIterator &scan, BamReader *manager) {
   _shade_model = (ShadeModel)scan.get_uint8();
-  _first_vertex = scan.get_uint32();
-  _num_vertices = scan.get_uint32();
+  _first_vertex = scan.get_int32();
+  _num_vertices = scan.get_int32();
   _index_type = (NumericType)scan.get_uint8();
   _usage_hint = (UsageHint)scan.get_uint8();
 

+ 6 - 2
panda/src/gobj/geomPrimitive.h

@@ -152,8 +152,8 @@ public:
   // instead.
 
   INLINE const GeomVertexArrayData *get_vertices() const;
-  GeomVertexArrayData *modify_vertices();
-  void set_vertices(const GeomVertexArrayData *vertices);
+  GeomVertexArrayData *modify_vertices(int num_vertices = -1);
+  void set_vertices(const GeomVertexArrayData *vertices, int num_vertices = -1);
   void set_nonindexed_vertices(int first_vertex, int num_vertices);
 
   INLINE int get_index_stride() const;
@@ -166,6 +166,10 @@ public:
   INLINE const GeomVertexArrayData *get_mins() const;
   INLINE const GeomVertexArrayData *get_maxs() const;
 
+  void set_minmax(int min_vertex, int max_vertex,
+                  GeomVertexArrayData *mins, GeomVertexArrayData *maxs);
+  void clear_minmax();
+
   virtual int get_num_vertices_per_primitive() const;
   virtual int get_min_num_vertices_per_primitive() const;
   virtual int get_num_unused_vertices_per_primitive() const;

+ 20 - 0
panda/src/gobj/geomVertexArrayData.I

@@ -70,6 +70,7 @@ get_num_rows() const {
 //     Function: GeomVertexArrayData::set_num_rows
 //       Access: Published
 //  Description: Sets the length of the array to n rows.
+//
 //               Normally, you would not call this directly, since all
 //               of the arrays in a particular GeomVertexData must
 //               have the same number of rows; instead, call
@@ -93,6 +94,25 @@ set_num_rows(int n) {
   return writer.set_num_rows(n);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexArrayData::unclean_set_num_rows
+//       Access: Published
+//  Description: This method behaves like set_num_rows(), except the
+//               new data is not initialized.  Furthermore, after this
+//               call, *any* of the data in the GeomVertexArrayData
+//               may be uninitialized, including the earlier rows.
+//
+//               Normally, you would not call this directly, since all
+//               of the arrays in a particular GeomVertexData must
+//               have the same number of rows; instead, call
+//               GeomVertexData::unclean_set_num_rows().
+////////////////////////////////////////////////////////////////////
+INLINE bool GeomVertexArrayData::
+unclean_set_num_rows(int n) {
+  GeomVertexArrayDataPipelineWriter writer(this, true, Thread::get_current_thread());
+  return writer.unclean_set_num_rows(n);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexArrayData::clear_rows
 //       Access: Published

+ 26 - 0
panda/src/gobj/geomVertexArrayData.cxx

@@ -572,6 +572,32 @@ set_num_rows(int n) {
   return false;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexArrayDataPipelineWriter::unclean_set_num_rows
+//       Access: Public
+//  Description: Replaces the data with a new n-row array; old contents are lost.
+////////////////////////////////////////////////////////////////////
+bool GeomVertexArrayDataPipelineWriter::
+unclean_set_num_rows(int n) {
+  int stride = _object->_array_format->get_stride();
+  int delta = n - (_cdata->_data.size() / stride);
+  
+  if (delta != 0) {
+    // Just make a new array.  No reason to keep the old one around.
+    PTA_uchar new_data = PTA_uchar::empty_array(n * stride);
+
+    _cdata->_data.node_unref();
+    _cdata->_data = new_data;
+    _cdata->_data.node_ref();
+    _cdata->_data.set_col(GeomVertexArrayData::_vdata_mem_pcollector);
+    _cdata->_modified = Geom::get_next_modified();
+
+    return true;
+  }
+  
+  return false;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexArrayDataPipelineWriter::modify_data
 //       Access: Published

+ 2 - 0
panda/src/gobj/geomVertexArrayData.h

@@ -79,6 +79,7 @@ PUBLISHED:
 
   INLINE int get_num_rows() const;
   INLINE bool set_num_rows(int n);
+  INLINE bool unclean_set_num_rows(int n);
   INLINE void clear_rows();
 
   INLINE int get_data_size_bytes() const;
@@ -276,6 +277,7 @@ public:
   ALLOC_DELETED_CHAIN(GeomVertexArrayDataPipelineWriter);
 
   bool set_num_rows(int n);
+  bool unclean_set_num_rows(int n);
 
   INLINE GeomVertexArrayData *get_object() const;
   PTA_uchar modify_data();

+ 27 - 0
panda/src/gobj/geomVertexData.I

@@ -120,6 +120,33 @@ set_num_rows(int n) {
   return writer.set_num_rows(n);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexData::unclean_set_num_rows
+//       Access: Published
+//  Description: This method behaves like set_num_rows(), except the
+//               new data is not initialized.  Furthermore, after this
+//               call, *any* of the data in the GeomVertexData may be
+//               uninitialized, including the earlier rows.
+//
+//               This is intended for applications that are about to
+//               completely fill the GeomVertexData with new data
+//               anyway; it provides a tiny performance boost over
+//               set_num_rows().
+//
+//               Although this method is Published, application code
+//               only very rarely has any need to call it.  Instead,
+//               you should use the GeomVertexWriter to build up the
+//               rows in a GeomVertexData object automatically,
+//               without need to explicitly set the number of
+//               rows.
+////////////////////////////////////////////////////////////////////
+INLINE bool GeomVertexData::
+unclean_set_num_rows(int n) {
+  GeomVertexDataPipelineWriter writer(this, true, Thread::get_current_thread());
+  writer.check_array_writers();
+  return writer.unclean_set_num_rows(n);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexData::get_num_arrays
 //       Access: Published

+ 37 - 0
panda/src/gobj/geomVertexData.cxx

@@ -1861,6 +1861,43 @@ set_num_rows(int n) {
   return any_changed;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexDataPipelineWriter::unclean_set_num_rows
+//       Access: Public
+//  Description: Calls unclean_set_num_rows(n) on each array not already n rows.
+////////////////////////////////////////////////////////////////////
+bool GeomVertexDataPipelineWriter::
+unclean_set_num_rows(int n) {
+  nassertr(_got_array_writers, false);
+  nassertr(_cdata->_format->get_num_arrays() == (int)_cdata->_arrays.size(), false);
+
+  bool any_changed = false;
+
+  int color_array = -1;
+  int orig_color_rows = -1;
+
+  for (size_t i = 0; i < _cdata->_arrays.size(); i++) {
+    if (_array_writers[i]->get_num_rows() != n) {
+      // Copy-on-write.
+      if (_cdata->_arrays[i]->get_ref_count() > 1) {
+        delete _array_writers[i];
+        _cdata->_arrays[i] = new GeomVertexArrayData(*_cdata->_arrays[i]);
+        _array_writers[i] = new GeomVertexArrayDataPipelineWriter(_cdata->_arrays[i], _force_to_0, _current_thread);
+      }
+      _array_writers[i]->unclean_set_num_rows(n);
+      any_changed = true;
+    }
+  }
+
+  if (any_changed) {
+    _object->clear_cache_stage();
+    _cdata->_modified = Geom::get_next_modified();
+    _cdata->_animated_vertices.clear();
+  }
+
+  return any_changed;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexDataPipelineWriter::modify_array
 //       Access: Public

+ 2 - 0
panda/src/gobj/geomVertexData.h

@@ -102,6 +102,7 @@ PUBLISHED:
 
   INLINE int get_num_rows() const;
   INLINE bool set_num_rows(int n);
+  INLINE bool unclean_set_num_rows(int n);
   void clear_rows();
 
   INLINE int get_num_arrays() const;
@@ -475,6 +476,7 @@ public:
 
   int get_num_rows() const;
   bool set_num_rows(int n);
+  bool unclean_set_num_rows(int n);
 
 private:
   void make_array_writers();

+ 5 - 0
panda/src/particlesystem/geomParticleRenderer.cxx

@@ -22,6 +22,9 @@
 #include "transformState.h"
 #include "colorScaleAttrib.h"
 #include "colorAttrib.h"
+#include "pStatTimer.h"
+
+PStatCollector GeomParticleRenderer::_render_collector("App:Particles:Geom:Render");
 
 ////////////////////////////////////////////////////////////////////
 //    Function : GeomParticleRenderer
@@ -163,6 +166,8 @@ kill_particle(int index) {
 
 void GeomParticleRenderer::
 render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) {
+  PStatTimer t1(_render_collector);
+
   BaseParticle *cur_particle;
   int i, remaining_particles = ttl_particles;
 

+ 3 - 2
panda/src/particlesystem/geomParticleRenderer.h

@@ -22,12 +22,11 @@
 #include "baseParticleRenderer.h"
 #include "baseParticle.h"
 #include "colorInterpolationManager.h"
-
 #include "pandaNode.h"
 #include "pointerTo.h"
 #include "pointerToArray.h"
-
 #include "pvector.h"
+#include "pStatCollector.h"
 
 class EXPCL_PANDAPHYSICS GeomParticleRenderer : public BaseParticleRenderer {
 PUBLISHED:
@@ -65,6 +64,8 @@ private:
 
   virtual void resize_pool(int new_size);
   void kill_nodes();
+
+  static PStatCollector _render_collector;
 };
 
 #include "geomParticleRenderer.I"

+ 4 - 0
panda/src/particlesystem/lineParticleRenderer.cxx

@@ -22,6 +22,9 @@
 #include "geom.h"
 #include "geomVertexWriter.h"
 #include "indent.h"
+#include "pStatTimer.h"
+
+PStatCollector LineParticleRenderer::_render_collector("App:Particles:Line:Render");
 
 ////////////////////////////////////////////////////////////////////
 //    Function : LineParticleRenderer
@@ -154,6 +157,7 @@ init_geoms() {
 
 void LineParticleRenderer::
 render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) {
+  PStatTimer t1(_render_collector);
 
   if (!ttl_particles)
     return;

+ 3 - 1
panda/src/particlesystem/lineParticleRenderer.h

@@ -21,12 +21,12 @@
 
 #include "baseParticle.h"
 #include "baseParticleRenderer.h"
-
 #include "pointerTo.h"
 #include "pointerToArray.h"
 #include "geom.h"
 #include "geomVertexData.h"
 #include "geomLines.h"
+#include "pStatCollector.h"
 
 ////////////////////////////////////////////////////////////////////
 //       Class : LineParticleRenderer
@@ -79,6 +79,8 @@ private:
   virtual void render(pvector< PT(PhysicsObject) >& po_vector,
                       int ttl_particles);
   virtual void resize_pool(int new_size);
+
+  static PStatCollector _render_collector;
 };
 
 #include "lineParticleRenderer.I"

+ 4 - 0
panda/src/particlesystem/pointParticleRenderer.cxx

@@ -22,6 +22,9 @@
 #include "geom.h"
 #include "geomVertexWriter.h"
 #include "indent.h"
+#include "pStatTimer.h"
+
+PStatCollector PointParticleRenderer::_render_collector("App:Particles:Point:Render");
 
 ////////////////////////////////////////////////////////////////////
 //    Function : PointParticleRenderer
@@ -219,6 +222,7 @@ create_color(const BaseParticle *p) {
 
 void PointParticleRenderer::
 render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) {
+  PStatTimer t1(_render_collector);
 
   BaseParticle *cur_particle;
 

+ 3 - 0
panda/src/particlesystem/pointParticleRenderer.h

@@ -28,6 +28,7 @@
 #include "geom.h"
 #include "geomVertexData.h"
 #include "geomPoints.h"
+#include "pStatCollector.h"
 
 ////////////////////////////////////////////////////////////////////
 //       Class : PointParticleRenderer
@@ -97,6 +98,8 @@ private:
   virtual void render(pvector< PT(PhysicsObject) >& po_vector,
                       int ttl_particles);
   virtual void resize_pool(int new_size);
+
+  static PStatCollector _render_collector;
 };
 
 #include "pointParticleRenderer.I"

+ 4 - 0
panda/src/particlesystem/sparkleParticleRenderer.cxx

@@ -22,6 +22,9 @@
 #include "geom.h"
 #include "geomVertexWriter.h"
 #include "indent.h"
+#include "pStatTimer.h"
+
+PStatCollector SparkleParticleRenderer::_render_collector("App:Particles:Sparkle:Render");
 
 ////////////////////////////////////////////////////////////////////
 //    Function : SparkleParticleRenderer
@@ -149,6 +152,7 @@ init_geoms() {
 ////////////////////////////////////////////////////////////////////
 void SparkleParticleRenderer::
 render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) {
+  PStatTimer t1(_render_collector);
   if (!ttl_particles) {
     return;
   }

+ 3 - 1
panda/src/particlesystem/sparkleParticleRenderer.h

@@ -21,12 +21,12 @@
 
 #include "baseParticle.h"
 #include "baseParticleRenderer.h"
-
 #include "pointerTo.h"
 #include "pointerToArray.h"
 #include "geom.h"
 #include "geomVertexData.h"
 #include "geomLines.h"
+#include "pStatCollector.h"
 
 enum SparkleParticleLifeScale {
   SP_NO_SCALE,
@@ -97,6 +97,8 @@ private:
   virtual void render(pvector< PT(PhysicsObject) >& po_vector,
                       int ttl_particles);
   virtual void resize_pool(int new_size);
+
+  static PStatCollector _render_collector;
 };
 
 #include "sparkleParticleRenderer.I"

+ 93 - 97
panda/src/particlesystem/spriteParticleRenderer.cxx

@@ -35,8 +35,7 @@
 #include "config_particlesystem.h"
 #include "pStatTimer.h"
 
-PStatCollector SpriteParticleRenderer::_sprite_particle_render("App:Particles:Sprite:Render");
-PStatCollector SpriteParticleRenderer::_sprite_particle_process_vertices("App:Particles:Sprite:Render:Process Verts");
+PStatCollector SpriteParticleRenderer::_render_collector("App:Particles:Sprite:Render");
 
 ////////////////////////////////////////////////////////////////////
 //    Function : SpriteParticleRenderer::SpriteParticleRenderer
@@ -561,7 +560,7 @@ kill_particle(int) {
 ////////////////////////////////////////////////////////////////////
 void SpriteParticleRenderer::
 render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) { 
-  PStatTimer t1(_sprite_particle_process_vertices);
+  PStatTimer t1(_render_collector);
   // There is no texture data available, exit.
   if (_anims.empty()) {
     return;
@@ -615,120 +614,117 @@ render(pvector< PT(PhysicsObject) >& po_vector, int ttl_particles) {
   _aabb_max.set(-99999.0f, -99999.0f, -99999.0f);
 
   // run through every filled slot
-  {
-    PStatTimer t2(_sprite_particle_process_vertices);
-    for (i = 0; i < (int)po_vector.size(); i++) {
-      cur_particle = (BaseParticle *) po_vector[i].p();
+  for (i = 0; i < (int)po_vector.size(); i++) {
+    cur_particle = (BaseParticle *) po_vector[i].p();
 
-      if (!cur_particle->get_alive()) {
-        continue;
-      }
+    if (!cur_particle->get_alive()) {
+      continue;
+    }
 
-      LPoint3f position = cur_particle->get_position();
+    LPoint3f position = cur_particle->get_position();
 
-      // x aabb adjust
-      if (position[0] > _aabb_max[0])
-        _aabb_max[0] = position[0];
-      else if (position[0] < _aabb_min[0])
-        _aabb_min[0] = position[0];
+    // x aabb adjust
+    if (position[0] > _aabb_max[0])
+      _aabb_max[0] = position[0];
+    else if (position[0] < _aabb_min[0])
+      _aabb_min[0] = position[0];
 
-      // y aabb adjust
-      if (position[1] > _aabb_max[1])
-        _aabb_max[1] = position[1];
-      else if (position[1] < _aabb_min[1])
-        _aabb_min[1] = position[1];
+    // y aabb adjust
+    if (position[1] > _aabb_max[1])
+      _aabb_max[1] = position[1];
+    else if (position[1] < _aabb_min[1])
+      _aabb_min[1] = position[1];
 
-      // z aabb adjust
-      if (position[2] > _aabb_max[2])
-        _aabb_max[2] = position[2];
-      else if (position[2] < _aabb_min[2])
-        _aabb_min[2] = position[2];
+    // z aabb adjust
+    if (position[2] > _aabb_max[2])
+      _aabb_max[2] = position[2];
+    else if (position[2] < _aabb_min[2])
+      _aabb_min[2] = position[2];
 
 
-      float t = cur_particle->get_parameterized_age();
-      int anim_index = cur_particle->get_index();
+    float t = cur_particle->get_parameterized_age();
+    int anim_index = cur_particle->get_index();
 
-      // If an animation has been removed, we need to reassign
-      // those particles assigned to the removed animation.
-      if(_animation_removed && (anim_index >= anim_count)) {
-        anim_index = int(NORMALIZED_RAND()*anim_count);
-        anim_index = anim_index<anim_count?anim_index:anim_index-1;
-        cur_particle->set_index(anim_index);
-      }
+    // If an animation has been removed, we need to reassign
+    // those particles assigned to the removed animation.
+    if(_animation_removed && (anim_index >= anim_count)) {
+      anim_index = int(NORMALIZED_RAND()*anim_count);
+      anim_index = anim_index<anim_count?anim_index:anim_index-1;
+      cur_particle->set_index(anim_index);
+    }
 
-      // Find the frame
-      if (_animate_frames) {
-        if (_animate_frames_rate == 0.0f) {
-          frame = (int)(t*_anim_size[anim_index]);
-        } else {
-          frame = (int)fmod(cur_particle->get_age()*_animate_frames_rate+1,_anim_size[anim_index]);
-        }
+    // Find the frame
+    if (_animate_frames) {
+      if (_animate_frames_rate == 0.0f) {
+        frame = (int)(t*_anim_size[anim_index]);
       } else {
-        frame = _animate_frames_index;
+        frame = (int)fmod(cur_particle->get_age()*_animate_frames_rate+1,_anim_size[anim_index]);
       }
+    } else {
+      frame = _animate_frames_index;
+    }
 
-      // Quick check make sure our math above didn't result in an invalid frame.
-      frame = (frame < _anim_size[anim_index]) ? frame : (_anim_size[anim_index]-1);
-      ++_ttl_count[anim_index][frame];
-
-      // Calculate the color
-      // This is where we'll want to give the renderer the new color
-      Colorf c = _color_interpolation_manager->generateColor(t);
-
-      int alphamode=get_alpha_mode();
-      if (alphamode != PR_ALPHA_NONE) {
-        if (alphamode == PR_ALPHA_OUT)
-          c[3] *= (1.0f - t) * get_user_alpha();
-        else if (alphamode == PR_ALPHA_IN)
-          c[3] *= t * get_user_alpha();
-        else if (alphamode == PR_ALPHA_IN_OUT) {
-          c[3] *= 2.0f * min(t, 1.0f - t) * get_user_alpha();
-        }
-        else {
-          assert(alphamode == PR_ALPHA_USER);
-          c[3] *= get_user_alpha();
-        }
+    // Quick check to make sure our math above didn't result in an invalid frame.
+    frame = (frame < _anim_size[anim_index]) ? frame : (_anim_size[anim_index]-1);
+    ++_ttl_count[anim_index][frame];
+
+    // Calculate the color
+    // This is where we'll want to give the renderer the new color
+    Colorf c = _color_interpolation_manager->generateColor(t);
+
+    int alphamode=get_alpha_mode();
+    if (alphamode != PR_ALPHA_NONE) {
+      if (alphamode == PR_ALPHA_OUT)
+        c[3] *= (1.0f - t) * get_user_alpha();
+      else if (alphamode == PR_ALPHA_IN)
+        c[3] *= t * get_user_alpha();
+      else if (alphamode == PR_ALPHA_IN_OUT) {
+        c[3] *= 2.0f * min(t, 1.0f - t) * get_user_alpha();
       }
+      else {
+        assert(alphamode == PR_ALPHA_USER);
+        c[3] *= get_user_alpha();
+      }
+    }
           
-      // Send the data on its way...
-      _sprite_writer[anim_index][frame].vertex.add_data3f(position);
-      _sprite_writer[anim_index][frame].color.add_data4f(c);
+    // Send the data on its way...
+    _sprite_writer[anim_index][frame].vertex.add_data3f(position);
+    _sprite_writer[anim_index][frame].color.add_data4f(c);
     
-      float current_x_scale = _initial_x_scale;
-      float current_y_scale = _initial_y_scale;
+    float current_x_scale = _initial_x_scale;
+    float current_y_scale = _initial_y_scale;
     
-      if (_animate_x_ratio || _animate_y_ratio) {
-        if (_blend_method == PP_BLEND_CUBIC) {
-          t = CUBIC_T(t);
-        }
-      
-        if (_animate_x_ratio) {
-          current_x_scale = (_initial_x_scale +
-                             (t * (_final_x_scale - _initial_x_scale)));
-        }
-        if (_animate_y_ratio) {
-          current_y_scale = (_initial_y_scale +
-                             (t * (_final_y_scale - _initial_y_scale)));
-        }
+    if (_animate_x_ratio || _animate_y_ratio) {
+      if (_blend_method == PP_BLEND_CUBIC) {
+        t = CUBIC_T(t);
       }
-
-      if (_sprite_writer[anim_index][frame].size.has_column()) {
-        _sprite_writer[anim_index][frame].size.add_data1f(current_y_scale * _height);
-      }
-      if (_sprite_writer[anim_index][frame].aspect_ratio.has_column()) {
-        _sprite_writer[anim_index][frame].aspect_ratio.add_data1f(_aspect_ratio * current_x_scale / current_y_scale);
+      
+      if (_animate_x_ratio) {
+        current_x_scale = (_initial_x_scale +
+                           (t * (_final_x_scale - _initial_x_scale)));
       }
-      if (_animate_theta) {
-        _sprite_writer[anim_index][frame].rotate.add_data1f(cur_particle->get_theta());
-      } else if (_sprite_writer[anim_index][frame].rotate.has_column()) {
-        _sprite_writer[anim_index][frame].rotate.add_data1f(_theta);
+      if (_animate_y_ratio) {
+        current_y_scale = (_initial_y_scale +
+                           (t * (_final_y_scale - _initial_y_scale)));
       }
+    }
 
-      // maybe jump out early?
-      remaining_particles--;
-      if (remaining_particles == 0) {
-        break;
-      }
+    if (_sprite_writer[anim_index][frame].size.has_column()) {
+      _sprite_writer[anim_index][frame].size.add_data1f(current_y_scale * _height);
+    }
+    if (_sprite_writer[anim_index][frame].aspect_ratio.has_column()) {
+      _sprite_writer[anim_index][frame].aspect_ratio.add_data1f(_aspect_ratio * current_x_scale / current_y_scale);
+    }
+    if (_animate_theta) {
+      _sprite_writer[anim_index][frame].rotate.add_data1f(cur_particle->get_theta());
+    } else if (_sprite_writer[anim_index][frame].rotate.has_column()) {
+      _sprite_writer[anim_index][frame].rotate.add_data1f(_theta);
+    }
+
+    // maybe jump out early?
+    remaining_particles--;
+    if (remaining_particles == 0) {
+      break;
     }
   }
   int n = 0;

+ 1 - 2
panda/src/particlesystem/spriteParticleRenderer.h

@@ -269,8 +269,7 @@ private:
   pvector<int*> _ttl_count;  // _ttl_count[i][j] holds the number of particles attached to animation 'i' at frame 'j'.
   vector_int _birth_list;  // Holds the list of particles that need a new random animation to start on.
 
-  static PStatCollector _sprite_particle_render;
-  static PStatCollector _sprite_particle_process_vertices;
+  static PStatCollector _render_collector;
 };
 
 #include "spriteParticleRenderer.I"

+ 10 - 0
panda/src/pgraph/cullableObject.I

@@ -125,6 +125,16 @@ draw(GraphicsStateGuardianBase *gsg, Thread *current_thread) {
   _geom->draw(gsg, _munger, _munged_data, current_thread);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: CullableObject::flush_level
+//       Access: Public, Static
+//  Description: Flushes the PStatCollectors used during traversal.
+////////////////////////////////////////////////////////////////////
+INLINE void CullableObject::
+flush_level() {
+  _sw_sprites_pcollector.flush_level();
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: CullableObject::SortPoints::Constructor
 //       Access: Public

+ 285 - 239
panda/src/pgraph/cullableObject.cxx

@@ -35,8 +35,13 @@
 #include "geomTriangles.h"
 #include "light.h"
 
-PStatCollector CullableObject::_munge_points_pcollector("*:Munge:Points");
+CullableObject::FormatMap CullableObject::_format_map;
+
+PStatCollector CullableObject::_munge_sprites_pcollector("*:Munge:Sprites");
+PStatCollector CullableObject::_munge_sprites_verts_pcollector("*:Munge:Sprites:Verts");
+PStatCollector CullableObject::_munge_sprites_prims_pcollector("*:Munge:Sprites:Prims");
 PStatCollector CullableObject::_munge_light_vector_pcollector("*:Munge:Light Vector");
+PStatCollector CullableObject::_sw_sprites_pcollector("SW Sprites");
 
 TypeHandle CullableObject::_type_handle;
 
@@ -191,7 +196,8 @@ output(ostream &out) const {
 void CullableObject::
 munge_points_to_quads(const CullTraverser *traverser) {
   Thread *current_thread = traverser->get_current_thread();
-  PStatTimer timer(_munge_points_pcollector, current_thread);
+  PStatTimer timer(_munge_sprites_pcollector, current_thread);
+  _sw_sprites_pcollector.add_level(_munged_data->get_num_rows());
 
   GraphicsStateGuardianBase *gsg = traverser->get_gsg();
 
@@ -228,70 +234,6 @@ munge_points_to_quads(const CullTraverser *traverser) {
     }
   }
 
-  PT(GeomVertexArrayFormat) new_array_format;
-  if (retransform_sprites) {
-    // With retransform_sprites in effect, we will be sending ordinary
-    // 3-D points to the graphics API.
-    new_array_format = 
-      new GeomVertexArrayFormat(InternalName::get_vertex(), 3, 
-                                Geom::NT_float32,
-                                Geom::C_point);
-  } else {
-    // Without retransform_sprites, we will be sending 4-component
-    // clip-space points.
-    new_array_format = 
-      new GeomVertexArrayFormat(InternalName::get_vertex(), 4, 
-                                Geom::NT_float32,
-                                Geom::C_clip_point);
-  }
-  if (has_normal) {
-    const GeomVertexColumn *c = normal.get_column();
-    new_array_format->add_column
-      (InternalName::get_normal(), c->get_num_components(),
-       c->get_numeric_type(), c->get_contents());
-  }
-  if (has_color) {
-    const GeomVertexColumn *c = color.get_column();
-    new_array_format->add_column
-      (InternalName::get_color(), c->get_num_components(),
-       c->get_numeric_type(), c->get_contents());
-  }
-  if (sprite_texcoord) {
-    new_array_format->add_column
-      (InternalName::get_texcoord(), 2,
-       Geom::NT_float32,
-       Geom::C_texcoord);
-
-  } else if (has_texcoord) {
-    const GeomVertexColumn *c = texcoord.get_column();
-    new_array_format->add_column
-      (InternalName::get_texcoord(), c->get_num_components(),
-       c->get_numeric_type(), c->get_contents());
-  }
-
-  CPT(GeomVertexFormat) new_format = 
-    GeomVertexFormat::register_format(new_array_format);
-
-  PT(GeomVertexData) new_data = new GeomVertexData
-    (_munged_data->get_name(), new_format, Geom::UH_client);
-
-  GeomVertexWriter new_vertex(new_data, InternalName::get_vertex());
-  GeomVertexWriter new_normal(new_data, InternalName::get_normal());
-  GeomVertexWriter new_color(new_data, InternalName::get_color());
-  GeomVertexWriter new_texcoord(new_data, InternalName::get_texcoord());
-  int new_vi = 0;
-
-  PT(Geom) new_geom = new Geom(new_data);
-
-  const LMatrix4f &modelview = _modelview_transform->get_mat();
-
-  SceneSetup *scene = traverser->get_scene();
-  const Lens *lens = scene->get_lens();
-  const LMatrix4f &projection = lens->get_projection_mat();
-
-  int viewport_width = scene->get_viewport_width();
-  int viewport_height = scene->get_viewport_height();
-
   float point_size = 1.0f;
   bool perspective = false;
   const RenderModeAttrib *render_mode = _state->get_render_mode();
@@ -306,6 +248,68 @@ munge_points_to_quads(const CullTraverser *traverser) {
     }
   }
 
+  // Get the vertex format of the newly created geometry.
+  CPT(GeomVertexFormat) new_format;
+  FormatMap::iterator fmi = _format_map.find(_munged_data->get_format());
+  if (fmi != _format_map.end()) {
+    new_format = (*fmi).second;
+
+  } else {
+    // We have to construct the format now.
+    PT(GeomVertexArrayFormat) new_array_format;
+    if (retransform_sprites) {
+      // With retransform_sprites in effect, we will be sending ordinary
+      // 3-D points to the graphics API.
+      new_array_format = 
+        new GeomVertexArrayFormat(InternalName::get_vertex(), 3, 
+                                  Geom::NT_float32,
+                                  Geom::C_point);
+    } else {
+      // Without retransform_sprites, we will be sending 4-component
+      // clip-space points.
+      new_array_format = 
+        new GeomVertexArrayFormat(InternalName::get_vertex(), 4, 
+                                  Geom::NT_float32,
+                                  Geom::C_clip_point);
+    }
+    if (has_normal) {
+      const GeomVertexColumn *c = normal.get_column();
+      new_array_format->add_column
+        (InternalName::get_normal(), c->get_num_components(),
+         c->get_numeric_type(), c->get_contents());
+    }
+    if (has_color) {
+      const GeomVertexColumn *c = color.get_column();
+      new_array_format->add_column
+        (InternalName::get_color(), c->get_num_components(),
+         c->get_numeric_type(), c->get_contents());
+    }
+    if (sprite_texcoord) {
+      new_array_format->add_column
+        (InternalName::get_texcoord(), 2,
+         Geom::NT_float32,
+         Geom::C_texcoord);
+
+    } else if (has_texcoord) {
+      const GeomVertexColumn *c = texcoord.get_column();
+      new_array_format->add_column
+        (InternalName::get_texcoord(), c->get_num_components(),
+         c->get_numeric_type(), c->get_contents());
+    }
+
+    new_format = GeomVertexFormat::register_format(new_array_format);
+    _format_map[_munged_data->get_format()] = new_format;
+  }
+
+  const LMatrix4f &modelview = _modelview_transform->get_mat();
+
+  SceneSetup *scene = traverser->get_scene();
+  const Lens *lens = scene->get_lens();
+  const LMatrix4f &projection = lens->get_projection_mat();
+
+  int viewport_width = scene->get_viewport_width();
+  int viewport_height = scene->get_viewport_height();
+
   // We need a standard projection matrix, in a known coordinate
   // system, to compute the perspective height.
   LMatrix4f height_projection;
@@ -319,197 +323,239 @@ munge_points_to_quads(const CullTraverser *traverser) {
   LMatrix4f inv_render_transform;
   inv_render_transform.invert_from(render_transform);
 
-  // Replace each primitive in the Geom (it's presumably a GeomPoints
-  // primitive, although it might be some other kind of primitive if
-  // we got here because RenderModeAttrib::M_point is enabled) with a
-  // new primitive that replaces each vertex with a quad of the
-  // appropriate scale and orientation.
+  // Now convert all of the vertices in the GeomVertexData to quads.
+  // We always convert all the vertices, assuming all the vertices
+  // will be referenced by GeomPrimitives, because we want to optimize
+  // for the most common case.
+  int orig_verts = _munged_data->get_num_rows();
+  int new_verts = 4 * orig_verts;        // each vertex becomes four.
+  int new_prim_verts = 6 * orig_verts;  // two triangles per point.
 
-  // BUG: if we're rendering polygons in M_point mode with a
-  // CullFaceAttrib in effect, we won't actually apply the
-  // CullFaceAttrib but will always render all of the vertices of the
-  // polygons.  This is certainly a bug, but in order to fix it we'd
-  // have to do the face culling ourselves--not sure if it's worth it.
-
-  GeomPipelineReader geom_reader(_geom, current_thread);
-  int num_primitives = geom_reader.get_num_primitives();
-  for (int pi = 0; pi < num_primitives; ++pi) {
-    const GeomPrimitive *primitive = geom_reader.get_primitive(pi);
-    if (primitive->get_num_vertices() != 0) {
-      // We must first convert all of the points to eye space.
-      int num_points = primitive->get_max_vertex() + 1;
-
-      int num_vertices = primitive->get_num_vertices();
-      PointData *points = (PointData *)alloca(num_points * sizeof(PointData));
-      unsigned int *vertices = (unsigned int *)alloca(num_vertices * sizeof(unsigned int));
-      unsigned int *vertices_end = vertices + num_vertices;
-
-      if (primitive->is_indexed()) {
-        GeomVertexReader index(primitive->get_vertices(), 0, current_thread);
-        for (unsigned int *vi = vertices; vi != vertices_end; ++vi) {
-          // Get the point in eye-space coordinates.
-          unsigned int v = index.get_data1i();
-          nassertv(v < (unsigned int)num_points);
-          (*vi) = v;
-          vertex.set_row(v);
-          points[v]._eye = modelview.xform_point(vertex.get_data3f());
-          points[v]._dist = gsg->compute_distance_to(points[v]._eye);
-        }
-      } else {
-        // Nonindexed case.
-        unsigned int first_vertex = primitive->get_first_vertex();
-        for (int i = 0; i < num_vertices; ++i) {
-          unsigned int v = i + first_vertex;
-          nassertv(v < (unsigned int)num_points);
-          vertices[i] = v;
-          vertex.set_row(v);
-          points[v]._eye = modelview.xform_point(vertex.get_data3f());
-          points[v]._dist = gsg->compute_distance_to(points[v]._eye);
-        }
-      }
-  
-      // Now sort the points in order from back-to-front so they will
-      // render properly with transparency, at least with each other.
-      sort(vertices, vertices_end, SortPoints(points));
-  
-      // Go through the points, now in sorted order, and generate a pair
-      // of triangles for each one.  We generate indexed triangles
-      // instead of two-triangle strips, since this seems to be
-      // generally faster on PC hardware (otherwise, we'd have to nearly
-      // double the vertices to stitch all the little triangle strips
-      // together).
-      PT(GeomPrimitive) new_primitive = new GeomTriangles(Geom::UH_client);
-
-      for (unsigned int *vi = vertices; vi != vertices_end; ++vi) {
-        // The point in eye coordinates.
-        const LPoint3f &eye = points[*vi]._eye;
+  PT(GeomVertexData) new_data = new GeomVertexData
+    (_munged_data->get_name(), new_format, Geom::UH_client);
+  new_data->unclean_set_num_rows(new_verts);
+
+  GeomVertexWriter new_vertex(new_data, InternalName::get_vertex());
+  GeomVertexWriter new_normal(new_data, InternalName::get_normal());
+  GeomVertexWriter new_color(new_data, InternalName::get_color());
+  GeomVertexWriter new_texcoord(new_data, InternalName::get_texcoord());
+
+  // We'll keep an array of all of the points' eye-space coordinates,
+  // and their distance from the camera, so we can sort the points for
+  // each primitive, below.
+  PointData *points;
+  {
+    PStatTimer t2(_munge_sprites_verts_pcollector, current_thread);
+    points = (PointData *)alloca(orig_verts * sizeof(PointData));
+    int vi = 0;
+    while (!vertex.is_at_end()) {
+      // Get the point in eye-space coordinates.
+      LPoint3f eye = modelview.xform_point(vertex.get_data3f());
+      points[vi]._eye = eye;
+      points[vi]._dist = gsg->compute_distance_to(points[vi]._eye);
     
-        // The point in clip coordinates.
-        LPoint4f p4 = LPoint4f(eye[0], eye[1], eye[2], 1.0f) * projection;
+      // The point in clip coordinates.
+      LPoint4f p4 = LPoint4f(eye[0], eye[1], eye[2], 1.0f) * projection;
+
+      if (has_size) {
+        point_size = size.get_data1f();
+      }
 
-        if (has_size) {
-          size.set_row(*vi);
-          point_size = size.get_data1f();
+      float scale_y = point_size;
+      if (perspective) {
+        // Perspective-sized points.  Here point_size is the point's
+        // height in 3-d units.  To arrange that, we need to figure out
+        // the appropriate scaling factor based on the current viewport
+        // and projection matrix.
+        float scale = _modelview_transform->get_scale()[1];
+        LVector3f height(0.0f, point_size * scale, scale);
+        height = height * height_projection;
+        scale_y = height[1] * viewport_height;
+
+        // We should then divide the radius by the distance from the
+        // camera plane, to emulate the glPointParameters() behavior.
+        if (!lens->is_orthographic()) {
+          scale_y /= gsg->compute_distance_to(eye);
         }
+      }
+      
+      // Also factor in the homogeneous scale for being in clip
+      // coordinates still.
+      scale_y *= p4[3];
 
-        float scale_y = point_size;
-        if (perspective) {
-          // Perspective-sized points.  Here point_size is the point's
-          // height in 3-d units.  To arrange that, we need to figure
-          // out the appropriate scaling factor based on the current
-          // viewport and projection matrix.
-          float scale = _modelview_transform->get_scale()[1];
-          LVector3f height(0.0f, point_size * scale, scale);
-          height = height * height_projection;
-          scale_y = height[1] * viewport_height;
-
-          // We should then divide the radius by the distance from the
-          // camera plane, to emulate the glPointParameters() behavior.
-          if (!lens->is_orthographic()) {
-            scale_y /= gsg->compute_distance_to(eye);
-          }
+      float scale_x = scale_y;
+      if (has_aspect_ratio) {
+        scale_x *= aspect_ratio.get_data1f();
+      }
+
+      // Define the first two corners based on the scales in X and Y.
+      LPoint2f c0(scale_x, scale_y);
+      LPoint2f c1(-scale_x, scale_y);
+
+      if (has_rotate) { 
+        // If we have a rotate factor, apply it to those two corners.
+        float r = rotate.get_data1f();
+        LMatrix3f mat = LMatrix3f::rotate_mat(r);
+        c0 = c0 * mat;
+        c1 = c1 * mat;
+      }
+
+      // Finally, scale the corners in their newly-rotated position,
+      // to compensate for the aspect ratio of the viewport.
+      float rx = 1.0f / viewport_width;
+      float ry = 1.0f / viewport_height;
+      c0.set(c0[0] * rx, c0[1] * ry);
+      c1.set(c1[0] * rx, c1[1] * ry);
+    
+      if (retransform_sprites) {
+        // With retransform_sprites in effect, we must reconvert the
+        // resulting quad back into the original 3-D space.
+        new_vertex.set_data4f(inv_render_transform.xform(LPoint4f(p4[0] + c0[0], p4[1] + c0[1], p4[2], p4[3])));
+        new_vertex.set_data4f(inv_render_transform.xform(LPoint4f(p4[0] + c1[0], p4[1] + c1[1], p4[2], p4[3])));
+        new_vertex.set_data4f(inv_render_transform.xform(LPoint4f(p4[0] - c1[0], p4[1] - c1[1], p4[2], p4[3])));
+        new_vertex.set_data4f(inv_render_transform.xform(LPoint4f(p4[0] - c0[0], p4[1] - c0[1], p4[2], p4[3])));
+      
+        if (has_normal) {
+          const Normalf &c = normal.get_data3f();
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
         }
       
-        // Also factor in the homogeneous scale for being in clip
-        // coordinates still.
-        scale_y *= p4[3];
-
-        float scale_x = scale_y;
-        if (has_aspect_ratio) {
-          aspect_ratio.set_row(*vi);
-          scale_x *= aspect_ratio.get_data1f();
+      } else {
+        // Without retransform_sprites, we can simply load the
+        // clip-space coordinates.
+        new_vertex.set_data4f(p4[0] + c0[0], p4[1] + c0[1], p4[2], p4[3]);
+        new_vertex.set_data4f(p4[0] + c1[0], p4[1] + c1[1], p4[2], p4[3]);
+        new_vertex.set_data4f(p4[0] - c1[0], p4[1] - c1[1], p4[2], p4[3]);
+        new_vertex.set_data4f(p4[0] - c0[0], p4[1] - c0[1], p4[2], p4[3]);
+      
+        if (has_normal) {
+          Normalf c = render_transform.xform_vec(normal.get_data3f());
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
+          new_normal.set_data3f(c);
         }
+      }
+      if (has_color) {
+        const Colorf &c = color.get_data4f();
+        new_color.set_data4f(c);
+        new_color.set_data4f(c);
+        new_color.set_data4f(c);
+        new_color.set_data4f(c);
+      }
+      if (sprite_texcoord) {
+        new_texcoord.set_data2f(1.0f, 0.0f);
+        new_texcoord.set_data2f(0.0f, 0.0f);
+        new_texcoord.set_data2f(1.0f, 1.0f);
+        new_texcoord.set_data2f(0.0f, 1.0f);
+      } else if (has_texcoord) {
+        const LVecBase4f &c = texcoord.get_data4f();
+        new_texcoord.set_data4f(c);
+        new_texcoord.set_data4f(c);
+        new_texcoord.set_data4f(c);
+        new_texcoord.set_data4f(c);
+      }
 
-        // Define the first two corners based on the scales in X and Y.
-        LPoint2f c0(scale_x, scale_y);
-        LPoint2f c1(-scale_x, scale_y);
-
-        if (has_rotate) { 
-          // If we have a rotate factor, apply it to those two corners.
-          rotate.set_row(*vi);
-          float r = rotate.get_data1f();
-          LMatrix3f mat = LMatrix3f::rotate_mat(r);
-          c0 = c0 * mat;
-          c1 = c1 * mat;
-        }
+      ++vi;
+    }
 
-        // Finally, scale the corners in their newly-rotated position,
-        // to compensate for the aspect ratio of the viewport.
-        float rx = 1.0f / viewport_width;
-        float ry = 1.0f / viewport_height;
-        c0.set(c0[0] * rx, c0[1] * ry);
-        c1.set(c1[0] * rx, c1[1] * ry);
-
-        if (retransform_sprites) {
-          // With retransform_sprites in effect, we must reconvert the
-          // resulting quad back into the original 3-D space.
-          new_vertex.add_data4f(inv_render_transform.xform(LPoint4f(p4[0] + c0[0], p4[1] + c0[1], p4[2], p4[3])));
-          new_vertex.add_data4f(inv_render_transform.xform(LPoint4f(p4[0] + c1[0], p4[1] + c1[1], p4[2], p4[3])));
-          new_vertex.add_data4f(inv_render_transform.xform(LPoint4f(p4[0] - c1[0], p4[1] - c1[1], p4[2], p4[3])));
-          new_vertex.add_data4f(inv_render_transform.xform(LPoint4f(p4[0] - c0[0], p4[1] - c0[1], p4[2], p4[3])));
-          
-          if (has_normal) {
-            normal.set_row(*vi);
-            const Normalf &c = normal.get_data3f();
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
-          }
+    nassertv(vi == orig_verts);
+    nassertv(new_data->get_num_rows() == new_verts);
+  }
+
+  PT(Geom) new_geom = new Geom(new_data);
+    
+  // Create an appropriate GeomVertexArrayFormat for the primitive
+  // index.
+  static CPT(GeomVertexArrayFormat) new_prim_format;
+  if (new_prim_format == (GeomVertexArrayFormat *)NULL) {
+    new_prim_format =
+      GeomVertexArrayFormat::register_format
+      (new GeomVertexArrayFormat(InternalName::get_index(), 1, 
+                                 GeomEnums::NT_uint16, GeomEnums::C_index));
+  }
+
+  // Replace each primitive in the Geom (it's presumably a GeomPoints
+  // primitive, although it might be some other kind of primitive if
+  // we got here because RenderModeAttrib::M_point is enabled) with a
+  // new primitive that replaces each vertex with a quad of the
+  // appropriate scale and orientation.
 
+  // BUG: if we're rendering polygons in M_point mode with a
+  // CullFaceAttrib in effect, we won't actually apply the
+  // CullFaceAttrib but will always render all of the vertices of the
+  // polygons.  This is certainly a bug, but a very minor one; and in
+  // order to fix it we'd have to do the face culling ourselves--not
+  // sure if it's worth it.
+
+  {
+    PStatTimer t3(_munge_sprites_prims_pcollector, current_thread);
+    GeomPipelineReader geom_reader(_geom, current_thread);
+    int num_primitives = geom_reader.get_num_primitives();
+    for (int pi = 0; pi < num_primitives; ++pi) {
+      const GeomPrimitive *primitive = geom_reader.get_primitive(pi);
+      if (primitive->get_num_vertices() != 0) {
+        // Extract out the list of vertices referenced by the primitive.
+        int num_vertices = primitive->get_num_vertices();
+        unsigned int *vertices = (unsigned int *)alloca(num_vertices * sizeof(unsigned int));
+        unsigned int *vertices_end = vertices + num_vertices;
+
+        if (primitive->is_indexed()) {
+          // Indexed case.
+          GeomVertexReader index(primitive->get_vertices(), 0, current_thread);
+          for (unsigned int *vi = vertices; vi != vertices_end; ++vi) {
+            unsigned int v = index.get_data1i();
+            nassertv(v < (unsigned int)orig_verts);
+            (*vi) = v;
+          }
         } else {
-          // Without retransform_sprites, we can simply load the
-          // clip-space coordinates.
-          new_vertex.add_data4f(p4[0] + c0[0], p4[1] + c0[1], p4[2], p4[3]);
-          new_vertex.add_data4f(p4[0] + c1[0], p4[1] + c1[1], p4[2], p4[3]);
-          new_vertex.add_data4f(p4[0] - c1[0], p4[1] - c1[1], p4[2], p4[3]);
-          new_vertex.add_data4f(p4[0] - c0[0], p4[1] - c0[1], p4[2], p4[3]);
-          
-          if (has_normal) {
-            normal.set_row(*vi);
-            Normalf c = render_transform.xform_vec(normal.get_data3f());
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
-            new_normal.add_data3f(c);
+          // Nonindexed case.
+          unsigned int first_vertex = primitive->get_first_vertex();
+          for (int i = 0; i < num_vertices; ++i) {
+            unsigned int v = i + first_vertex;
+            nassertv(v < (unsigned int)orig_verts);
+            vertices[i] = v;
           }
         }
-        if (has_color) {
-          color.set_row(*vi);
-          const Colorf &c = color.get_data4f();
-          new_color.add_data4f(c);
-          new_color.add_data4f(c);
-          new_color.add_data4f(c);
-          new_color.add_data4f(c);
-        }
-        if (sprite_texcoord) {
-          new_texcoord.add_data2f(1.0f, 0.0f);
-          new_texcoord.add_data2f(0.0f, 0.0f);
-          new_texcoord.add_data2f(1.0f, 1.0f);
-          new_texcoord.add_data2f(0.0f, 1.0f);
-        } else if (has_texcoord) {
-          texcoord.set_row(*vi);
-          const LVecBase4f &c = texcoord.get_data4f();
-          new_texcoord.add_data4f(c);
-          new_texcoord.add_data4f(c);
-          new_texcoord.add_data4f(c);
-          new_texcoord.add_data4f(c);
+  
+        // Now sort the points in order from back-to-front so they will
+        // render properly with transparency, at least with each other.
+        sort(vertices, vertices_end, SortPoints(points));
+  
+        // Go through the points, now in sorted order, and generate a pair
+        // of triangles for each one.  We generate indexed triangles
+        // instead of two-triangle strips, since this seems to be
+        // generally faster on PC hardware (otherwise, we'd have to nearly
+        // double the vertices to stitch all the little triangle strips
+        // together).
+        PT(GeomPrimitive) new_primitive = new GeomTriangles(Geom::UH_client);
+
+        PT(GeomVertexArrayData) new_index 
+          = new GeomVertexArrayData(new_prim_format, GeomEnums::UH_client);
+        new_index->unclean_set_num_rows(new_prim_verts);
+
+        GeomVertexWriter index(new_index, 0);
+        for (unsigned int *vi = vertices; vi != vertices_end; ++vi) {
+          int new_vi = (*vi) * 4;
+          nassertv(new_vi + 3 < new_prim_verts);
+          index.set_data1i(new_vi);
+          index.set_data1i(new_vi + 1);
+          index.set_data1i(new_vi + 2);
+          index.set_data1i(new_vi + 2);
+          index.set_data1i(new_vi + 1);
+          index.set_data1i(new_vi + 3);
         }
+        new_primitive->set_vertices(new_index, num_vertices * 6);
 
-        new_primitive->add_vertex(new_vi);
-        new_primitive->add_vertex(new_vi + 1);
-        new_primitive->add_vertex(new_vi + 2);
-        new_primitive->close_primitive();
-
-        new_primitive->add_vertex(new_vi + 2);
-        new_primitive->add_vertex(new_vi + 1);
-        new_primitive->add_vertex(new_vi + 3);
-        new_primitive->close_primitive();
+        int min_vi = primitive->get_min_vertex();
+        int max_vi = primitive->get_max_vertex();
+        new_primitive->set_minmax(min_vi * 4, max_vi * 4 + 3, NULL, NULL);
 
-        new_vi += 4;
+        new_geom->add_primitive(new_primitive);
       }
-
-      new_geom->add_primitive(new_primitive);
     }
   }
 

+ 10 - 1
panda/src/pgraph/cullableObject.h

@@ -65,6 +65,8 @@ public:
   INLINE void draw(GraphicsStateGuardianBase *gsg,
                    Thread *current_thread);
 
+  INLINE static void flush_level();
+
 public:
   ~CullableObject();
   ALLOC_DELETED_CHAIN(CullableObject);
@@ -107,8 +109,15 @@ private:
     const PointData *_array;
   };
 
-  static PStatCollector _munge_points_pcollector;
+  // This is a cache of converted vertex formats.
+  typedef pmap<CPT(GeomVertexFormat), CPT(GeomVertexFormat) > FormatMap;
+  static FormatMap _format_map;
+
+  static PStatCollector _munge_sprites_pcollector;
+  static PStatCollector _munge_sprites_verts_pcollector;
+  static PStatCollector _munge_sprites_prims_pcollector;
   static PStatCollector _munge_light_vector_pcollector;
+  static PStatCollector _sw_sprites_pcollector;
 
 public:
   static TypeHandle get_class_type() {

+ 2 - 1
panda/src/pstatclient/pStatProperties.cxx

@@ -132,7 +132,7 @@ static TimeCollectorProperties time_properties[] = {
   { 1, "Cull:Sort",                        { 0.3, 0.3, 0.6 } },
   { 1, "*:Show fps",                       { 0.5, 0.8, 1.0 } },
   { 0, "*:Munge",                          { 0.3, 0.3, 0.9 } },
-  { 0, "*:Munge:Points",                   { 0.2, 0.8, 0.4 } },
+  { 1, "*:Munge:Sprites",                  { 0.2, 0.8, 0.4 } },
   { 0, "*:Munge:Data",                     { 0.7, 0.5, 0.2 } },
   { 0, "*:Munge:Rotate",                   { 0.9, 0.8, 0.5 } },
   { 0, "*:Munge:Decompose",                { 0.1, 0.3, 0.1 } },
@@ -177,6 +177,7 @@ static LevelCollectorProperties level_properties[] = {
   { 1, "Primitive batches:Triangle fans",  { 0.8, 0.5, 0.2 } },
   { 1, "Primitive batches:Triangle strips",{ 0.2, 0.5, 0.8 } },
   { 1, "Primitive batches:Display lists",  { 0.8, 0.5, 1.0 } },
+  { 1, "SW Sprites",                       { 0.2, 0.7, 0.3 },  "K", 1000 },  // NOTE(review): the "Vertices" row just below also uses "K" but supplies two trailing numbers (10, 1000); confirm a single 1000 is intended here
   { 1, "Vertices",                         { 0.5, 0.2, 0.0 },  "K", 10, 1000 },
   { 1, "Vertices:Other",                   { 0.2, 0.2, 0.2 } },
   { 1, "Vertices:Triangles",               { 0.8, 0.8, 0.8 } },

+ 2 - 1
panda/src/putil/bam.h

@@ -36,12 +36,13 @@ static const unsigned short _bam_major_ver = 6;
 // Bumped to major version 5 on 5/6/05 for new Geom implementation.
 // Bumped to major version 6 on 2/11/06 to factor out PandaNode::CData.
 
-static const unsigned short _bam_minor_ver = 5;
+static const unsigned short _bam_minor_ver = 6;
 // Bumped to minor version 1 on 3/12/06 to add Texture::_compression.
 // Bumped to minor version 2 on 3/17/06 to add PandaNode::_draw_control_mask.
 // Bumped to minor version 3 on 3/21/06 to add Texture::_ram_images.
 // Bumped to minor version 4 on 7/26/06 to add CharacterJoint::_character.
 // Bumped to minor version 5 on 11/15/06 to add PartBundleNode::_num_bundles.
+// Bumped to minor version 6 on 2/5/07 to change GeomPrimitive::_num_vertices.
 
 
 #endif