Browse Source

Experimental InstancedNode class for hardware instancing

rdb 5 years ago
parent
commit
92e2c24958
47 changed files with 1664 additions and 182 deletions
  1. +2 -18
      panda/src/cull/cullBinBackToFront.cxx
  2. +2 -18
      panda/src/cull/cullBinFixed.cxx
  3. +2 -18
      panda/src/cull/cullBinFrontToBack.cxx
  4. +2 -18
      panda/src/cull/cullBinStateSorted.cxx
  5. +2 -18
      panda/src/cull/cullBinUnsorted.cxx
  6. +5 -1
      panda/src/display/graphicsStateGuardian.cxx
  7. +1 -1
      panda/src/display/graphicsStateGuardian.h
  8. +2 -2
      panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx
  9. +1 -1
      panda/src/dxgsg9/dxGraphicsStateGuardian9.h
  10. +33 -31
      panda/src/glstuff/glGraphicsStateGuardian_src.cxx
  11. +2 -1
      panda/src/glstuff/glGraphicsStateGuardian_src.h
  12. +27 -0
      panda/src/glstuff/glShaderContext_src.cxx
  13. +7 -4
      panda/src/gobj/geom.cxx
  14. +2 -2
      panda/src/gobj/geom.h
  15. +21 -0
      panda/src/gobj/geomVertexArrayFormat.cxx
  16. +2 -0
      panda/src/gobj/geomVertexArrayFormat.h
  17. +31 -0
      panda/src/gobj/geomVertexFormat.cxx
  18. +3 -1
      panda/src/gobj/geomVertexFormat.h
  19. +11 -0
      panda/src/gobj/internalName.I
  20. +1 -0
      panda/src/gobj/internalName.cxx
  21. +2 -0
      panda/src/gobj/internalName.h
  22. +1 -1
      panda/src/gsgbase/graphicsStateGuardianBase.h
  23. +4 -0
      panda/src/pgraph/CMakeLists.txt
  24. +6 -0
      panda/src/pgraph/config_pgraph.cxx
  25. +18 -2
      panda/src/pgraph/cullTraverser.cxx
  26. +3 -1
      panda/src/pgraph/cullTraverserData.I
  27. +9 -0
      panda/src/pgraph/cullTraverserData.cxx
  28. +2 -0
      panda/src/pgraph/cullTraverserData.h
  29. +18 -3
      panda/src/pgraph/cullableObject.I
  30. +34 -0
      panda/src/pgraph/cullableObject.cxx
  31. +5 -0
      panda/src/pgraph/cullableObject.h
  32. +1 -2
      panda/src/pgraph/geomDrawCallbackData.cxx
  33. +11 -0
      panda/src/pgraph/geomNode.cxx
  34. +280 -0
      panda/src/pgraph/instanceList.I
  35. +213 -0
      panda/src/pgraph/instanceList.cxx
  36. +159 -0
      panda/src/pgraph/instanceList.h
  37. +39 -0
      panda/src/pgraph/instancedNode.I
  38. +492 -0
      panda/src/pgraph/instancedNode.cxx
  39. +136 -0
      panda/src/pgraph/instancedNode.h
  40. +2 -0
      panda/src/pgraph/p3pgraph_composite2.cxx
  41. +57 -35
      panda/src/pgraph/pandaNode.cxx
  42. +6 -0
      panda/src/pgraph/pandaNode.h
  43. +1 -1
      panda/src/pgraph/shaderAttrib.I
  44. +2 -0
      panda/src/pgraph/shaderAttrib.cxx
  45. +1 -0
      panda/src/pgraph/shaderAttrib.h
  46. +2 -2
      panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx
  47. +1 -1
      panda/src/tinydisplay/tinyGraphicsStateGuardian.h

+ 2 - 18
panda/src/cull/cullBinBackToFront.cxx

@@ -85,24 +85,8 @@ void CullBinBackToFront::
 draw(bool force, Thread *current_thread) {
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
   }
 }
 }
 
 

+ 2 - 18
panda/src/cull/cullBinFixed.cxx

@@ -71,24 +71,8 @@ void CullBinFixed::
 draw(bool force, Thread *current_thread) {
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
   }
 }
 }
 
 

+ 2 - 18
panda/src/cull/cullBinFrontToBack.cxx

@@ -85,24 +85,8 @@ void CullBinFrontToBack::
 draw(bool force, Thread *current_thread) {
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
   }
 }
 }
 
 

+ 2 - 18
panda/src/cull/cullBinStateSorted.cxx

@@ -70,24 +70,8 @@ void CullBinStateSorted::
 draw(bool force, Thread *current_thread) {
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
   }
 }
 }
 
 

+ 2 - 18
panda/src/cull/cullBinUnsorted.cxx

@@ -55,24 +55,8 @@ void CullBinUnsorted::
 draw(bool force, Thread *current_thread) {
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
 
-  Objects::iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi);
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (CullableObject *object : _objects) {
+    object->draw(_gsg, force, current_thread);
   }
   }
 }
 }
 
 

+ 5 - 1
panda/src/display/graphicsStateGuardian.cxx

@@ -2498,9 +2498,13 @@ finish_decal() {
 bool GraphicsStateGuardian::
 bool GraphicsStateGuardian::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
   _data_reader = data_reader;
   _data_reader = data_reader;
 
 
+  if (num_instances == 0) {
+    return false;
+  }
+
   // Always draw if we have a shader, since the shader might use a different
   // Always draw if we have a shader, since the shader might use a different
   // mechanism for fetching vertex data.
   // mechanism for fetching vertex data.
   return _data_reader->has_vertex() || (_target_shader && _target_shader->has_shader());
   return _data_reader->has_vertex() || (_target_shader && _target_shader->has_shader());

+ 1 - 1
panda/src/display/graphicsStateGuardian.h

@@ -375,7 +375,7 @@ public:
 
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
                               bool force);
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader,

+ 2 - 2
panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx

@@ -1180,8 +1180,8 @@ end_frame(Thread *current_thread) {
 bool DXGraphicsStateGuardian9::
 bool DXGraphicsStateGuardian9::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+                      size_t num_instances, bool force) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
     return false;
   }
   }
   nassertr(_data_reader != nullptr, false);
   nassertr(_data_reader != nullptr, false);

+ 1 - 1
panda/src/dxgsg9/dxGraphicsStateGuardian9.h

@@ -107,7 +107,7 @@ public:
 
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
                               bool force);
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,

+ 33 - 31
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -4292,7 +4292,7 @@ end_frame(Thread *current_thread) {
 bool CLP(GraphicsStateGuardian)::
 bool CLP(GraphicsStateGuardian)::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
 #ifndef NDEBUG
 #ifndef NDEBUG
   if (GLCAT.is_spam()) {
   if (GLCAT.is_spam()) {
     GLCAT.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
     GLCAT.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
@@ -4309,11 +4309,13 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   }
   }
 #endif
 #endif
 
 
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
     return false;
   }
   }
   nassertr(_data_reader != nullptr, false);
   nassertr(_data_reader != nullptr, false);
 
 
+  _instance_count = _supports_geometry_instancing ? num_instances : 1;
+
   _geom_display_list = 0;
   _geom_display_list = 0;
 
 
   if (_auto_antialias_mode) {
   if (_auto_antialias_mode) {
@@ -4861,7 +4863,7 @@ draw_triangles(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_TRIANGLES, num_vertices,
         _glDrawElementsInstanced(GL_TRIANGLES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -4877,7 +4879,7 @@ draw_triangles(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
     } else {
     } else {
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_TRIANGLES,
         _glDrawArraysInstanced(GL_TRIANGLES,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -4927,7 +4929,7 @@ draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
         return false;
       }
       }
 
 
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_TRIANGLES_ADJACENCY, num_vertices,
         _glDrawElementsInstanced(GL_TRIANGLES_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -4940,7 +4942,7 @@ draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                              client_pointer);
                              client_pointer);
       }
       }
     } else {
     } else {
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_TRIANGLES_ADJACENCY,
         _glDrawArraysInstanced(GL_TRIANGLES_ADJACENCY,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -4992,7 +4994,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
           return false;
           return false;
         }
         }
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP, num_vertices,
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP, num_vertices,
                                    get_numeric_type(reader->get_index_type()),
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer, _instance_count);
                                    client_pointer, _instance_count);
@@ -5008,7 +5010,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         }
         }
       } else {
       } else {
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP,
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP,
                                  reader->get_first_vertex(),
                                  reader->get_first_vertex(),
                                  num_vertices, _instance_count);
                                  num_vertices, _instance_count);
@@ -5042,7 +5044,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP, ends[i] - start,
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
                                      client_pointer + start * index_stride,
@@ -5064,7 +5066,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP, first_vertex + start,
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP, first_vertex + start,
                                    ends[i] - start, _instance_count);
                                    ends[i] - start, _instance_count);
           } else
           } else
@@ -5122,7 +5124,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         if (!setup_primitive(client_pointer, reader, force)) {
         if (!setup_primitive(client_pointer, reader, force)) {
           return false;
           return false;
         }
         }
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, num_vertices,
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, num_vertices,
                                    get_numeric_type(reader->get_index_type()),
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer, _instance_count);
                                    client_pointer, _instance_count);
@@ -5135,7 +5137,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                                client_pointer);
                                client_pointer);
         }
         }
       } else {
       } else {
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY,
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY,
                                  reader->get_first_vertex(),
                                  reader->get_first_vertex(),
                                  num_vertices, _instance_count);
                                  num_vertices, _instance_count);
@@ -5168,7 +5170,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         unsigned int start = 0;
         unsigned int start = 0;
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, ends[i] - start,
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
                                      client_pointer + start * index_stride,
@@ -5187,7 +5189,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         int first_vertex = reader->get_first_vertex();
         int first_vertex = reader->get_first_vertex();
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY, first_vertex + start,
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY, first_vertex + start,
                                    ends[i] - start, _instance_count);
                                    ends[i] - start, _instance_count);
           } else {
           } else {
@@ -5245,7 +5247,7 @@ draw_trifans(const GeomPrimitivePipelineReader *reader, bool force) {
       for (size_t i = 0; i < ends.size(); i++) {
       for (size_t i = 0; i < ends.size(); i++) {
         _vertices_trifan_pcollector.add_level(ends[i] - start);
         _vertices_trifan_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_FAN, ends[i] - start,
           _glDrawElementsInstanced(GL_TRIANGLE_FAN, ends[i] - start,
                                    get_numeric_type(reader->get_index_type()),
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer + start * index_stride,
                                    client_pointer + start * index_stride,
@@ -5266,7 +5268,7 @@ draw_trifans(const GeomPrimitivePipelineReader *reader, bool force) {
       for (size_t i = 0; i < ends.size(); i++) {
       for (size_t i = 0; i < ends.size(); i++) {
         _vertices_trifan_pcollector.add_level(ends[i] - start);
         _vertices_trifan_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_FAN, first_vertex + start,
           _glDrawArraysInstanced(GL_TRIANGLE_FAN, first_vertex + start,
                                  ends[i] - start, _instance_count);
                                  ends[i] - start, _instance_count);
         } else
         } else
@@ -5324,7 +5326,7 @@ draw_patches(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_PATCHES, num_vertices,
         _glDrawElementsInstanced(GL_PATCHES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5340,7 +5342,7 @@ draw_patches(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
     } else {
     } else {
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_PATCHES,
         _glDrawArraysInstanced(GL_PATCHES,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -5390,7 +5392,7 @@ draw_lines(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
         return false;
       }
       }
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINES, num_vertices,
         _glDrawElementsInstanced(GL_LINES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5406,7 +5408,7 @@ draw_lines(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
     } else {
     } else {
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_LINES,
         _glDrawArraysInstanced(GL_LINES,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -5454,7 +5456,7 @@ draw_lines_adj(const GeomPrimitivePipelineReader *reader, bool force) {
       if (!setup_primitive(client_pointer, reader, force)) {
       if (!setup_primitive(client_pointer, reader, force)) {
         return false;
         return false;
       }
       }
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINES_ADJACENCY, num_vertices,
         _glDrawElementsInstanced(GL_LINES_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5467,7 +5469,7 @@ draw_lines_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                              client_pointer);
                              client_pointer);
       }
       }
     } else {
     } else {
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_LINES_ADJACENCY,
         _glDrawArraysInstanced(GL_LINES_ADJACENCY,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -5526,7 +5528,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
         return false;
       }
       }
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINE_STRIP, num_vertices,
         _glDrawElementsInstanced(GL_LINE_STRIP, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5566,7 +5568,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
           _vertices_other_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_LINE_STRIP, ends[i] - start,
             _glDrawElementsInstanced(GL_LINE_STRIP, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
                                      client_pointer + start * index_stride,
@@ -5588,7 +5590,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
           _vertices_other_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_LINE_STRIP, first_vertex + start,
             _glDrawArraysInstanced(GL_LINE_STRIP, first_vertex + start,
                                    ends[i] - start, _instance_count);
                                    ends[i] - start, _instance_count);
           } else
           } else
@@ -5646,7 +5648,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
       if (!setup_primitive(client_pointer, reader, force)) {
       if (!setup_primitive(client_pointer, reader, force)) {
         return false;
         return false;
       }
       }
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, num_vertices,
         _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5681,7 +5683,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         unsigned int start = 0;
         unsigned int start = 0;
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
           _vertices_other_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, ends[i] - start,
             _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
                                      client_pointer + start * index_stride,
@@ -5700,7 +5702,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         int first_vertex = reader->get_first_vertex();
         int first_vertex = reader->get_first_vertex();
         for (size_t i = 0; i < ends.size(); i++) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
           _vertices_other_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_LINE_STRIP_ADJACENCY, first_vertex + start,
             _glDrawArraysInstanced(GL_LINE_STRIP_ADJACENCY, first_vertex + start,
                                    ends[i] - start, _instance_count);
                                    ends[i] - start, _instance_count);
           } else {
           } else {
@@ -5747,7 +5749,7 @@ draw_points(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
         return false;
       }
       }
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_POINTS, num_vertices,
         _glDrawElementsInstanced(GL_POINTS, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
                                  client_pointer, _instance_count);
@@ -5763,7 +5765,7 @@ draw_points(const GeomPrimitivePipelineReader *reader, bool force) {
       }
       }
     } else {
     } else {
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_POINTS,
         _glDrawArraysInstanced(GL_POINTS,
                                reader->get_first_vertex(),
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
                                num_vertices, _instance_count);
@@ -11444,7 +11446,7 @@ set_state_and_transform(const RenderState *target,
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
   determine_target_shader();
   determine_target_shader();
-  _instance_count = _target_shader->get_instance_count();
+  _sattr_instance_count = _target_shader->get_instance_count();
 
 
   if (_target_shader != _state_shader) {
   if (_target_shader != _state_shader) {
     do_issue_shader();
     do_issue_shader();

+ 2 - 1
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -295,7 +295,7 @@ public:
 
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
                               bool force);
 #ifndef OPENGLES
 #ifndef OPENGLES
@@ -1079,6 +1079,7 @@ public:
   bool _supports_texture_max_level;
   bool _supports_texture_max_level;
 
 
 #ifndef OPENGLES_1
 #ifndef OPENGLES_1
+  GLsizei _sattr_instance_count;
   GLsizei _instance_count;
   GLsizei _instance_count;
 #endif
 #endif
 
 

+ 27 - 0
panda/src/glstuff/glShaderContext_src.cxx

@@ -487,6 +487,13 @@ reflect_attribute(int i, char *name_buffer, GLsizei name_buflen) {
       bind._name = InternalName::get_texcoord();
       bind._name = InternalName::get_texcoord();
       bind._append_uv = atoi(noprefix.substr(13).c_str());
       bind._append_uv = atoi(noprefix.substr(13).c_str());
 
 
+    } else if (noprefix == "InstanceMatrix") {
+      bind._name = InternalName::get_instance_matrix();
+
+      if (param_type != GL_FLOAT_MAT4x3) {
+        GLCAT.error() << "p3d_InstanceMatrix should be mat4x3!\n";
+      }
+
     } else {
     } else {
       GLCAT.error() << "Unrecognized vertex attrib '" << name_buffer << "'!\n";
       GLCAT.error() << "Unrecognized vertex attrib '" << name_buffer << "'!\n";
       return;
       return;
@@ -498,15 +505,23 @@ reflect_attribute(int i, char *name_buffer, GLsizei name_buflen) {
 
 
   // Get the number of bind points for arrays and matrices.
   // Get the number of bind points for arrays and matrices.
   switch (param_type) {
   switch (param_type) {
+  case GL_FLOAT_MAT3x2:
   case GL_FLOAT_MAT3:
   case GL_FLOAT_MAT3:
+  case GL_FLOAT_MAT3x4:
 #ifndef OPENGLES
 #ifndef OPENGLES
+  case GL_DOUBLE_MAT3x2:
   case GL_DOUBLE_MAT3:
   case GL_DOUBLE_MAT3:
+  case GL_DOUBLE_MAT3x4:
 #endif
 #endif
     bind._elements = 3 * param_size;
     bind._elements = 3 * param_size;
     break;
     break;
 
 
+  case GL_FLOAT_MAT4x2:
+  case GL_FLOAT_MAT4x3:
   case GL_FLOAT_MAT4:
   case GL_FLOAT_MAT4:
 #ifndef OPENGLES
 #ifndef OPENGLES
+  case GL_DOUBLE_MAT4x2:
+  case GL_DOUBLE_MAT4x3:
   case GL_DOUBLE_MAT4:
   case GL_DOUBLE_MAT4:
 #endif
 #endif
     bind._elements = 4 * param_size;
     bind._elements = 4 * param_size;
@@ -2465,6 +2480,18 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) {
                  _glgsg->_glVertexAttribI4ui != nullptr) {
                  _glgsg->_glVertexAttribI4ui != nullptr) {
           _glgsg->_glVertexAttribI4ui(p, 0, 1, 2, 3);
           _glgsg->_glVertexAttribI4ui(p, 0, 1, 2, 3);
         }
         }
+        else if (name == InternalName::get_instance_matrix()) {
+          const LMatrix4 &ident_mat = LMatrix4::ident_mat();
+
+          for (int i = 0; i < bind._elements; ++i) {
+#ifdef STDFLOAT_DOUBLE
+            _glgsg->_glVertexAttrib4dv(p, ident_mat.get_data() + i * 4);
+#else
+            _glgsg->_glVertexAttrib4fv(p, ident_mat.get_data() + i * 4);
+#endif
+            ++p;
+          }
+        }
       }
       }
     }
     }
 
 

+ 7 - 4
panda/src/gobj/geom.cxx

@@ -1298,18 +1298,20 @@ prepare_now(PreparedGraphicsObjects *prepared_objects,
  * Actually draws the Geom with the indicated GSG, using the indicated vertex
  * Actually draws the Geom with the indicated GSG, using the indicated vertex
  * data (which might have been pre-munged to support the GSG's needs).
  * data (which might have been pre-munged to support the GSG's needs).
  *
  *
+ * num_instances specifies the number of times to render the geometry.
+ *
  * Returns true if all of the primitives were drawn normally, false if there
  * Returns true if all of the primitives were drawn normally, false if there
  * was a problem (for instance, some of the data was nonresident).  If force
  * was a problem (for instance, some of the data was nonresident).  If force
  * is passed true, it will wait for the data to become resident if necessary.
  * is passed true, it will wait for the data to become resident if necessary.
  */
  */
 bool Geom::
 bool Geom::
 draw(GraphicsStateGuardianBase *gsg, const GeomVertexData *vertex_data,
 draw(GraphicsStateGuardianBase *gsg, const GeomVertexData *vertex_data,
-     bool force, Thread *current_thread) const {
+     size_t num_instances, bool force, Thread *current_thread) const {
   GeomPipelineReader geom_reader(this, current_thread);
   GeomPipelineReader geom_reader(this, current_thread);
   GeomVertexDataPipelineReader data_reader(vertex_data, current_thread);
   GeomVertexDataPipelineReader data_reader(vertex_data, current_thread);
   data_reader.check_array_readers();
   data_reader.check_array_readers();
 
 
-  return geom_reader.draw(gsg, &data_reader, force);
+  return geom_reader.draw(gsg, &data_reader, num_instances, force);
 }
 }
 
 
 /**
 /**
@@ -1847,11 +1849,12 @@ check_valid(const GeomVertexDataPipelineReader *data_reader) const {
  */
  */
 bool GeomPipelineReader::
 bool GeomPipelineReader::
 draw(GraphicsStateGuardianBase *gsg,
 draw(GraphicsStateGuardianBase *gsg,
-     const GeomVertexDataPipelineReader *data_reader, bool force) const {
+     const GeomVertexDataPipelineReader *data_reader,
+     size_t num_instances, bool force) const {
   bool all_ok;
   bool all_ok;
   {
   {
     PStatTimer timer(Geom::_draw_primitive_setup_pcollector);
     PStatTimer timer(Geom::_draw_primitive_setup_pcollector);
-    all_ok = gsg->begin_draw_primitives(this, data_reader, force);
+    all_ok = gsg->begin_draw_primitives(this, data_reader, num_instances, force);
   }
   }
   if (all_ok) {
   if (all_ok) {
     Geom::Primitives::const_iterator pi;
     Geom::Primitives::const_iterator pi;

+ 2 - 2
panda/src/gobj/geom.h

@@ -158,7 +158,7 @@ PUBLISHED:
 
 
 public:
 public:
   bool draw(GraphicsStateGuardianBase *gsg,
   bool draw(GraphicsStateGuardianBase *gsg,
-            const GeomVertexData *vertex_data,
+            const GeomVertexData *vertex_data, size_t num_instances,
             bool force, Thread *current_thread) const;
             bool force, Thread *current_thread) const;
 
 
   INLINE void calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point,
   INLINE void calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point,
@@ -433,7 +433,7 @@ public:
 
 
   bool draw(GraphicsStateGuardianBase *gsg,
   bool draw(GraphicsStateGuardianBase *gsg,
             const GeomVertexDataPipelineReader *data_reader,
             const GeomVertexDataPipelineReader *data_reader,
-            bool force) const;
+            size_t num_instances, bool force) const;
 
 
 private:
 private:
   const Geom *_object;
   const Geom *_object;

+ 21 - 0
panda/src/gobj/geomVertexArrayFormat.cxx

@@ -655,6 +655,27 @@ compare_to(const GeomVertexArrayFormat &other) const {
   return 0;
   return 0;
 }
 }
 
 
+/**
+ * Returns a suitable format for sending an array of instances to the graphics
+ * backend.
+ *
+ * This may only be called after the format has been registered.  The return
+ * value will have been already registered.
+ */
+const GeomVertexArrayFormat *GeomVertexArrayFormat::
+get_instance_array_format() {
+  static CPT(GeomVertexArrayFormat) inst_array_format;
+
+  if (inst_array_format == nullptr) {
+    GeomVertexArrayFormat *new_array_format = new GeomVertexArrayFormat("instance_matrix", 4, NT_stdfloat, C_matrix);
+    new_array_format->set_divisor(1);
+    inst_array_format = GeomVertexArrayFormat::register_format(new_array_format);
+  }
+
+  nassertr(inst_array_format != nullptr, nullptr);
+  return inst_array_format.p();
+}
+
 /**
 /**
  * Resorts the _columns vector so that the columns are listed in the same
  * Resorts the _columns vector so that the columns are listed in the same
  * order they appear in the record.
  * order they appear in the record.

+ 2 - 0
panda/src/gobj/geomVertexArrayFormat.h

@@ -123,6 +123,8 @@ PUBLISHED:
 public:
 public:
   int compare_to(const GeomVertexArrayFormat &other) const;
   int compare_to(const GeomVertexArrayFormat &other) const;
 
 
+  static const GeomVertexArrayFormat *get_instance_array_format();
+
 private:
 private:
   class Registry;
   class Registry;
   INLINE static Registry *get_registry();
   INLINE static Registry *get_registry();

+ 31 - 0
panda/src/gobj/geomVertexFormat.cxx

@@ -134,6 +134,32 @@ get_post_animated_format() const {
   return _post_animated_format;
   return _post_animated_format;
 }
 }
 
 
+/**
+ * Returns a suitable vertex format for sending instanced vertex data to the
+ * graphics backend.  This is the same format as the source format, with the
+ * instancing columns added.
+ *
+ * This may only be called after the format has been registered.  The return
+ * value will have been already registered.
+ */
+CPT(GeomVertexFormat) GeomVertexFormat::
+get_post_instanced_format() const {
+  nassertr(is_registered(), nullptr);
+
+  if (_post_instanced_format == nullptr) {
+    PT(GeomVertexFormat) new_format = new GeomVertexFormat(*this);
+    new_format->add_array(GeomVertexArrayFormat::register_format(GeomVertexArrayFormat::get_instance_array_format()));
+
+    CPT(GeomVertexFormat) registered =
+      GeomVertexFormat::register_format(new_format);
+    ((GeomVertexFormat *)this)->_post_instanced_format = registered;
+  }
+
+  _post_instanced_format->test_ref_count_integrity();
+
+  return _post_instanced_format;
+}
+
 /**
 /**
  * Returns a new GeomVertexFormat that includes all of the columns defined in
  * Returns a new GeomVertexFormat that includes all of the columns defined in
  * either this GeomVertexFormat or the other one.  If any column is defined in
  * either this GeomVertexFormat or the other one.  If any column is defined in
@@ -818,6 +844,11 @@ do_unregister() {
     unref_delete(_post_animated_format);
     unref_delete(_post_animated_format);
   }
   }
   _post_animated_format = nullptr;
   _post_animated_format = nullptr;
+
+  if (_post_instanced_format != nullptr) {
+    unref_delete(_post_instanced_format);
+    _post_instanced_format = nullptr;
+  }
 }
 }
 
 
 /**
 /**

+ 3 - 1
panda/src/gobj/geomVertexFormat.h

@@ -72,6 +72,7 @@ PUBLISHED:
   MAKE_PROPERTY(animation, get_animation, set_animation);
   MAKE_PROPERTY(animation, get_animation, set_animation);
 
 
   CPT(GeomVertexFormat) get_post_animated_format() const;
   CPT(GeomVertexFormat) get_post_animated_format() const;
+  CPT(GeomVertexFormat) get_post_instanced_format() const;
   CPT(GeomVertexFormat) get_union_format(const GeomVertexFormat *other) const;
   CPT(GeomVertexFormat) get_union_format(const GeomVertexFormat *other) const;
 
 
   INLINE size_t get_num_arrays() const;
   INLINE size_t get_num_arrays() const;
@@ -222,7 +223,8 @@ private:
   typedef pvector<MorphRecord> Morphs;
   typedef pvector<MorphRecord> Morphs;
   Morphs _morphs;
   Morphs _morphs;
 
 
-  const GeomVertexFormat *_post_animated_format;
+  const GeomVertexFormat *_post_animated_format = nullptr;
+  const GeomVertexFormat *_post_instanced_format = nullptr;
 
 
   // This is the global registry of all currently-in-use formats.
   // This is the global registry of all currently-in-use formats.
   typedef pset<GeomVertexFormat *, IndirectCompareTo<GeomVertexFormat> > Formats;
   typedef pset<GeomVertexFormat *, IndirectCompareTo<GeomVertexFormat> > Formats;

+ 11 - 0
panda/src/gobj/internalName.I

@@ -366,6 +366,17 @@ get_view() {
   return _view;
   return _view;
 }
 }
 
 
+/**
+ * Returns the standard InternalName "instance_matrix".
+ */
+INLINE PT(InternalName) InternalName::
+get_instance_matrix() {
+  if (_instance_matrix == nullptr) {
+    _instance_matrix = InternalName::make("instance_matrix");
+  }
+  return _instance_matrix;
+}
+
 /**
 /**
  *
  *
  */
  */

+ 1 - 0
panda/src/gobj/internalName.cxx

@@ -40,6 +40,7 @@ PT(InternalName) InternalName::_world;
 PT(InternalName) InternalName::_camera;
 PT(InternalName) InternalName::_camera;
 PT(InternalName) InternalName::_model;
 PT(InternalName) InternalName::_model;
 PT(InternalName) InternalName::_view;
 PT(InternalName) InternalName::_view;
+PT(InternalName) InternalName::_instance_matrix;
 
 
 TypeHandle InternalName::_type_handle;
 TypeHandle InternalName::_type_handle;
 TypeHandle InternalName::_texcoord_type_handle;
 TypeHandle InternalName::_texcoord_type_handle;

+ 2 - 0
panda/src/gobj/internalName.h

@@ -92,6 +92,7 @@ PUBLISHED:
   INLINE static PT(InternalName) get_camera();
   INLINE static PT(InternalName) get_camera();
   INLINE static PT(InternalName) get_model();
   INLINE static PT(InternalName) get_model();
   INLINE static PT(InternalName) get_view();
   INLINE static PT(InternalName) get_view();
+  INLINE static PT(InternalName) get_instance_matrix();
 
 
 #ifdef HAVE_PYTHON
 #ifdef HAVE_PYTHON
   // These versions are exposed to Python, which have additional logic to map
   // These versions are exposed to Python, which have additional logic to map
@@ -141,6 +142,7 @@ private:
   static PT(InternalName) _camera;
   static PT(InternalName) _camera;
   static PT(InternalName) _model;
   static PT(InternalName) _model;
   static PT(InternalName) _view;
   static PT(InternalName) _view;
+  static PT(InternalName) _instance_matrix;
 
 
 public:
 public:
   // Datagram stuff
   // Datagram stuff

+ 1 - 1
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -203,7 +203,7 @@ public:
 
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force)=0;
+                                     size_t num_instances, bool force)=0;
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force)=0;

+ 4 - 0
panda/src/pgraph/CMakeLists.txt

@@ -39,6 +39,8 @@ set(P3PGRAPH_HEADERS
   geomDrawCallbackData.I geomDrawCallbackData.h
   geomDrawCallbackData.I geomDrawCallbackData.h
   geomNode.I geomNode.h
   geomNode.I geomNode.h
   geomTransformer.I geomTransformer.h
   geomTransformer.I geomTransformer.h
+  instanceList.I instanceList.h
+  instancedNode.I instancedNode.h
   internalNameCollection.I internalNameCollection.h
   internalNameCollection.I internalNameCollection.h
   lensNode.I lensNode.h
   lensNode.I lensNode.h
   light.I light.h
   light.I light.h
@@ -139,6 +141,8 @@ set(P3PGRAPH_SOURCES
   geomDrawCallbackData.cxx
   geomDrawCallbackData.cxx
   geomNode.cxx
   geomNode.cxx
   geomTransformer.cxx
   geomTransformer.cxx
+  instanceList.cxx
+  instancedNode.cxx
   internalNameCollection.cxx
   internalNameCollection.cxx
   lensNode.cxx
   lensNode.cxx
   light.cxx
   light.cxx

+ 6 - 0
panda/src/pgraph/config_pgraph.cxx

@@ -42,6 +42,8 @@
 #include "geomDrawCallbackData.h"
 #include "geomDrawCallbackData.h"
 #include "geomNode.h"
 #include "geomNode.h"
 #include "geomTransformer.h"
 #include "geomTransformer.h"
+#include "instanceList.h"
+#include "instancedNode.h"
 #include "lensNode.h"
 #include "lensNode.h"
 #include "light.h"
 #include "light.h"
 #include "lightAttrib.h"
 #include "lightAttrib.h"
@@ -416,6 +418,8 @@ init_libpgraph() {
   GeomDrawCallbackData::init_type();
   GeomDrawCallbackData::init_type();
   GeomNode::init_type();
   GeomNode::init_type();
   GeomTransformer::init_type();
   GeomTransformer::init_type();
+  InstanceList::init_type();
+  InstancedNode::init_type();
   LensNode::init_type();
   LensNode::init_type();
   Light::init_type();
   Light::init_type();
   LightAttrib::init_type();
   LightAttrib::init_type();
@@ -484,6 +488,8 @@ init_libpgraph() {
   Fog::register_with_read_factory();
   Fog::register_with_read_factory();
   FogAttrib::register_with_read_factory();
   FogAttrib::register_with_read_factory();
   GeomNode::register_with_read_factory();
   GeomNode::register_with_read_factory();
+  InstanceList::register_with_read_factory();
+  InstancedNode::register_with_read_factory();
   LensNode::register_with_read_factory();
   LensNode::register_with_read_factory();
   LightAttrib::register_with_read_factory();
   LightAttrib::register_with_read_factory();
   LightRampAttrib::register_with_read_factory();
   LightRampAttrib::register_with_read_factory();

+ 18 - 2
panda/src/pgraph/cullTraverser.cxx

@@ -271,10 +271,10 @@ show_bounds(CullTraverserData &data, bool tight) {
       CullableObject *outer_viz =
       CullableObject *outer_viz =
         new CullableObject(std::move(bounds_viz), get_bounds_outer_viz_state(),
         new CullableObject(std::move(bounds_viz), get_bounds_outer_viz_state(),
                            internal_transform);
                            internal_transform);
+      outer_viz->_instances = data._instances;
       _cull_handler->record_object(outer_viz, this);
       _cull_handler->record_object(outer_viz, this);
     }
     }
-
-  } else {
+  } else if (data._instances == nullptr) {
     draw_bounding_volume(node->get_bounds(), internal_transform);
     draw_bounding_volume(node->get_bounds(), internal_transform);
 
 
     if (node->is_geom_node()) {
     if (node->is_geom_node()) {
@@ -287,6 +287,22 @@ show_bounds(CullTraverserData &data, bool tight) {
                              internal_transform);
                              internal_transform);
       }
       }
     }
     }
+  } else {
+    // Draw bounds for every instance.
+    for (const InstanceList::Instance &instance : *data._instances) {
+      CPT(TransformState) transform = internal_transform->compose(instance.get_transform());
+      draw_bounding_volume(node->get_bounds(), transform);
+
+      if (node->is_geom_node()) {
+        // Also show the bounding volumes of included Geoms.
+        transform = transform->compose(node->get_transform());
+        GeomNode *gnode = (GeomNode *)node;
+        int num_geoms = gnode->get_num_geoms();
+        for (int i = 0; i < num_geoms; ++i) {
+          draw_bounding_volume(gnode->get_geom(i)->get_bounds(), transform);
+        }
+      }
+    }
   }
   }
 }
 }
 
 

+ 3 - 1
panda/src/pgraph/cullTraverserData.I

@@ -50,6 +50,7 @@ CullTraverserData(const CullTraverserData &parent, PandaNode *child) :
   _state(parent._state),
   _state(parent._state),
   _view_frustum(parent._view_frustum),
   _view_frustum(parent._view_frustum),
   _cull_planes(parent._cull_planes),
   _cull_planes(parent._cull_planes),
+  _instances(parent._instances),
   _draw_mask(parent._draw_mask),
   _draw_mask(parent._draw_mask),
   _portal_depth(parent._portal_depth)
   _portal_depth(parent._portal_depth)
 {
 {
@@ -110,7 +111,8 @@ get_modelview_transform(const CullTraverser *trav) const {
  */
  */
 INLINE CPT(TransformState) CullTraverserData::
 INLINE CPT(TransformState) CullTraverserData::
 get_internal_transform(const CullTraverser *trav) const {
 get_internal_transform(const CullTraverser *trav) const {
-  return trav->get_scene()->get_cs_world_transform()->compose(_net_transform);
+  const TransformState *cs_world_transform = trav->get_scene()->get_cs_world_transform();
+  return cs_world_transform->compose(_net_transform);
 }
 }
 
 
 /**
 /**

+ 9 - 0
panda/src/pgraph/cullTraverserData.cxx

@@ -77,6 +77,15 @@ apply_transform_and_state(CullTraverser *trav) {
 void CullTraverserData::
 void CullTraverserData::
 apply_transform(const TransformState *node_transform) {
 apply_transform(const TransformState *node_transform) {
   if (!node_transform->is_identity()) {
   if (!node_transform->is_identity()) {
+    if (_instances != nullptr) {
+      InstanceList *instances = new InstanceList(*_instances);
+      for (InstanceList::Instance &instance : *instances) {
+        instance.set_transform(instance.get_transform()->compose(node_transform));
+      }
+      _instances = std::move(instances);
+      return;
+    }
+
     _net_transform = _net_transform->compose(node_transform);
     _net_transform = _net_transform->compose(node_transform);
 
 
     if ((_view_frustum != nullptr) ||
     if ((_view_frustum != nullptr) ||

+ 2 - 0
panda/src/pgraph/cullTraverserData.h

@@ -23,6 +23,7 @@
 #include "pointerTo.h"
 #include "pointerTo.h"
 #include "drawMask.h"
 #include "drawMask.h"
 #include "pvector.h"
 #include "pvector.h"
+#include "instanceList.h"
 
 
 class PandaNode;
 class PandaNode;
 class CullTraverser;
 class CullTraverser;
@@ -81,6 +82,7 @@ public:
   CPT(RenderState) _state;
   CPT(RenderState) _state;
   PT(GeometricBoundingVolume) _view_frustum;
   PT(GeometricBoundingVolume) _view_frustum;
   CPT(CullPlanes) _cull_planes;
   CPT(CullPlanes) _cull_planes;
+  CPT(InstanceList) _instances;
   DrawMask _draw_mask;
   DrawMask _draw_mask;
   int _portal_depth;
   int _portal_depth;
 
 

+ 18 - 3
panda/src/pgraph/cullableObject.I

@@ -70,7 +70,7 @@ operator = (const CullableObject &copy) {
  */
  */
 INLINE void CullableObject::
 INLINE void CullableObject::
 draw(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
 draw(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
-  if (_draw_callback != nullptr) {
+  if (UNLIKELY(_draw_callback != nullptr)) {
     // It has a callback associated.
     // It has a callback associated.
     gsg->clear_before_callback();
     gsg->clear_before_callback();
     gsg->set_state_and_transform(_state, _internal_transform);
     gsg->set_state_and_transform(_state, _internal_transform);
@@ -81,11 +81,26 @@ draw(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
       gsg->clear_state_and_transform();
       gsg->clear_state_and_transform();
     }
     }
     // Now the callback has taken care of drawing.
     // Now the callback has taken care of drawing.
-  } else {
+  }
+  else if (LIKELY(_instances == nullptr)) {
     nassertv(_geom != nullptr);
     nassertv(_geom != nullptr);
     gsg->set_state_and_transform(_state, _internal_transform);
     gsg->set_state_and_transform(_state, _internal_transform);
     draw_inline(gsg, force, current_thread);
     draw_inline(gsg, force, current_thread);
   }
   }
+  else {
+    // It has an instance list left over (not munged into vertex data), which
+    // means the shader doesn't implement instancing.  Just render the object
+    // more than once.
+    nassertv(_geom != nullptr);
+    GeomPipelineReader geom_reader(_geom, current_thread);
+    GeomVertexDataPipelineReader data_reader(_munged_data, current_thread);
+    data_reader.check_array_readers();
+
+    for (const InstanceList::Instance &instance : *_instances) {
+      gsg->set_state_and_transform(_state, _internal_transform->compose(instance.get_transform()));
+      geom_reader.draw(gsg, &data_reader, _num_instances, force);
+    }
+  }
 }
 }
 
 
 /**
 /**
@@ -130,7 +145,7 @@ flush_level() {
  */
  */
 INLINE void CullableObject::
 INLINE void CullableObject::
 draw_inline(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
 draw_inline(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
-  _geom->draw(gsg, _munged_data, force, current_thread);
+  _geom->draw(gsg, _munged_data, _num_instances, force, current_thread);
 }
 }
 
 
 /**
 /**

+ 34 - 0
panda/src/pgraph/cullableObject.cxx

@@ -39,6 +39,7 @@ PStatCollector CullableObject::_munge_geom_pcollector("*:Munge:Geom");
 PStatCollector CullableObject::_munge_sprites_pcollector("*:Munge:Sprites");
 PStatCollector CullableObject::_munge_sprites_pcollector("*:Munge:Sprites");
 PStatCollector CullableObject::_munge_sprites_verts_pcollector("*:Munge:Sprites:Verts");
 PStatCollector CullableObject::_munge_sprites_verts_pcollector("*:Munge:Sprites:Verts");
 PStatCollector CullableObject::_munge_sprites_prims_pcollector("*:Munge:Sprites:Prims");
 PStatCollector CullableObject::_munge_sprites_prims_pcollector("*:Munge:Sprites:Prims");
+PStatCollector CullableObject::_munge_instances_pcollector("*:Munge:Instances");
 PStatCollector CullableObject::_sw_sprites_pcollector("SW Sprites");
 PStatCollector CullableObject::_sw_sprites_pcollector("SW Sprites");
 
 
 TypeHandle CullableObject::_type_handle;
 TypeHandle CullableObject::_type_handle;
@@ -173,6 +174,23 @@ munge_geom(GraphicsStateGuardianBase *gsg, GeomMunger *munger,
       std::swap(_munged_data, animated_vertices);
       std::swap(_munged_data, animated_vertices);
     }
     }
 
 
+    if (sattr != nullptr) {
+      if (_instances != nullptr &&
+          sattr->get_flag(ShaderAttrib::F_hardware_instancing)) {
+        // We are under an InstancedNode, and the shader implements hardware
+        // instancing.  Munge the instance list into the vertex data.
+        munge_instances(current_thread);
+        _num_instances = _instances->size();
+        _instances = nullptr;
+      } else {
+        // No, use the instance count from the ShaderAttrib.
+        int count = sattr->get_instance_count();
+        _num_instances = (count > 0) ? (size_t)count : 1;
+      }
+    } else {
+      _num_instances = 1;
+    }
+
 #ifndef NDEBUG
 #ifndef NDEBUG
     if (show_vertex_animation) {
     if (show_vertex_animation) {
       GeomVertexDataPipelineReader data_reader(_munged_data, current_thread);
       GeomVertexDataPipelineReader data_reader(_munged_data, current_thread);
@@ -204,6 +222,22 @@ output(std::ostream &out) const {
   }
   }
 }
 }
 
 
+/**
+ * Replaces _munged_data with a copy of itself to which the array data
+ * generated from the instance list (_instances) has been added.
+ */
+void CullableObject::
+munge_instances(Thread *current_thread) {
+  PStatTimer timer(_munge_instances_pcollector, current_thread);
+
+  PT(GeomVertexData) instanced_data = new GeomVertexData(*_munged_data);
+  const GeomVertexArrayFormat *array_format = GeomVertexArrayFormat::get_instance_array_format();
+
+  CPT(GeomVertexArrayData) new_array = _instances->get_array_data(array_format);
+  instanced_data->insert_array((size_t)-1, new_array);
+  _munged_data = instanced_data;
+}
+
 /**
 /**
  * Converts a table of points to quads for rendering on systems that don't
  * Converts a table of points to quads for rendering on systems that don't
  * support fancy points.
  * support fancy points.

+ 5 - 0
panda/src/pgraph/cullableObject.h

@@ -30,6 +30,7 @@
 #include "lightMutex.h"
 #include "lightMutex.h"
 #include "callbackObject.h"
 #include "callbackObject.h"
 #include "geomDrawCallbackData.h"
 #include "geomDrawCallbackData.h"
+#include "instanceList.h"
 
 
 class CullTraverser;
 class CullTraverser;
 class GeomMunger;
 class GeomMunger;
@@ -73,8 +74,11 @@ public:
   CPT(RenderState) _state;
   CPT(RenderState) _state;
   CPT(TransformState) _internal_transform;
   CPT(TransformState) _internal_transform;
   PT(CallbackObject) _draw_callback;
   PT(CallbackObject) _draw_callback;
+  CPT(InstanceList) _instances;
+  int _num_instances = 1;
 
 
 private:
 private:
+  void munge_instances(Thread *current_thread);
   bool munge_points_to_quads(const CullTraverser *traverser, bool force);
   bool munge_points_to_quads(const CullTraverser *traverser, bool force);
 
 
   static CPT(RenderState) get_flash_cpu_state();
   static CPT(RenderState) get_flash_cpu_state();
@@ -113,6 +117,7 @@ private:
   static PStatCollector _munge_sprites_pcollector;
   static PStatCollector _munge_sprites_pcollector;
   static PStatCollector _munge_sprites_verts_pcollector;
   static PStatCollector _munge_sprites_verts_pcollector;
   static PStatCollector _munge_sprites_prims_pcollector;
   static PStatCollector _munge_sprites_prims_pcollector;
+  static PStatCollector _munge_instances_pcollector;
   static PStatCollector _sw_sprites_pcollector;
   static PStatCollector _sw_sprites_pcollector;
 
 
 public:
 public:

+ 1 - 2
panda/src/pgraph/geomDrawCallbackData.cxx

@@ -45,7 +45,6 @@ upcall() {
       _gsg->clear_state_and_transform();
       _gsg->clear_state_and_transform();
     }
     }
 
 
-    _obj->_geom->draw(_gsg, _obj->_munged_data, _force,
-                      Thread::get_current_thread());
+    _obj->draw_inline(_gsg, _force, Thread::get_current_thread());
   }
   }
 }
 }

+ 11 - 0
panda/src/pgraph/geomNode.cxx

@@ -39,6 +39,7 @@
 #include "boundingSphere.h"
 #include "boundingSphere.h"
 #include "config_mathutil.h"
 #include "config_mathutil.h"
 #include "preparedGraphicsObjects.h"
 #include "preparedGraphicsObjects.h"
+#include "instanceList.h"
 
 
 
 
 bool allow_flatten_color = ConfigVariableBool
 bool allow_flatten_color = ConfigVariableBool
@@ -527,6 +528,16 @@ add_for_draw(CullTraverser *trav, CullTraverserData &data) {
       continue;
       continue;
     }
     }
 
 
+    if (data._instances != nullptr) {
+      // Draw each individual instance.  We don't bother culling each
+      // individual Geom for each instance; that is probably way too slow.
+      CullableObject *object =
+        new CullableObject(std::move(geom), std::move(state), internal_transform);
+      object->_instances = data._instances;
+      trav->get_cull_handler()->record_object(object, trav);
+      continue;
+    }
+
     // Cull the Geom bounding volume against the view frustum andor the cull
     // Cull the Geom bounding volume against the view frustum andor the cull
     // planes.  Don't bother unless we've got more than one Geom, since
     // planes.  Don't bother unless we've got more than one Geom, since
     // otherwise the bounding volume of the GeomNode is (probably) the same as
     // otherwise the bounding volume of the GeomNode is (probably) the same as

+ 280 - 0
panda/src/pgraph/instanceList.I

@@ -0,0 +1,280 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.I
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+/**
+ * Initializes an instance with the identity transform.
+ */
+INLINE InstanceList::Instance::
+Instance() : _transform(TransformState::make_identity()) {
+}
+
+/**
+ * Initializes an instance with the given transformation.
+ */
+INLINE InstanceList::Instance::
+Instance(CPT(TransformState) transform) : _transform(std::move(transform)) {
+}
+
+/**
+ * Returns the position component of this instance's transform, as reported
+ * by TransformState::get_pos().
+ */
+INLINE LPoint3 InstanceList::Instance::
+get_pos() const {
+  return get_transform()->get_pos();
+}
+
+/**
+ * Replaces this instance's transform with the result of calling set_pos(pos)
+ * on the current transform.
+ */
+INLINE void InstanceList::Instance::
+set_pos(const LPoint3 &pos) {
+  set_transform(get_transform()->set_pos(pos));
+}
+
+/**
+ * Convenience overload; equivalent to set_pos(LPoint3(x, y, z)).
+ */
+INLINE void InstanceList::Instance::
+set_pos(PN_stdfloat x, PN_stdfloat y, PN_stdfloat z) {
+  set_pos(LPoint3(x, y, z));
+}
+
+/**
+ * Returns the heading/pitch/roll component of this instance's transform, as
+ * reported by TransformState::get_hpr().
+ */
+INLINE LVecBase3 InstanceList::Instance::
+get_hpr() const {
+  return get_transform()->get_hpr();
+}
+
+/**
+ * Replaces this instance's transform with the result of calling set_hpr(hpr)
+ * on the current transform.
+ */
+INLINE void InstanceList::Instance::
+set_hpr(const LVecBase3 &hpr) {
+  set_transform(get_transform()->set_hpr(hpr));
+}
+
+/**
+ * Convenience overload; equivalent to set_hpr(LVecBase3(h, p, r)).
+ */
+INLINE void InstanceList::Instance::
+set_hpr(PN_stdfloat h, PN_stdfloat p, PN_stdfloat r) {
+  set_hpr(LVecBase3(h, p, r));
+}
+
+/**
+ * Returns the rotation of this instance's transform as a quaternion, as
+ * reported by TransformState::get_quat().
+ */
+INLINE LQuaternion InstanceList::Instance::
+get_quat() const {
+  return get_transform()->get_quat();
+}
+
+/**
+ * Replaces this instance's transform with the result of calling
+ * set_quat(quat) on the current transform.
+ */
+INLINE void InstanceList::Instance::
+set_quat(const LQuaternion &quat) {
+  set_transform(get_transform()->set_quat(quat));
+}
+
+/**
+ * Returns the scale component of this instance's transform, as reported by
+ * TransformState::get_scale().
+ */
+INLINE LVecBase3 InstanceList::Instance::
+get_scale() const {
+  return get_transform()->get_scale();
+}
+
+/**
+ * Replaces this instance's transform with the result of calling
+ * set_scale(scale) on the current transform.
+ */
+INLINE void InstanceList::Instance::
+set_scale(const LVecBase3 &scale) {
+  set_transform(get_transform()->set_scale(scale));
+}
+
+/**
+ * Convenience overload; equivalent to set_scale(LVecBase3(sx, sy, sz)).
+ */
+INLINE void InstanceList::Instance::
+set_scale(PN_stdfloat sx, PN_stdfloat sy, PN_stdfloat sz) {
+  set_scale(LVecBase3(sx, sy, sz));
+}
+
+/**
+ * Returns the matrix of this instance's transform, as reported by
+ * TransformState::get_mat().
+ */
+INLINE const LMatrix4 &InstanceList::Instance::
+get_mat() const {
+  return get_transform()->get_mat();
+}
+
+/**
+ * Replaces this instance's transform with one created from the given matrix
+ * by TransformState::make_mat().
+ */
+INLINE void InstanceList::Instance::
+set_mat(const LMatrix4 &mat) {
+  set_transform(TransformState::make_mat(mat));
+}
+
+/**
+ * Returns the TransformState held by this instance.
+ */
+INLINE const TransformState *InstanceList::Instance::
+get_transform() const {
+  return _transform.p();
+}
+
+/**
+ * Replaces the TransformState held by this instance.  The pointer is taken
+ * by value and moved into place.
+ */
+INLINE void InstanceList::Instance::
+set_transform(CPT(TransformState) transform) {
+  _transform = std::move(transform);
+}
+
+/**
+ * Adds a new instance with the indicated transformation to the list.
+ */
+INLINE void InstanceList::
+append(InstanceList::Instance instance) {
+  _instances.push_back(std::move(instance));
+  _cached_array.clear();
+}
+
+/**
+ * Adds a new instance with the indicated transformation to the list.
+ */
+INLINE void InstanceList::
+append(const TransformState *transform) {
+  _instances.push_back(Instance(transform));
+  _cached_array.clear();
+}
+
+/**
+ * Adds a new instance with the indicated transformation to the list.
+ */
+INLINE void InstanceList::
+append(const LPoint3 &pos, const LVecBase3 &hpr, const LVecBase3 &scale) {
+
+  append(TransformState::make_pos_hpr_scale(pos, hpr, scale));
+}
+
+/**
+ * Adds a new instance with the indicated transformation to the list.
+ */
+INLINE void InstanceList::
+append(const LPoint3 &pos, const LQuaternion &quat, const LVecBase3 &scale) {
+
+  append(TransformState::make_pos_quat_scale(pos, quat, scale));
+}
+
+/**
+ * Returns the total number of instances in the list.
+ */
+INLINE size_t InstanceList::
+size() const {
+  return _instances.size();
+}
+
+/**
+ * Returns the nth instance in the list.
+ */
+INLINE const InstanceList::Instance &InstanceList::
+operator [] (size_t n) const {
+  return _instances[n];
+}
+
+/**
+ * Returns a modifiable reference to the nth instance in the list.  The cached
+ * array is cleared before the reference is handed out, presumably because
+ * the caller may change the instance through it.
+ */
+INLINE InstanceList::Instance &InstanceList::
+operator [] (size_t n) {
+  _cached_array.clear();
+  return _instances[n];
+}
+
+/**
+ * Empties the instance list.
+ */
+INLINE void InstanceList::
+clear() {
+  _instances.clear();
+  _cached_array.clear();
+}
+
+/**
+ * Reserves space for the given number of instances.
+ */
+INLINE void InstanceList::
+reserve(size_t n) {
+  _instances.reserve(n);
+}
+
+/**
+ * Returns true if the InstanceList is empty.
+ */
+INLINE bool InstanceList::
+empty() const {
+  return _instances.empty();
+}
+
+/**
+ * Returns an iterator to the beginning of the list.
+ *
+ * NOTE(review): unlike the non-const operator [], this does not clear the
+ * cached array, so instances modified through the iterator are not reflected
+ * in a previously cached array -- confirm whether that is intended.
+ */
+INLINE InstanceList::iterator InstanceList::
+begin() {
+  return _instances.begin();
+}
+
+/**
+ * Returns a const_iterator to the beginning of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+begin() const {
+  return _instances.begin();
+}
+
+/**
+ * Returns a const_iterator to the beginning of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+cbegin() const {
+  return _instances.cbegin();
+}
+
+/**
+ * Returns an iterator to the end of the list.
+ */
+INLINE InstanceList::iterator InstanceList::
+end() {
+  return _instances.end();
+}
+
+/**
+ * Returns a const_iterator to the end of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+end() const {
+  return _instances.end();
+}
+
+/**
+ * Returns a const_iterator to the end of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+cend() const {
+  return _instances.cend();
+}

+ 213 - 0
panda/src/pgraph/instanceList.cxx

@@ -0,0 +1,213 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.cxx
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#include "instanceList.h"
+#include "indent.h"
+#include "bamReader.h"
+#include "bamWriter.h"
+#include "bitArray.h"
+#include "geomVertexWriter.h"
+
+TypeHandle InstanceList::_type_handle;
+
+/**
+ * Required to implement CopyOnWriteObject.
+ */
+PT(CopyOnWriteObject) InstanceList::
+make_cow_copy() {
+  return new InstanceList(*this);
+}
+
+/**
+ * Constructs an InstanceList; all members are default-initialized.
+ */
+InstanceList::
+InstanceList() {
+}
+
+/**
+ * Constructs an InstanceList holding a copy of the other list's instances.
+ * The cached array of the other list is not copied.
+ */
+InstanceList::
+InstanceList(const InstanceList &copy) :
+  _instances(copy._instances)
+{
+}
+
+/**
+ *
+ */
+InstanceList::
+~InstanceList() {
+}
+
+/**
+ * Transforms all of the instances in the list by the indicated matrix.
+ *
+ * Each instance transform is replaced by the composition of a TransformState
+ * made from the matrix with the existing instance transform (the same
+ * ordering InstancedNode::cull_callback() uses when it combines a parent
+ * transform with an instance transform).  The cached array is cleared
+ * afterwards, since it was built from the old transforms.
+ */
+void InstanceList::
+xform(const LMatrix4 &mat) {
+  CPT(TransformState) xform_state = TransformState::make_mat(mat);
+
+  for (Instance &instance : _instances) {
+    instance.set_transform(xform_state->compose(instance.get_transform()));
+  }
+
+  // Any previously generated array no longer matches the instances.
+  _cached_array.clear();
+}
+
+/**
+ * Produces an immutable list containing only those instances whose bit is
+ * off in the given mask.  If no bits are on, this list itself is returned; if
+ * at least as many bits are on as there are instances, a shared empty list
+ * is returned.
+ */
+CPT(InstanceList) InstanceList::
+without(const BitArray &mask) const {
+  const size_t total = size();
+  const size_t culled = (size_t)mask.get_num_on_bits();
+
+  if (culled == 0) {
+    // Nothing is masked out, so this list can be handed back as-is.
+    return this;
+  }
+
+  if (culled >= total) {
+    // Everything is masked out; reuse one shared empty list.
+    static CPT(InstanceList) empty_list;
+    if (empty_list == nullptr) {
+      empty_list = new InstanceList;
+    }
+
+    nassertr(culled <= total, empty_list);
+    return empty_list;
+  }
+
+  InstanceList *result = new InstanceList;
+  result->_instances.reserve(total - culled);
+
+  // Start at the first off bit; everything before it is masked out anyway.
+  size_t index = (size_t)mask.get_lowest_off_bit();
+  while (index < total) {
+    if (!mask.get_bit(index)) {
+      result->_instances.push_back(_instances[index]);
+    }
+    ++index;
+  }
+
+  return result;
+}
+
+/**
+ * Returns a GeomVertexArrayData in the given format whose instance_matrix
+ * column holds one matrix per instance.  The most recently built array is
+ * cached and returned again as long as the requested format matches.
+ */
+CPT(GeomVertexArrayData) InstanceList::
+get_array_data(const GeomVertexArrayFormat *format) const {
+  CPT(GeomVertexArrayData) cached = _cached_array;
+  if (cached != nullptr && cached->get_array_format() == format) {
+    return cached;
+  }
+
+  nassertr(format != nullptr, nullptr);
+
+  const size_t count = size();
+  PT(GeomVertexArrayData) fresh = new GeomVertexArrayData(format, GeomEnums::UH_stream);
+  fresh->unclean_set_num_rows(count);
+
+  {
+    // The writer is scoped so that it is destroyed before the array is
+    // published as the cached result.
+    GeomVertexWriter writer(fresh, Thread::get_current_thread());
+    writer.set_column(InternalName::get_instance_matrix());
+    for (const Instance &instance : _instances) {
+      writer.set_matrix4(instance.get_mat());
+    }
+  }
+
+  _cached_array = fresh;
+  return fresh;
+}
+
+/**
+ * Writes a brief description of the list to the stream, consisting of the
+ * class name and the number of instances.
+ */
+void InstanceList::
+output(std::ostream &out) const {
+  out << "InstanceList[" << size() << "]";
+}
+
+/**
+ * Writes a multi-line description of the list at the given indentation: a
+ * header line with the instance count, followed by the transform of each
+ * instance on its own line, indented two further columns.
+ */
+void InstanceList::
+write(std::ostream &out, int indent_level) const {
+  const int entry_indent = indent_level + 2;
+
+  indent(out, indent_level) << "InstanceList[" << size() << "]:\n";
+
+  for (const_iterator it = cbegin(); it != cend(); ++it) {
+    const TransformState *transform = it->get_transform();
+    indent(out, entry_indent) << *transform << "\n";
+  }
+}
+
+/**
+ * Tells the BamReader how to create objects of type InstanceList.
+ */
+void InstanceList::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.
+ *
+ * The number of instances is written as a uint16, followed by one pointer
+ * per instance transform; this is the layout that fillin() and
+ * complete_pointers() read back.
+ */
+void InstanceList::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  // The count is stored as a uint16, so a longer list cannot be represented.
+  // Check this before anything has been written to the datagram.
+  nassertv(_instances.size() <= 0xffff);
+
+  CopyOnWriteObject::write_datagram(manager, dg);
+
+  // fillin() reads the count before it requests the pointers, so it has to
+  // be written ahead of them.
+  dg.add_uint16((uint16_t)_instances.size());
+
+  for (const Instance &instance : _instances) {
+    manager->write_pointer(dg, instance.get_transform());
+  }
+}
+
+/**
+ * Called with the resolved pointers, one for each manager->read_pointer()
+ * call made in fillin().  Each instance receives the next TransformState
+ * from the list.  Returns the number of pointers processed.
+ */
+int InstanceList::
+complete_pointers(TypedWritable **p_list, BamReader *manager) {
+  int next = CopyOnWriteObject::complete_pointers(p_list, manager);
+
+  for (iterator it = _instances.begin(); it != _instances.end(); ++it) {
+    TransformState *transform = DCAST(TransformState, p_list[next]);
+    ++next;
+    *it = Instance(transform);
+  }
+
+  return next;
+}
+
+/**
+ * Factory callback invoked by the BamReader when an InstanceList is
+ * encountered in the Bam file.  Allocates the InstanceList and lets it read
+ * its contents from the datagram via fillin().
+ */
+TypedWritable *InstanceList::
+make_from_bam(const FactoryParams &params) {
+  InstanceList *list = new InstanceList;
+
+  DatagramIterator scan;
+  BamReader *manager;
+  parse_params(params, scan, manager);
+
+  list->fillin(scan, manager);
+  return list;
+}
+
+/**
+ * Called by make_from_bam to read the data for a new InstanceList from the
+ * Bam file.  Reads the instance count as a uint16, sizes the list to match,
+ * and requests one pointer per instance; the pointers are later filled in by
+ * complete_pointers().
+ */
+void InstanceList::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  CopyOnWriteObject::fillin(scan, manager);
+
+  const size_t count = scan.get_uint16();
+  _instances.clear();
+  _instances.resize(count);
+
+  // Request one transform pointer for every instance.
+  for (size_t remaining = count; remaining > 0; --remaining) {
+    manager->read_pointer(scan);
+  }
+
+  _cached_array.clear();
+}

+ 159 - 0
panda/src/pgraph/instanceList.h

@@ -0,0 +1,159 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.h
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#ifndef INSTANCELIST_H
+#define INSTANCELIST_H
+
+#include "pandabase.h"
+#include "copyOnWriteObject.h"
+#include "transformState.h"
+#include "pvector.h"
+#include "geomVertexArrayData.h"
+
+class BitArray;
+class FactoryParams;
+
+/**
+ * This structure stores a list of per-instance data, used by InstancedNode.
+ *
+ * Each entry is an Instance, which holds a TransformState.  The list also
+ * keeps the most recently generated GeomVertexArrayData of instance matrices
+ * (see get_array_data()), which is cleared by the modifying methods.
+ *
+ * @since 1.11.0
+ */
+class EXPCL_PANDA_PGRAPH InstanceList : public CopyOnWriteObject {
+protected:
+  virtual PT(CopyOnWriteObject) make_cow_copy() override;
+
+PUBLISHED:
+  InstanceList();
+  InstanceList(const InstanceList &copy);
+  virtual ~InstanceList();
+  ALLOC_DELETED_CHAIN(InstanceList);
+
+  /**
+   * An individual instance in an InstanceList.  It wraps a single
+   * TransformState and offers accessors for its components.
+   *
+   * @since 1.11.0
+   */
+  class EXPCL_PANDA_PGRAPH Instance {
+  public:
+    INLINE explicit Instance();
+    INLINE explicit Instance(CPT(TransformState) transform);
+
+  PUBLISHED:
+    INLINE LPoint3 get_pos() const;
+    INLINE void set_pos(const LPoint3 &);
+    INLINE void set_pos(PN_stdfloat x, PN_stdfloat y, PN_stdfloat z);
+
+    INLINE LVecBase3 get_hpr() const;
+    INLINE void set_hpr(const LVecBase3 &);
+    INLINE void set_hpr(PN_stdfloat h, PN_stdfloat p, PN_stdfloat r);
+
+    INLINE LQuaternion get_quat() const;
+    INLINE void set_quat(const LQuaternion &);
+
+    INLINE LVecBase3 get_scale() const;
+    INLINE void set_scale(const LVecBase3 &);
+    INLINE void set_scale(PN_stdfloat sx, PN_stdfloat sy, PN_stdfloat sz);
+
+    INLINE const LMatrix4 &get_mat() const;
+    INLINE void set_mat(const LMatrix4 &mat);
+
+    INLINE const TransformState *get_transform() const;
+    INLINE void set_transform(CPT(TransformState));
+    MAKE_PROPERTY(transform, get_transform);
+
+  private:
+    CPT(TransformState) _transform;
+  };
+
+  void append(Instance instance);
+  void append(const TransformState *transform);
+  void append(const LPoint3 &pos,
+              const LVecBase3 &hpr = LVecBase3(0),
+              const LVecBase3 &scale = LVecBase3(1));
+  void append(const LPoint3 &pos,
+              const LQuaternion &quat,
+              const LVecBase3 &scale = LVecBase3(1));
+
+  INLINE size_t size() const;
+  INLINE const Instance &operator [] (size_t n) const;
+  INLINE Instance &operator [] (size_t n);
+  INLINE void clear();
+  INLINE void reserve(size_t);
+
+  void xform(const LMatrix4 &mat);
+
+public:
+  typedef pvector<Instance> Instances;
+  typedef Instances::iterator iterator;
+  typedef Instances::const_iterator const_iterator;
+
+  INLINE bool empty() const;
+
+  INLINE iterator begin();
+  INLINE const_iterator begin() const;
+  INLINE const_iterator cbegin() const;
+
+  INLINE iterator end();
+  INLINE const_iterator end() const;
+  INLINE const_iterator cend() const;
+
+  CPT(InstanceList) without(const BitArray &mask) const;
+
+  CPT(GeomVertexArrayData) get_array_data(const GeomVertexArrayFormat *format) const;
+
+  virtual void output(std::ostream &out) const;
+  virtual void write(std::ostream &out, int indent_level) const;
+
+private:
+  Instances _instances;
+
+  mutable CPT(GeomVertexArrayData) _cached_array;  // filled in by the const method get_array_data(), hence mutable
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg) override;
+  virtual int complete_pointers(TypedWritable **plist, BamReader *manager) override;
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager) override;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    CopyOnWriteObject::init_type();
+    register_type(_type_handle, "InstanceList",
+                  CopyOnWriteObject::get_class_type());
+  }
+  virtual TypeHandle get_type() const override {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() override {
+    init_type();
+    return get_class_type();
+  }
+
+private:
+  static TypeHandle _type_handle;
+};
+
+inline std::ostream &operator <<(std::ostream &out, const InstanceList &list) {
+  list.output(out);
+  return out;
+}
+
+#include "instanceList.I"
+
+#endif

+ 39 - 0
panda/src/pgraph/instancedNode.I

@@ -0,0 +1,39 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.I
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+/**
+ * Returns the number of instances.
+ */
+INLINE size_t InstancedNode::
+get_num_instances() const {
+  Thread *current_thread = Thread::get_current_thread();
+  CDReader cdata(_cycler, current_thread);
+  nassertr_always(cdata->_instances != nullptr, 0);
+  return cdata->_instances.get_read_pointer(current_thread)->size();
+}
+
+/**
+ * Returns the list of instances.
+ */
+INLINE CPT(InstanceList) InstancedNode::
+get_instances(Thread *current_thread) const {
+  CDReader cdata(_cycler, current_thread);
+  return cdata->_instances.get_read_pointer(current_thread);
+}
+
+/**
+ * Initializes the cycle data with a newly-allocated InstanceList, so that
+ * _instances is not null to begin with.
+ */
+INLINE InstancedNode::CData::
+CData() : _instances(new InstanceList) {
+}

+ 492 - 0
panda/src/pgraph/instancedNode.cxx

@@ -0,0 +1,492 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.cxx
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#include "instancedNode.h"
+#include "boundingBox.h"
+#include "boundingSphere.h"
+#include "cullTraverserData.h"
+#include "cullPlanes.h"
+
+TypeHandle InstancedNode::_type_handle;
+
+/**
+ * Constructs an InstancedNode with the given name, and calls
+ * set_cull_callback() so that cull_callback() will be invoked for it.
+ */
+InstancedNode::
+InstancedNode(const std::string &name) :
+  PandaNode(name)
+{
+  set_cull_callback();
+}
+
+/**
+ * Copy constructor.  Copies the PandaNode base and the cycler of the other
+ * node, and calls set_cull_callback() so that cull_callback() will be invoked
+ * for the copy as well.
+ */
+InstancedNode::
+InstancedNode(const InstancedNode &copy) :
+  PandaNode(copy),
+  _cycler(copy._cycler)
+{
+  set_cull_callback();
+}
+
+/**
+ *
+ */
+InstancedNode::
+~InstancedNode() {
+}
+
+/**
+ * Returns a newly-allocated PandaNode that is a shallow copy of this one.  It
+ * will be a different pointer, but its internal data may or may not be shared
+ * with that of the original PandaNode.  No children will be copied.
+ */
+PandaNode *InstancedNode::
+make_copy() const {
+  return new InstancedNode(*this);
+}
+
+/**
+ * Returns the list of instances.
+ *
+ * Don't call this in a downstream thread unless you don't mind it blowing
+ * away other changes you might have recently made in an upstream thread.
+ */
+PT(InstanceList) InstancedNode::
+modify_instances() {
+  Thread *current_thread = Thread::get_current_thread();
+  CDWriter cdata(_cycler, true, current_thread);
+  PT(InstanceList) instances = cdata->_instances.get_write_pointer();
+  mark_bounds_stale(current_thread->get_pipeline_stage(), current_thread);
+  mark_bam_modified();
+  return instances;
+}
+
+/**
+ * Entirely replaces the list of instances with the given list, then marks the
+ * bounds stale and the node as modified for Bam purposes.
+ *
+ * The list must not be null; get_num_instances() treats a null list as an
+ * error.
+ *
+ * Don't call this in a downstream thread unless you don't mind it blowing
+ * away other changes you might have recently made in an upstream thread.
+ */
+void InstancedNode::
+set_instances(PT(InstanceList) instances) {
+  nassertv(instances != nullptr);
+
+  Thread *current_thread = Thread::get_current_thread();
+  // Pass the thread along explicitly, as modify_instances() does.
+  CDWriter cdata(_cycler, true, current_thread);
+  cdata->_instances = std::move(instances);
+  mark_bounds_stale(current_thread->get_pipeline_stage(), current_thread);
+  mark_bam_modified();
+}
+
+/**
+ * Returns true if it is generally safe to flatten out this particular kind of
+ * PandaNode by duplicating instances (by calling dupe_for_flatten()), false
+ * otherwise (for instance, a Camera cannot be safely flattened, because the
+ * Camera pointer itself is meaningful).
+ */
+bool InstancedNode::
+safe_to_flatten() const {
+  return false;
+}
+
+/**
+ * Returns true if it is generally safe to combine this particular kind of
+ * PandaNode with other kinds of PandaNodes of compatible type, adding
+ * children or whatever.  For instance, an LODNode should not be combined with
+ * any other PandaNode, because its set of children is meaningful.
+ */
+bool InstancedNode::
+safe_to_combine() const {
+  // This can happen iff the instance list is identical; see combine_with().
+  return true;
+}
+
+/**
+ * Transforms the contents of this node by the indicated matrix, if it means
+ * anything to do so.  For most kinds of nodes, this does nothing.
+ *
+ * This implementation is empty: the instance transforms are left untouched.
+ */
+void InstancedNode::
+xform(const LMatrix4 &mat) {
+}
+
+/**
+ * Collapses this node with the other node, if possible, and returns a pointer
+ * to the combined node, or NULL if the two nodes cannot safely be combined.
+ *
+ * Two nodes are only combined when both are exactly of type InstancedNode and
+ * both refer to the very same InstanceList object; in that case this node is
+ * returned.
+ *
+ * This function is called from GraphReducer::flatten(), and need not deal
+ * with children; its job is just to decide whether to collapse the two nodes
+ * and what the collapsed node should look like.
+ */
+PandaNode *InstancedNode::
+combine_with(PandaNode *other) {
+  if (!is_exact_type(get_class_type()) ||
+      !other->is_exact_type(get_class_type())) {
+    return nullptr;
+  }
+
+  InstancedNode *other_node = DCAST(InstancedNode, other);
+
+  Thread *current_thread = Thread::get_current_thread();
+  CDReader my_cdata(_cycler, current_thread);
+  CDReader their_cdata(other_node->_cycler, current_thread);
+
+  // The lists are compared by pointer, not by contents.
+  CPT(InstanceList) mine = my_cdata->_instances.get_read_pointer(current_thread);
+  CPT(InstanceList) theirs = their_cdata->_instances.get_read_pointer(current_thread);
+  if (mine != theirs) {
+    return nullptr;
+  }
+
+  return this;
+}
+
+/**
+ * This is used to support NodePath::calc_tight_bounds().  It is not intended
+ * to be called directly, and it has nothing to do with the normal Panda
+ * bounding-volume computation.
+ *
+ * If the node contains any geometry, this updates min_point and max_point to
+ * enclose its bounding box.  found_any is to be set true if the node has any
+ * geometry at all, or left alone if it has none.  This method may be called
+ * over several nodes, so it may enter with min_point, max_point, and
+ * found_any already set.
+ *
+ * Every child is visited once per instance, with the instance transform
+ * composed onto this node's net transform.  The return value is the given
+ * transform composed with this node's own transform, without any instance
+ * transform applied.
+ */
+CPT(TransformState) InstancedNode::
+calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point, bool &found_any,
+                  const TransformState *transform, Thread *current_thread) const {
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+  CPT(TransformState) next_transform = transform->compose(get_transform(current_thread));
+
+  // The set of children is the same for every instance, so it is fetched
+  // once here rather than once per instance.
+  Children cr = get_children(current_thread);
+  size_t num_children = cr.get_num_children();
+
+  for (const InstanceList::Instance &instance : *instances) {
+    CPT(TransformState) instance_transform = next_transform->compose(instance.get_transform());
+
+    for (size_t ci = 0; ci < num_children; ++ci) {
+      cr.get_child(ci)->calc_tight_bounds(min_point, max_point,
+                                          found_any, instance_transform,
+                                          current_thread);
+    }
+  }
+
+  return next_transform;
+}
+
+/**
+ * This function will be called during the cull traversal to perform any
+ * additional operations that should be performed at cull time.  This may
+ * include additional manipulation of render state or additional
+ * visible/invisible decisions, or any other arbitrary operation.
+ *
+ * Note that this function will *not* be called unless set_cull_callback() is
+ * called in the constructor of the derived class.  It is necessary to call
+ * set_cull_callback() to indicated that we require cull_callback() to be
+ * called.
+ *
+ * By the time this function is called, the node has already passed the
+ * bounding-volume test for the viewing frustum, and the node's transform and
+ * state have already been applied to the indicated CullTraverserData object.
+ *
+ * This implementation determines the list of instances to draw and stores it
+ * in data._instances.  If the traversal is already below another instanced
+ * node, the two lists are combined so that every parent instance is composed
+ * with every instance of this node.  When a view frustum or cull planes are
+ * set, instances for which no child is in view are removed from the list.
+ * Finally, the view frustum and cull planes are cleared from the data, so no
+ * further culling takes place below this node.
+ *
+ * The return value is true if this node should be visible, or false if it
+ * should be culled.
+ */
+bool InstancedNode::
+cull_callback(CullTraverser *trav, CullTraverserData &data) {
+  Thread *current_thread = trav->get_current_thread();
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+
+  if (data._instances != nullptr) {
+    // We are already under an instanced node.  Create a new combined list.
+    InstanceList *new_list = new InstanceList();
+    new_list->reserve(data._instances->size() * instances->size());
+    for (const InstanceList::Instance &parent_instance : *data._instances) {
+      for (const InstanceList::Instance &this_instance : *instances) {
+        new_list->append(parent_instance.get_transform()->compose(this_instance.get_transform()));
+      }
+    }
+    instances = new_list;
+  }
+
+  if (data._view_frustum != nullptr || !data._cull_planes->is_empty()) {
+    // Culling is on, so we need to figure out which instances are visible.
+    Children children = data.node_reader()->get_children();
+    data.node_reader()->release();
+
+    // Keep track of which instances should be culled away.
+    BitArray culled_instances;
+    culled_instances.set_range(0, instances->size());
+
+    for (size_t ii = 0; ii < instances->size(); ++ii) {
+      CullTraverserData instance_data(data);
+      instance_data.apply_transform((*instances)[ii].get_transform());
+
+      for (size_t ci = 0; ci < children.size(); ++ci) {
+        CullTraverserData child_data(instance_data, children.get_child(ci));
+        if (child_data.is_in_view(trav->get_camera_mask())) {
+          // Yep, the instance is in view.
+          culled_instances.clear_bit(ii);
+          break;
+        }
+      }
+    }
+
+    instances = instances->without(culled_instances);
+  } else {
+    data.node_reader()->release();
+  }
+
+  if (instances->empty()) {
+    // There are no instances, or they are all culled away.
+    return false;
+  }
+
+  data._instances = std::move(instances);
+
+  // Disable culling from this point on, for now.  It's probably not worth it
+  // to keep lists of transformed bounding volumes for each instance.
+  data._view_frustum = nullptr;
+  data._cull_planes = CullPlanes::make_empty();
+
+  return true;
+
+  /*
+  for (const InstanceList::Instance &instance : *instances) {
+    CullTraverserData instance_data(data);
+    instance_data.apply_transform(instance.get_transform());
+    trav->traverse_below(instance_data);
+  }
+  return false;
+  */
+}
+
+/**
+ * Writes the PandaNode description to the stream, followed by the number of
+ * instances in parentheses.
+ */
+void InstancedNode::
+output(std::ostream &out) const {
+  PandaNode::output(out);
+  out << " (" << get_num_instances() << " instances)";
+}
+
+/**
+ * Computes a BoundingVolume that represents the external contents of the
+ * node, and stores it in external_bounds.  This should encompass the internal
+ * bounds, but also the bounding volumes of all this node's children, which
+ * are passed in.
+ *
+ * The result is a BoundingSphere if btype is BT_sphere, and a BoundingBox
+ * otherwise.  It is left as default-constructed when there are no child
+ * volumes or no instances, and it is made infinite when a child volume is
+ * infinite or is neither a sphere nor another finite volume.
+ */
+void InstancedNode::
+compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                        BoundingVolume::BoundsType btype,
+                        const BoundingVolume **volumes, size_t num_volumes,
+                        int pipeline_stage, Thread *current_thread) const {
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+
+  PT(GeometricBoundingVolume) gbv;
+  if (btype == BoundingVolume::BT_sphere) {
+    gbv = new BoundingSphere;
+  } else {
+    gbv = new BoundingBox;
+  }
+
+  if (num_volumes == 0 || instances->empty()) {
+    external_bounds = gbv;
+    return;
+  }
+
+  // Compute a sphere at the origin, encompassing the children.  This may not
+  // be the most optimal shape, but it allows us to easily estimate a bounding
+  // volume without having to take each instance transform into account.
+  PN_stdfloat max_radius = 0;
+  LVector3 max_abs_box(0);
+
+  for (size_t i = 0; i < num_volumes; ++i) {
+    const BoundingVolume *child_volume = volumes[i];
+    if (child_volume->is_empty()) {
+      continue;
+    }
+    if (child_volume->is_infinite()) {
+      gbv->set_infinite();
+      break;
+    }
+    if (const BoundingSphere *child_sphere = child_volume->as_bounding_sphere()) {
+      // Accumulate the largest extent over all child spheres, rather than
+      // keeping only the extent of the last sphere encountered.
+      PN_stdfloat sphere_extent = child_sphere->get_center().length() + child_sphere->get_radius();
+      max_radius = std::max(max_radius, sphere_extent);
+    }
+    else if (const FiniteBoundingVolume *child_finite = child_volume->as_finite_bounding_volume()) {
+      LPoint3 min1 = child_finite->get_min();
+      LPoint3 max1 = child_finite->get_max();
+      max_abs_box.set(
+        std::max(max_abs_box[0], std::max(std::fabs(min1[0]), std::fabs(max1[0]))),
+        std::max(max_abs_box[1], std::max(std::fabs(min1[1]), std::fabs(max1[1]))),
+        std::max(max_abs_box[2], std::max(std::fabs(min1[2]), std::fabs(max1[2]))));
+    }
+    else {
+      gbv->set_infinite();
+      break;
+    }
+  }
+
+  max_radius = std::max(max_radius, max_abs_box.length());
+  if (max_radius == 0 || gbv->is_infinite()) {
+    external_bounds = gbv;
+    return;
+  }
+
+  // Now that we have a sphere encompassing the children, we will make a box
+  // surrounding all the instances, extended by the computed radius.
+  LPoint3 min_point = (*instances)[0].get_pos();
+  LPoint3 max_point(min_point);
+
+  for (const InstanceList::Instance &instance : *instances) {
+    // To make the math easier and not have to take rotations into account, we
+    // take the highest scale component and multiply it by the radius of the
+    // bounding sphere on the origin we just calculated.
+    LVecBase3 scale = instance.get_scale();
+    PN_stdfloat max_scale = std::max(std::fabs(scale[0]), std::max(std::fabs(scale[1]), std::fabs(scale[2])));
+    PN_stdfloat inst_radius = max_scale * max_radius;
+    LVector3 extends_by(inst_radius);
+    LPoint3 pos = instance.get_pos();
+    min_point = min_point.fmin(pos - extends_by);
+    max_point = max_point.fmax(pos + extends_by);
+  }
+
+  if (min_point == max_point) {
+    external_bounds = gbv;
+    return;
+  }
+
+  // If we really need to make a sphere, we use the center of the bounding box
+  // as our sphere center, and iterate again to find the furthest instance.
+  if (btype == BoundingVolume::BT_sphere) {
+    LPoint3 center = (min_point + max_point) * 0.5;
+
+    PN_stdfloat max_distance = 0;
+    for (const InstanceList::Instance &instance : *instances) {
+      LVecBase3 scale = instance.get_scale();
+      PN_stdfloat max_scale = std::max(std::fabs(scale[0]), std::max(std::fabs(scale[1]), std::fabs(scale[2])));
+      PN_stdfloat inst_radius = max_scale * max_radius;
+      PN_stdfloat distance = (instance.get_pos() - center).length() + inst_radius;
+      max_distance = std::max(max_distance, distance);
+    }
+
+    if (max_distance == 0) {
+      external_bounds = gbv;
+      return;
+    }
+    ((BoundingSphere *)gbv.p())->set_center(center);
+    ((BoundingSphere *)gbv.p())->set_radius(max_distance);
+  } else {
+    ((BoundingBox *)gbv.p())->set_min_max(min_point, max_point);
+  }
+
+  // If we have a transform, apply it to the bounding volume we just
+  // computed.
+  CPT(TransformState) transform = get_transform(current_thread);
+  if (!transform->is_identity()) {
+    gbv->xform(transform->get_mat());
+  }
+
+  external_bounds = gbv;
+}
+
+/**
+ * Tells the BamReader how to create objects of type InstancedNode.
+ */
+void InstancedNode::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.
+ */
+void InstancedNode::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  PandaNode::write_datagram(manager, dg);
+  manager->write_cdata(dg, _cycler);
+}
+
+/**
+ * Factory callback invoked by the BamReader when an InstancedNode is
+ * encountered in the Bam file.  Allocates the node with an empty name and
+ * lets it read its contents from the datagram via fillin().
+ */
+TypedWritable *InstancedNode::
+make_from_bam(const FactoryParams &params) {
+  InstancedNode *result = new InstancedNode("");
+
+  DatagramIterator scan;
+  BamReader *manager;
+  parse_params(params, scan, manager);
+
+  result->fillin(scan, manager);
+  return result;
+}
+
+/**
+ * This internal function is called by make_from_bam to read in all of the
+ * relevant data from the BamFile for the new InstancedNode.
+ */
+void InstancedNode::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  PandaNode::fillin(scan, manager);
+  manager->read_cdata(scan, _cycler);
+}
+
+/**
+ * Copy constructor.  The copy refers to the same instance list through its
+ * own _instances pointer.
+ */
+InstancedNode::CData::
+CData(const InstancedNode::CData &copy) :
+  _instances(copy._instances)
+{
+}
+
+/**
+ * Returns a newly-allocated copy of this cycle data, created with the copy
+ * constructor.
+ */
+CycleData *InstancedNode::CData::
+make_copy() const {
+  return new CData(*this);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.
+ */
+void InstancedNode::CData::
+write_datagram(BamWriter *manager, Datagram &dg) const {
+  CPT(InstanceList) instances = _instances.get_read_pointer();
+  manager->write_pointer(dg, instances.p());
+}
+
+/**
+ * Receives an array of pointers, one for each time manager->read_pointer()
+ * was called in fillin(). Returns the number of pointers processed.
+ */
+int InstancedNode::CData::
+complete_pointers(TypedWritable **p_list, BamReader *manager) {
+  int pi = CycleData::complete_pointers(p_list, manager);
+
+  _instances = DCAST(InstanceList, p_list[pi++]);
+  return pi;
+}
+
+/**
+ * This internal function is called to read in the relevant data from the
+ * BamFile for the cycle data of a new InstancedNode.  It requests a single
+ * pointer, which complete_pointers() later stores in _instances.
+ */
+void InstancedNode::CData::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  manager->read_pointer(scan);
+}

+ 136 - 0
panda/src/pgraph/instancedNode.h

@@ -0,0 +1,136 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.h
+ * @author rdb
+ * @date 2019-03-09
+ */
+
+#ifndef INSTANCEDNODE_H
+#define INSTANCEDNODE_H
+
+#include "pandabase.h"
+#include "pandaNode.h"
+#include "copyOnWritePointer.h"
+#include "instanceList.h"
+
+/**
+ * This is a special node that instances its contents using a list of
+ * transforms that get applied on top of the node's own transform.  This is a
+ * bit more limited than the regular instance_to mechanism, but it is a better
+ * choice for hardware instancing.
+ *
+ * For best performance, it is highly recommended to flatten the nodes under
+ * this (by calling flatten_strong()), since culling will not be performed for
+ * individual sub-nodes under each instance.
+ *
+ * @since 1.11.0
+ */
+class EXPCL_PANDA_PGRAPH InstancedNode : public PandaNode {
+PUBLISHED:
+  explicit InstancedNode(const std::string &name);
+
+protected:
+  InstancedNode(const InstancedNode &copy);
+
+public:
+  virtual ~InstancedNode();
+  virtual PandaNode *make_copy() const override;
+
+  INLINE size_t get_num_instances() const;
+  INLINE CPT(InstanceList) get_instances(Thread *current_thread = Thread::get_current_thread()) const;
+  PT(InstanceList) modify_instances();
+  void set_instances(PT(InstanceList) instances);
+
+PUBLISHED:
+  MAKE_PROPERTY(instances, modify_instances, set_instances);
+
+public:
+  virtual bool safe_to_flatten() const override;
+  virtual bool safe_to_combine() const override;
+  virtual void xform(const LMatrix4 &mat) override;
+  virtual PandaNode *combine_with(PandaNode *other) override;
+
+  virtual CPT(TransformState)
+    calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point,
+                      bool &found_any,
+                      const TransformState *transform,
+                      Thread *current_thread) const override;
+
+  virtual bool cull_callback(CullTraverser *trav, CullTraverserData &data) override;
+
+  virtual void output(std::ostream &out) const override;
+
+protected:
+  virtual void compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                                       BoundingVolume::BoundsType btype,
+                                       const BoundingVolume **volumes,
+                                       size_t num_volumes,
+                                       int pipeline_stage,
+                                       Thread *current_thread) const override;
+
+private:
+  // This is the data that must be cycled between pipeline stages.
+  class EXPCL_PANDA_PGRAPH CData final : public CycleData {
+  public:
+    INLINE CData();
+    CData(const CData &copy);
+    virtual CycleData *make_copy() const override;
+    virtual void write_datagram(BamWriter *manager, Datagram &dg) const override;
+    virtual int complete_pointers(TypedWritable **plist, BamReader *manager) override;
+    virtual void fillin(DatagramIterator &scan, BamReader *manager) override;
+    virtual TypeHandle get_parent_type() const override {
+      return InstancedNode::get_class_type();
+    }
+
+  private:
+    COWPT(InstanceList) _instances;
+
+    friend class InstancedNode;
+  };
+
+  PipelineCycler<CData> _cycler;
+  typedef CycleDataReader<CData> CDReader;
+  typedef CycleDataWriter<CData> CDWriter;
+  typedef CycleDataStageReader<CData> CDStageReader;
+  typedef CycleDataLockedStageReader<CData> CDLockedStageReader;
+  typedef CycleDataStageWriter<CData> CDStageWriter;
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg) override;
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager) override;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    PandaNode::init_type();
+    register_type(_type_handle, "InstancedNode",
+                  PandaNode::get_class_type());
+    CData::init_type();
+  }
+  virtual TypeHandle get_type() const override {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() override {
+    init_type();
+    return get_class_type();
+  }
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "instancedNode.I"
+
+#endif

+ 2 - 0
panda/src/pgraph/p3pgraph_composite2.cxx

@@ -20,3 +20,5 @@
 #include "geomDrawCallbackData.cxx"
 #include "geomDrawCallbackData.cxx"
 #include "geomNode.cxx"
 #include "geomNode.cxx"
 #include "geomTransformer.cxx"
 #include "geomTransformer.cxx"
+#include "instanceList.cxx"
+#include "instancedNode.cxx"

+ 57 - 35
panda/src/pgraph/pandaNode.cxx

@@ -2315,6 +2315,59 @@ compute_internal_bounds(CPT(BoundingVolume) &internal_bounds,
   internal_vertices = 0;
   internal_vertices = 0;
 }
 }
 
 
+/**
+ * Computes a BoundingVolume that represents the external contents of the
+ * node, and stores it in external_bounds.  This should encompass the internal
+ * bounds, but also the bounding volumes of all this node's children, which
+ * are passed in through volumes (num_volumes entries).
+ *
+ * The result is a BoundingBox if btype is BT_box, or if btype is not
+ * BT_sphere, the node has an identity transform, and every volume passed in
+ * is a BoundingBox; otherwise, it is a BoundingSphere.  When at least one
+ * volume is passed in, the result is made to enclose all of them and is then
+ * transformed by the node's transform if that is not the identity.  With no
+ * volumes, the freshly constructed volume is stored as-is.
+ */
+void PandaNode::
+compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                        BoundingVolume::BoundsType btype,
+                        const BoundingVolume **volumes, size_t num_volumes,
+                        int pipeline_stage, Thread *current_thread) const {
+
+  CPT(TransformState) transform = get_transform(current_thread);
+  PT(GeometricBoundingVolume) gbv;
+
+  if (btype == BoundingVolume::BT_box) {
+    gbv = new BoundingBox;
+  }
+  else if (btype == BoundingVolume::BT_sphere || !transform->is_identity()) {
+    gbv = new BoundingSphere;
+  }
+  else {
+    // If all of the child volumes are a BoundingBox, and we have no
+    // transform, then our volume is also a BoundingBox.
+    bool all_box = true;
+
+    for (size_t i = 0; i < num_volumes; ++i) {
+      if (volumes[i]->as_bounding_box() == nullptr) {
+        all_box = false;
+      }
+    }
+
+    if (all_box) {
+      gbv = new BoundingBox;
+    } else {
+      gbv = new BoundingSphere;
+    }
+  }
+
+  if (num_volumes > 0) {
+    const BoundingVolume **child_begin = &volumes[0];
+    const BoundingVolume **child_end = child_begin + num_volumes;
+    ((BoundingVolume *)gbv)->around(child_begin, child_end);
+
+    // If we have a transform, apply it to the bounding volume we just
+    // computed.
+    if (!transform->is_identity()) {
+      gbv->xform(transform->get_mat());
+    }
+  }
+
+  external_bounds = gbv;
+}
+
 /**
 /**
  * Called after a scene graph update that either adds or remove parents from
  * Called after a scene graph update that either adds or remove parents from
  * this node, this just provides a hook for derived PandaNode objects that
  * this node, this just provides a hook for derived PandaNode objects that
@@ -3263,7 +3316,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
 #endif
     int child_volumes_i = 0;
     int child_volumes_i = 0;
 
 
-    bool all_box = true;
     CPT(BoundingVolume) internal_bounds = nullptr;
     CPT(BoundingVolume) internal_bounds = nullptr;
 
 
     if (update_bounds) {
     if (update_bounds) {
@@ -3276,9 +3328,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
 #endif
         nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
         nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
         child_volumes[child_volumes_i++] = internal_bounds;
         child_volumes[child_volumes_i++] = internal_bounds;
-        if (internal_bounds->as_bounding_box() == nullptr) {
-          all_box = false;
-        }
       }
       }
     }
     }
 
 
@@ -3374,9 +3423,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
 #endif
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             child_volumes[child_volumes_i++] = child_cdataw->_external_bounds;
             child_volumes[child_volumes_i++] = child_cdataw->_external_bounds;
-            if (child_cdataw->_external_bounds->as_bounding_box() == nullptr) {
-              all_box = false;
-            }
           }
           }
           num_vertices += child_cdataw->_nested_vertices;
           num_vertices += child_cdataw->_nested_vertices;
         }
         }
@@ -3429,9 +3475,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
 #endif
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             child_volumes[child_volumes_i++] = child_cdata->_external_bounds;
             child_volumes[child_volumes_i++] = child_cdata->_external_bounds;
-            if (child_cdata->_external_bounds->as_bounding_box() == nullptr) {
-              all_box = false;
-            }
           }
           }
           num_vertices += child_cdata->_nested_vertices;
           num_vertices += child_cdata->_nested_vertices;
         }
         }
@@ -3485,38 +3528,17 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
         if (update_bounds) {
         if (update_bounds) {
           cdataw->_nested_vertices = num_vertices;
           cdataw->_nested_vertices = num_vertices;
 
 
-          CPT(TransformState) transform = get_transform(current_thread);
-          PT(GeometricBoundingVolume) gbv;
-
           BoundingVolume::BoundsType btype = cdataw->_bounds_type;
           BoundingVolume::BoundsType btype = cdataw->_bounds_type;
           if (btype == BoundingVolume::BT_default) {
           if (btype == BoundingVolume::BT_default) {
             btype = bounds_type;
             btype = bounds_type;
           }
           }
 
 
-          if (btype == BoundingVolume::BT_box ||
-              (btype != BoundingVolume::BT_sphere && all_box && transform->is_identity())) {
-            // If all of the child volumes are a BoundingBox, and we have no
-            // transform, then our volume is also a BoundingBox.
+          compute_external_bounds(cdataw->_external_bounds, btype,
+                                  child_volumes, child_volumes_i,
+                                  pipeline_stage, current_thread);
 
 
-            gbv = new BoundingBox;
-          } else {
-            // Otherwise, it's a sphere.
-            gbv = new BoundingSphere;
-          }
-
-          if (child_volumes_i > 0) {
-            const BoundingVolume **child_begin = &child_volumes[0];
-            const BoundingVolume **child_end = child_begin + child_volumes_i;
-            ((BoundingVolume *)gbv)->around(child_begin, child_end);
-
-            // If we have a transform, apply it to the bounding volume we just
-            // computed.
-            if (!transform->is_identity()) {
-              gbv->xform(transform->get_mat());
-            }
-          }
+          nassertr(cdataw->_external_bounds != nullptr, cdataw);
 
 
-          cdataw->_external_bounds = gbv;
           cdataw->_last_bounds_update = next_update;
           cdataw->_last_bounds_update = next_update;
         }
         }
 
 

+ 6 - 0
panda/src/pgraph/pandaNode.h

@@ -351,6 +351,12 @@ protected:
                                        int &internal_vertices,
                                        int &internal_vertices,
                                        int pipeline_stage,
                                        int pipeline_stage,
                                        Thread *current_thread) const;
                                        Thread *current_thread) const;
+  virtual void compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                                       BoundingVolume::BoundsType btype,
+                                       const BoundingVolume **volumes,
+                                       size_t num_volumes,
+                                       int pipeline_stage,
+                                       Thread *current_thread) const;
   virtual void parents_changed();
   virtual void parents_changed();
   virtual void children_changed();
   virtual void children_changed();
   virtual void transform_changed();
   virtual void transform_changed();

+ 1 - 1
panda/src/pgraph/shaderAttrib.I

@@ -82,7 +82,7 @@ get_shader_priority() const {
 
 
 /**
 /**
  * Returns the number of geometry instances.  A value of 0 means not to use
  * Returns the number of geometry instances.  A value of 0 means not to use
- * instancing at all.
+ * instancing at all.  This value is ignored if F_hardware_instancing is set.
  */
  */
 INLINE int ShaderAttrib::
 INLINE int ShaderAttrib::
 get_instance_count() const {
 get_instance_count() const {

+ 2 - 0
panda/src/pgraph/shaderAttrib.cxx

@@ -249,6 +249,8 @@ set_shader_inputs(const pvector<ShaderInput> &inputs) const {
  * Sets the geometry instance count.  Do not confuse this with instanceTo,
  * Sets the geometry instance count.  Do not confuse this with instanceTo,
  * which is used for animation instancing, and has nothing to do with this.  A
  * which is used for animation instancing, and has nothing to do with this.  A
  * value of 0 means not to use instancing at all.
  * value of 0 means not to use instancing at all.
+ *
+ * This value should not be set if F_hardware_instancing is also set.
  */
  */
 CPT(RenderAttrib) ShaderAttrib::
 CPT(RenderAttrib) ShaderAttrib::
 set_instance_count(int instance_count) const {
 set_instance_count(int instance_count) const {

+ 1 - 0
panda/src/pgraph/shaderAttrib.h

@@ -51,6 +51,7 @@ PUBLISHED:
     F_subsume_alpha_test  = 1 << 1,  // Shader promises to subsume the alpha test using TEXKILL
     F_subsume_alpha_test  = 1 << 1,  // Shader promises to subsume the alpha test using TEXKILL
     F_hardware_skinning   = 1 << 2,  // Shader needs pre-animated vertices
     F_hardware_skinning   = 1 << 2,  // Shader needs pre-animated vertices
     F_shader_point_size   = 1 << 3,  // Shader provides point size, not RenderModeAttrib
     F_shader_point_size   = 1 << 3,  // Shader provides point size, not RenderModeAttrib
+    F_hardware_instancing = 1 << 4,  // Shader needs instance list
   };
   };
 
 
   INLINE bool               has_shader() const;
   INLINE bool               has_shader() const;

+ 2 - 2
panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx

@@ -472,14 +472,14 @@ end_frame(Thread *current_thread) {
 bool TinyGraphicsStateGuardian::
 bool TinyGraphicsStateGuardian::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
 #ifndef NDEBUG
 #ifndef NDEBUG
   if (tinydisplay_cat.is_spam()) {
   if (tinydisplay_cat.is_spam()) {
     tinydisplay_cat.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
     tinydisplay_cat.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
   }
   }
 #endif  // NDEBUG
 #endif  // NDEBUG
 
 
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
     return false;
   }
   }
   nassertr(_data_reader != nullptr, false);
   nassertr(_data_reader != nullptr, false);

+ 1 - 1
panda/src/tinydisplay/tinyGraphicsStateGuardian.h

@@ -64,7 +64,7 @@ public:
 
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
                               bool force);
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,