Browse Source

better implementation of vertex-column-alignment

David Rose 14 years ago
parent
commit
6824cb6928

+ 15 - 0
panda/src/egg2pg/config_egg2pg.cxx

@@ -184,6 +184,21 @@ ConfigVariableInt egg_vertex_max_num_joints
           "more than this number of joints, the joints with the lesser membership "
           "value are ignored.  Set this to -1 to allow any number of joints."));
 
+ConfigVariableBool egg_vertex_animation_align_16
+("egg-vertex-animation-align-16", 
+#ifdef LINMATH_VECTORIZE
+ true,
+#else
+ false,
+#endif
+ PRC_DESC("If this is true, then animated vertices will be created with 4-component "
+          "floats and aligned to 16-byte boundaries, to allow efficient vectorization "
+          "(e.g. SSE2) operations when computing animations.  If this is false, "
+          "animated vertices will be packed as tightly as possible, in the normal way, "
+          "which will optimize the amount of memory that must be sent to the graphics "
+          "card, but prevent the use of SSE2 to calculate animation.  This does not "
+          "affect unanimated vertices, which are always packed tightly."));
+
 ConfigureFn(config_egg2pg) {
   init_libegg2pg();
 }

+ 1 - 0
panda/src/egg2pg/config_egg2pg.h

@@ -54,6 +54,7 @@ extern EXPCL_PANDAEGG ConfigVariableBool egg_emulate_bface;
 extern EXPCL_PANDAEGG ConfigVariableBool egg_preload_simple_textures;
 extern EXPCL_PANDAEGG ConfigVariableDouble egg_vertex_membership_quantize;
 extern EXPCL_PANDAEGG ConfigVariableInt egg_vertex_max_num_joints;
+extern EXPCL_PANDAEGG ConfigVariableBool egg_vertex_animation_align_16;
 
 extern EXPCL_PANDAEGG void init_libegg2pg();
 

+ 27 - 6
panda/src/egg2pg/eggLoader.cxx

@@ -2213,15 +2213,36 @@ make_vertex_data(const EggRenderState *render_state,
     return (*di).second;
   }
 
+  bool align_16 = false;
+  if (is_dynamic) {
+    align_16 = egg_vertex_animation_align_16;
+  }
+  
   PT(GeomVertexArrayFormat) array_format = new GeomVertexArrayFormat;
-  array_format->add_column
-    (InternalName::get_vertex(), vertex_pool->get_num_dimensions(),
-     Geom::NT_stdfloat, Geom::C_point);
+  if (align_16) {
+    // Enforce a 4-component float and a 16-byte alignment.
+    array_format->add_column
+      (InternalName::get_vertex(), 4,
+       Geom::NT_stdfloat, Geom::C_point, -1, 16);
+  } else {
+    // Allow a tightly-packed float column with the pool's own number of dimensions.
+    array_format->add_column
+      (InternalName::get_vertex(), vertex_pool->get_num_dimensions(),
+       Geom::NT_stdfloat, Geom::C_point);
+  }
 
   if (vertex_pool->has_normals()) {
-    array_format->add_column
-      (InternalName::get_normal(), 3, 
-       Geom::NT_stdfloat, Geom::C_vector);
+    if (align_16) {
+      // Enforce a 4-component float and a 16-byte alignment.
+      array_format->add_column
+        (InternalName::get_normal(), 4,
+         Geom::NT_stdfloat, Geom::C_vector, -1, 16);
+    } else {
+      // Allow a tightly-packed 3-component float.
+      array_format->add_column
+        (InternalName::get_normal(), 3,
+         Geom::NT_stdfloat, Geom::C_vector);
+    }
   }
 
   if (!ignore_color) {

+ 26 - 14
panda/src/glstuff/glGeomMunger_src.cxx

@@ -96,7 +96,7 @@ munge_format_impl(const GeomVertexFormat *orig,
     // Replace the existing color format with the new format.
     new_array_format->add_column
       (InternalName::get_color(), 4, NT_uint8,
-       C_color, color_type->get_start());
+       C_color, color_type->get_start(), color_type->get_column_alignment());
   }
 
   if (animation.get_animation_type() == AT_hardware) {
@@ -144,7 +144,8 @@ munge_format_impl(const GeomVertexFormat *orig,
       const GeomVertexColumn *column = format->get_column(i);
       PT(GeomVertexArrayFormat) new_array_format = new GeomVertexArrayFormat;
       new_array_format->add_column(column->get_name(), column->get_num_components(),
-                                   column->get_numeric_type(), column->get_contents());
+                                   column->get_numeric_type(), column->get_contents(),
+                                   -1, column->get_column_alignment());
       new_format->add_array(new_array_format);
     }
     format = GeomVertexFormat::register_format(new_format);
@@ -158,7 +159,8 @@ munge_format_impl(const GeomVertexFormat *orig,
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -166,7 +168,8 @@ munge_format_impl(const GeomVertexFormat *orig,
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -174,7 +177,8 @@ munge_format_impl(const GeomVertexFormat *orig,
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -196,7 +200,8 @@ munge_format_impl(const GeomVertexFormat *orig,
             
             if (texcoord_type != (const GeomVertexColumn *)NULL) {
               new_array_format->add_column
-                (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord);
+                (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord,
+                 -1, texcoord_type->get_column_alignment());
             } else {
               // We have to add something as a placeholder, even if the
               // texture coordinates aren't defined.
@@ -237,7 +242,7 @@ premunge_format_impl(const GeomVertexFormat *orig) {
     // Replace the existing color format with the new format.
     new_array_format->add_column
       (InternalName::get_color(), 4, NT_uint8,
-       C_color, color_type->get_start());
+       C_color, color_type->get_start(), color_type->get_column_alignment());
   }
 
   CPT(GeomVertexFormat) format = GeomVertexFormat::register_format(new_format);
@@ -249,7 +254,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
       const GeomVertexColumn *column = format->get_column(i);
       PT(GeomVertexArrayFormat) new_array_format = new GeomVertexArrayFormat;
       new_array_format->add_column(column->get_name(), column->get_num_components(),
-                                   column->get_numeric_type(), column->get_contents());
+                                   column->get_numeric_type(), column->get_contents(),
+                                   -1, column->get_column_alignment());
       new_format->add_array(new_array_format);
     }
     format = GeomVertexFormat::register_format(new_format);
@@ -268,7 +274,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -276,7 +283,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -284,7 +292,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
     if (column != (const GeomVertexColumn *)NULL) {
       new_array_format->add_column
         (column->get_name(), column->get_num_components(), 
-         column->get_numeric_type(), column->get_contents());
+         column->get_numeric_type(), column->get_contents(),
+         -1, column->get_column_alignment());
       new_format->remove_column(column->get_name());
     }
 
@@ -307,7 +316,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
             
             if (texcoord_type != (const GeomVertexColumn *)NULL) {
               new_array_format->add_column
-                (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord);
+                (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord,
+                 -1, texcoord_type->get_column_alignment());
             } else {
               // We have to add something as a placeholder, even if the
               // texture coordinates aren't defined.
@@ -320,7 +330,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
     }
 
     // Now go through the remaining arrays and make sure they are
-    // tightly packed.  If not, repack them.
+    // tightly packed (with the column alignment restrictions).  If
+    // not, repack them.
     for (int i = 0; i < new_format->get_num_arrays(); ++i) {
       CPT(GeomVertexArrayFormat) orig_a = new_format->get_array(i);
       if (orig_a->count_unused_space() != 0) {
@@ -328,7 +339,8 @@ premunge_format_impl(const GeomVertexFormat *orig) {
         for (int j = 0; j < orig_a->get_num_columns(); ++j) {
           const GeomVertexColumn *column = orig_a->get_column(j);
           new_a->add_column(column->get_name(), column->get_num_components(),
-                            column->get_numeric_type(), column->get_contents());
+                            column->get_numeric_type(), column->get_contents(),
+                            -1, column->get_column_alignment());
         }
         new_format->set_array(i, new_a);
       }

+ 7 - 2
panda/src/gobj/config_gobj.cxx

@@ -273,10 +273,15 @@ ConfigVariableBool vertices_float64
           "slower."));
 
 ConfigVariableInt vertex_column_alignment
-("vertex-column-alignment", 0,
+("vertex-column-alignment", 1,
  PRC_DESC("This specifies the default byte alignment for each column of "
           "data within a GeomVertexData when it is assembled using the default "
-          "interfaces.  See GeomVertexFormat::set_column_alignment()."));
+          "interfaces.  Normally, you should not change this config variable "
+          "(which would change this value globally), but instead specify any "
+          "alignment requirements on a per-column basis as you construct a "
+          "GeomVertexFormat.  Setting this value globally could result in "
+          "much needless wasted space in all vertex data objects, but it "
+          "could be useful for simple experiments."));
 
 ConfigVariableEnum<AutoTextureScale> textures_power_2
 ("textures-power-2", ATS_down,

+ 4 - 4
panda/src/gobj/geomPrimitive.cxx

@@ -1425,10 +1425,10 @@ release_all() {
 CPT(GeomVertexArrayFormat) GeomPrimitive::
 get_index_format() const {
   PT(GeomVertexArrayFormat) format = new GeomVertexArrayFormat;
-  // It's important that the index format not attempt to have any kind
-  // of SSE2 alignment or whatever.  It needs to be tightly packed.
-  format->set_column_alignment(1);
-  format->add_column(InternalName::get_index(), 1, get_index_type(), C_index);
+  // It's important that the index format *not* respect the global
+  // setting of vertex-column-alignment.  It needs to be tightly
+  // packed, so we specify an explicit column_alignment of 1.
+  format->add_column(InternalName::get_index(), 1, get_index_type(), C_index, 0, 1);
   return GeomVertexArrayFormat::register_format(format);
 }
 

+ 23 - 43
panda/src/gobj/geomVertexArrayFormat.I

@@ -72,44 +72,38 @@ set_stride(int stride) {
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: GeomVertexArrayFormat::get_column_alignment
+//     Function: GeomVertexArrayFormat::get_pad_to
 //       Access: Published
-//  Description: See set_column_alignment().
+//  Description: Returns the byte divisor to which the data record
+//               must be padded to meet hardware limitations.  For
+//               instance, if this is 4, the stride will be
+//               automatically rounded up to the next multiple of 4
+//               bytes.  This value is automatically increased as
+//               needed to ensure the individual numeric components in
+//               the array are word-aligned.
 ////////////////////////////////////////////////////////////////////
 INLINE int GeomVertexArrayFormat::
-get_column_alignment() const {
-  return _column_alignment;
+get_pad_to() const {
+  return _pad_to;
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: GeomVertexArrayFormat::set_column_alignment
+//     Function: GeomVertexArrayFormat::set_pad_to
 //       Access: Published
-//  Description: This specifies the byte alignment for each
-//               column of data within the format when add_column() is
-//               subsequently called with default parameters.
-//               Normally this is 0 or 1 to specify no particular
-//               alignment, but you may specify a higher number, for
-//               instace 4 to guarantee that all columns start at a
-//               word alignment, or 16 to align all columns for SSE2
-//               processing.  This will introduce unused bytes between
-//               columns as needed to guarantee the requested
-//               alignment.  
-//
-//               Note that this does not change existing columns, only
-//               subsequent columns; and if you specify the start byte
-//               explicitly in add_column(), it will override this
-//               setting.  Note also that there is no point in
-//               exceeding the memory alignment of Panda3D itself,
-//               which is compiled into Panda and can be determined by
-//               MemoryHook::get_memory_alignment().
-//
-//               Also see the config variable vertex-column-alignment
-//               for a way to change the global default.
+//  Description: Explicitly sets the byte divisor to which the data
+//               record must be padded to meet hardware limitations.
+//               See get_pad_to().  Normally it is not necessary to
+//               call this unless you have some specific requirements
+//               for row-to-row data alignment.  Note that this value
+//               may be automatically increased at each subsequent
+//               call to add_column().
 ////////////////////////////////////////////////////////////////////
 INLINE void GeomVertexArrayFormat::
-set_column_alignment(int column_alignment) {
-  nassertv(!_is_registered);
-  _column_alignment = column_alignment;
+set_pad_to(int pad_to) {
+  nassertv(pad_to >= 1);
+
+  _pad_to = pad_to;
+  _stride = ((_stride + _pad_to - 1) / _pad_to) * _pad_to;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -124,20 +118,6 @@ get_total_bytes() const {
   return _total_bytes;
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: GeomVertexArrayFormat::get_pad_to
-//       Access: Published
-//  Description: Returns the byte divisor to which the data record
-//               must be padded to meet hardware limitations.  For
-//               instance, if this is 4, the stride will be
-//               automatically rounded up to the next multiple of 4
-//               bytes.
-////////////////////////////////////////////////////////////////////
-INLINE int GeomVertexArrayFormat::
-get_pad_to() const {
-  return _pad_to;
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexArrayFormat::get_num_columns
 //       Access: Published

+ 8 - 39
panda/src/gobj/geomVertexArrayFormat.cxx

@@ -34,7 +34,6 @@ GeomVertexArrayFormat::
 GeomVertexArrayFormat() :
   _is_registered(false),
   _stride(0),
-  _column_alignment(vertex_column_alignment),
   _total_bytes(0),
   _pad_to(1),
   _columns_unsorted(false)
@@ -52,7 +51,6 @@ GeomVertexArrayFormat(InternalName *name0, int num_components0,
                       GeomVertexArrayFormat::Contents contents0) :
   _is_registered(false),
   _stride(0),
-  _column_alignment(vertex_column_alignment),
   _total_bytes(0),
   _pad_to(1),
   _columns_unsorted(false)
@@ -74,7 +72,6 @@ GeomVertexArrayFormat(InternalName *name0, int num_components0,
                       GeomVertexArrayFormat::Contents contents1) :
   _is_registered(false),
   _stride(0),
-  _column_alignment(vertex_column_alignment),
   _total_bytes(0),
   _pad_to(1),
   _columns_unsorted(false)
@@ -100,7 +97,6 @@ GeomVertexArrayFormat(InternalName *name0, int num_components0,
                       GeomVertexArrayFormat::Contents contents2) :
   _is_registered(false),
   _stride(0),
-  _column_alignment(vertex_column_alignment),
   _total_bytes(0),
   _pad_to(1),
   _columns_unsorted(false)
@@ -130,7 +126,6 @@ GeomVertexArrayFormat(InternalName *name0, int num_components0,
                       GeomVertexArrayFormat::Contents contents3) :
   _is_registered(false),
   _stride(0),
-  _column_alignment(vertex_column_alignment),
   _total_bytes(0),
   _pad_to(1),
   _columns_unsorted(false)
@@ -150,7 +145,6 @@ GeomVertexArrayFormat::
 GeomVertexArrayFormat(const GeomVertexArrayFormat &copy) :
   _is_registered(false),
   _stride(copy._stride),
-  _column_alignment(copy._column_alignment),
   _total_bytes(copy._total_bytes),
   _pad_to(copy._pad_to),
   _columns_unsorted(copy._columns_unsorted)
@@ -170,7 +164,6 @@ void GeomVertexArrayFormat::
 operator = (const GeomVertexArrayFormat &copy) {
   nassertv(!_is_registered);
   _stride = copy._stride;
-  _column_alignment = copy._column_alignment;
   _total_bytes = copy._total_bytes;
   _pad_to = copy._pad_to;
 
@@ -237,22 +230,14 @@ unref() const {
 int GeomVertexArrayFormat::
 add_column(InternalName *name, int num_components, 
            GeomVertexArrayFormat::NumericType numeric_type, 
-           GeomVertexArrayFormat::Contents contents, int start) {
+           GeomVertexArrayFormat::Contents contents, int start,
+           int column_alignment) {
   if (start < 0) {
     start = _total_bytes;
-    if (_column_alignment > 1) {
-      // Round up to the next multiple of _column_alignment.
-      start = ((start + (_column_alignment - 1)) / _column_alignment) * _column_alignment;
-    }
-
-    GeomVertexColumn temp_column
-      (name, num_components, numeric_type, contents, 0);
-    int pad_to = temp_column.get_component_bytes();
-    start = ((start + pad_to - 1) / pad_to) * pad_to;
   }
 
-  return add_column(GeomVertexColumn(name, num_components, 
-                                     numeric_type, contents, start));
+  return add_column(GeomVertexColumn(name, num_components, numeric_type, contents, 
+                                     start, column_alignment));
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -286,18 +271,11 @@ add_column(const GeomVertexColumn &column) {
     orig_column = get_column(column.get_start(), column.get_total_bytes());
   }
 
-  int column_bytes = column.get_total_bytes();
-  if (_column_alignment > 1 && (column.get_start() % _column_alignment == 0)) {
-    // Round up to the next multiple of _column_alignment.
-    column_bytes = ((column_bytes + _column_alignment - 1) / _column_alignment) * _column_alignment;
-  }
-
-  _total_bytes = max(_total_bytes, column.get_start() + column_bytes);
-  _pad_to = max(_pad_to, column.get_component_bytes());
+  _total_bytes = max(_total_bytes, column.get_start() + column.get_total_bytes());
+  _pad_to = max(_pad_to, column.get_column_alignment());
   _stride = max(_stride, _total_bytes);
-  _stride = ((_stride + _pad_to - 1) / _pad_to) * _pad_to;
-  if (_column_alignment > 1) {
-    _stride = ((_stride + _column_alignment - 1) / _column_alignment) * _column_alignment;
+  if (_pad_to > 1) {
+    _stride = ((_stride + _pad_to - 1) / _pad_to) * _pad_to;
   }
 
   GeomVertexColumn *new_column = new GeomVertexColumn(column);
@@ -575,14 +553,6 @@ compare_to(const GeomVertexArrayFormat &other) const {
   if (_stride != other._stride) {
     return _stride - other._stride;
   }
-  /*
-    // We don't compare column_alignment.  That's a setting used only
-    // when constructing the format, and no longer relevant after it's
-    // been constructed.
-  if (_column_alignment != other._column_alignment) {
-    return _column_alignment - other._column_alignment;
-  }
-  */
   if (_total_bytes != other._total_bytes) {
     return _total_bytes - other._total_bytes;
   }
@@ -760,7 +730,6 @@ fillin(DatagramIterator &scan, BamReader *manager) {
   TypedWritableReferenceCount::fillin(scan, manager);
   nassertv(!_is_registered);
 
-  // Maybe we should record _column_alignment, but we don't.
   _stride = scan.get_uint16();
   _total_bytes = scan.get_uint16();
   _pad_to = scan.get_uint8();

+ 3 - 5
panda/src/gobj/geomVertexArrayFormat.h

@@ -85,15 +85,14 @@ PUBLISHED:
   INLINE int get_stride() const;
   INLINE void set_stride(int stride);
 
-  INLINE int get_column_alignment() const;
-  INLINE void set_column_alignment(int column_alignment);
+  INLINE int get_pad_to() const;
+  INLINE void set_pad_to(int pad_to);
 
   INLINE int get_total_bytes() const;
-  INLINE int get_pad_to() const;
 
   int add_column(InternalName *name, int num_components,
                  NumericType numeric_type, Contents contents,
-                 int start = -1);
+                 int start = -1, int column_alignment = 0);
   int add_column(const GeomVertexColumn &column);
   void remove_column(const InternalName *name);
   void clear_columns();
@@ -131,7 +130,6 @@ private:
 
   bool _is_registered;
   int _stride;
-  int _column_alignment;
   int _total_bytes;
   int _pad_to;
 

+ 21 - 1
panda/src/gobj/geomVertexColumn.I

@@ -33,12 +33,13 @@ GeomVertexColumn() :
 INLINE GeomVertexColumn::
 GeomVertexColumn(InternalName *name, int num_components,
                  NumericType numeric_type, Contents contents,
-                 int start) :
+                 int start, int column_alignment) :
   _name(name),
   _num_components(num_components),
   _numeric_type(numeric_type),
   _contents(contents),
   _start(start),
+  _column_alignment(column_alignment),
   _packer(NULL)
 {
   setup();
@@ -56,6 +57,7 @@ GeomVertexColumn(const GeomVertexColumn &copy) :
   _numeric_type(copy._numeric_type),
   _contents(copy._contents),
   _start(copy._start),
+  _column_alignment(copy._column_alignment),
   _packer(NULL)
 {
   setup();
@@ -148,6 +150,21 @@ get_start() const {
   return _start;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexColumn::get_column_alignment
+//       Access: Published
+//  Description: Returns the alignment requirements for this column.
+//               If this is greater than 1, it restricts the column to
+//               appear only on memory addresses that are integer
+//               multiples of this value; this has implications for
+//               this column's start value, as well as the stride of
+//               the resulting array.
+////////////////////////////////////////////////////////////////////
+INLINE int GeomVertexColumn::
+get_column_alignment() const {
+  return _column_alignment;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexColumn::get_component_bytes
 //       Access: Published
@@ -274,6 +291,9 @@ compare_to(const GeomVertexColumn &other) const {
   if (_start != other._start) {
     return _start - other._start;
   }
+  if (_column_alignment != other._column_alignment) {
+    return _column_alignment - other._column_alignment;
+  }
   return 0;
 }
 

+ 11 - 0
panda/src/gobj/geomVertexColumn.cxx

@@ -29,6 +29,7 @@ operator = (const GeomVertexColumn &copy) {
   _numeric_type = copy._numeric_type;
   _contents = copy._contents;
   _start = copy._start;
+  _column_alignment = copy._column_alignment;
 
   delete _packer;
   _packer = NULL;
@@ -133,6 +134,16 @@ setup() {
     break;
   }
 
+  if (_column_alignment < 1) {
+    // The default column alignment is to align to the individual
+    // numeric components, or to vertex_column_alignment, whichever is
+    // greater.
+    _column_alignment = max(_component_bytes, (int)vertex_column_alignment);
+  }
+
+  // Enforce the column alignment requirements on the _start byte.
+  _start = ((_start + _column_alignment - 1) / _column_alignment) * _column_alignment;
+
   _total_bytes = _component_bytes * _num_components;
 
   _packer = make_packer();

+ 3 - 1
panda/src/gobj/geomVertexColumn.h

@@ -44,7 +44,7 @@ private:
 PUBLISHED:
   INLINE GeomVertexColumn(InternalName *name, int num_components,
                           NumericType numeric_type, Contents contents,
-                          int start);
+                          int start, int column_alignment = 0);
   INLINE GeomVertexColumn(const GeomVertexColumn &copy);
   void operator = (const GeomVertexColumn &copy);
   INLINE ~GeomVertexColumn();
@@ -55,6 +55,7 @@ PUBLISHED:
   INLINE NumericType get_numeric_type() const;
   INLINE Contents get_contents() const;
   INLINE int get_start() const;
+  INLINE int get_column_alignment() const;
   INLINE int get_component_bytes() const;
   INLINE int get_total_bytes() const;
   INLINE bool has_homogeneous_coord() const;
@@ -91,6 +92,7 @@ private:
   NumericType _numeric_type;
   Contents _contents;
   int _start;
+  int _column_alignment;
   int _component_bytes;
   int _total_bytes;
   Packer *_packer;

+ 90 - 21
panda/src/gobj/geomVertexData.cxx

@@ -1802,6 +1802,7 @@ update_animated_vertices(GeomVertexData::CData *cdata, Thread *current_thread) {
   }
 }
 
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexData::do_transform_point_column
 //       Access: Private
@@ -1812,22 +1813,24 @@ void GeomVertexData::
 do_transform_point_column(const GeomVertexFormat *format, GeomVertexRewriter &data,
                           const LMatrix4 &mat, int begin_row, int end_row) {
   const GeomVertexColumn *data_column = data.get_column();
+  PT(GeomVertexArrayDataHandle) data_handle = data.get_array_handle();
 
-  if (data_column->get_num_values() == 3 &&
+  if ((data_column->get_num_values() == 3 || data_column->get_num_values() == 4) &&
       data_column->get_numeric_type() == NT_float32) {
-    // The table of points is a table of LPoint3f's.  Optimize this
-    // common case.
+    // The table of points is a table of LPoint3f's or LPoint4f's.
+    // Optimize this common case.
     PT(GeomVertexArrayDataHandle) data_handle = data.get_array_handle();
-    PT(GeomVertexArrayData) data_array = data_handle->get_object();
 
+    size_t stride = data.get_stride();
+    size_t num_rows = end_row - begin_row;
     unsigned char *datat = data_handle->get_write_pointer();
-    datat += data_column->get_start();
-    size_t stride = data_array->get_array_format()->get_stride();
-
+    datat += data_column->get_start() + begin_row * stride;
     LMatrix4f matf = LCAST(float, mat);
-    for (int j = begin_row; j < end_row; ++j) {
-      LPoint3f &vertex = *(LPoint3f *)(&datat[j * stride]);
-      vertex = vertex * matf;
+
+    if (data_column->get_num_values() == 3) {
+      table_xform_point3f(datat, num_rows, stride, matf);
+    } else {
+      table_xform_vecbase4f(datat, num_rows, stride, matf);
     }
     
   } else if (data_column->get_num_values() == 4) {
@@ -1863,21 +1866,22 @@ do_transform_vector_column(const GeomVertexFormat *format, GeomVertexRewriter &d
                           const LMatrix4 &mat, int begin_row, int end_row) {
   const GeomVertexColumn *data_column = data.get_column();
 
-  if (data_column->get_num_values() == 3 &&
+  if ((data_column->get_num_values() == 3 || data_column->get_num_values() == 4) &&
       data_column->get_numeric_type() == NT_float32) {
-    // The table of vectors is a table of LVector3f's.  Optimize this
-    // common case.
+    // The table of vectors is a table of LVector3f's or LVector4f's.
+    // Optimize this common case.
     PT(GeomVertexArrayDataHandle) data_handle = data.get_array_handle();
-    PT(GeomVertexArrayData) data_array = data_handle->get_object();
 
+    size_t stride = data.get_stride();
+    size_t num_rows = end_row - begin_row;
     unsigned char *datat = data_handle->get_write_pointer();
-    datat += data_column->get_start();
-    size_t stride = data_array->get_array_format()->get_stride();
-
+    datat += data_column->get_start() + begin_row * stride;
     LMatrix4f matf = LCAST(float, mat);
-    for (int j = begin_row; j < end_row; ++j) {
-      LVector3f &vector = *(LVector3f *)(&datat[j * stride]);
-      vector = vector * matf;
+
+    if (data_column->get_num_values() == 3) {
+      table_xform_vector3f(datat, num_rows, stride, matf);
+    } else {
+      table_xform_vecbase4f(datat, num_rows, stride, matf);
     }
 
   } else {
@@ -1891,6 +1895,72 @@ do_transform_vector_column(const GeomVertexFormat *format, GeomVertexRewriter &d
   }
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexData::table_xform_point3f
+//       Access: Private, Static
+//  Description: Transforms each of the LPoint3f objects in the
+//               indicated table by the indicated matrix.
+////////////////////////////////////////////////////////////////////
+void GeomVertexData::
+table_xform_point3f(unsigned char *datat, size_t num_rows, size_t stride,
+                    const LMatrix4f &matf) {
+  // We don't bother checking for the unaligned case here, because in
+  // practice it doesn't matter with a 3-component point.
+  for (size_t i = 0; i < num_rows; ++i) {
+    LPoint3f &vertex = *(LPoint3f *)(&datat[i * stride]);
+    vertex *= matf;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexData::table_xform_vector3f
+//       Access: Private, Static
+//  Description: Transforms each of the LVector3f objects in the
+//               indicated table by the indicated matrix.
+////////////////////////////////////////////////////////////////////
+void GeomVertexData::
+table_xform_vector3f(unsigned char *datat, size_t num_rows, size_t stride,
+                     const LMatrix4f &matf) {
+  // We don't bother checking for the unaligned case here, because in
+  // practice it doesn't matter with a 3-component vector.
+  for (size_t i = 0; i < num_rows; ++i) {
+    LVector3f &vertex = *(LVector3f *)(&datat[i * stride]);
+    vertex *= matf;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexData::table_xform_vecbase4f
+//       Access: Private, Static
+//  Description: Transforms each of the LVecBase4f objects in the
+//               indicated table by the indicated matrix.
+////////////////////////////////////////////////////////////////////
+void GeomVertexData::
+table_xform_vecbase4f(unsigned char *datat, size_t num_rows, size_t stride,
+                      const LMatrix4f &matf) {
+#if defined(HAVE_EIGEN) && defined(LINMATH_VECTORIZE)
+  // Check if the table is unaligned.  If it is, we can't use the
+  // LVecBase4f object directly, which assumes 16-byte alignment.
+  if (((size_t)datat & 0xf) != 0 || (stride & 0xf) != 0) {
+    // Instead, we'll use low-level Eigen calls to multiply out the
+    // unaligned memory.
+    Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, 4, Eigen::RowMajor>, Eigen::Unaligned, Eigen::OuterStride<> > table((float *)datat, num_rows, 4, Eigen::OuterStride<>(stride / sizeof(float)));
+    for (size_t i = 0; i < num_rows; ++i) {
+      table.row(i) *= matf._m;
+    }
+    return;
+  }
+#endif  // HAVE_EIGEN && LINMATH_VECTORIZE
+
+  // If the table is properly aligned (or we don't require alignment),
+  // we can directly use the high-level LVecBase4f object, which will
+  // do the right thing.
+  for (size_t i = 0; i < num_rows; ++i) {
+    LVecBase4f &vertex = *(LVecBase4f *)(&datat[i * stride]);
+    vertex *= matf;
+  }
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexData::register_with_read_factory
 //       Access: Public, Static
@@ -2273,7 +2343,6 @@ get_normal_info(const GeomVertexArrayDataHandle *&array_reader,
   int array_index = _cdata->_format->get_normal_array_index();
   if (array_index >= 0) {
     const GeomVertexColumn *column = _cdata->_format->get_normal_column();
-    nassertr(column->get_num_values() == 3, false);
 
     array_reader = _array_readers[array_index];
     numeric_type = column->get_numeric_type();

+ 6 - 0
panda/src/gobj/geomVertexData.h

@@ -322,6 +322,12 @@ private:
                                  const LMatrix4 &mat, int begin_row, int end_row);
   void do_transform_vector_column(const GeomVertexFormat *format, GeomVertexRewriter &data,
                                   const LMatrix4 &mat, int begin_row, int end_row);
+  static void table_xform_point3f(unsigned char *datat, size_t num_rows, 
+                                  size_t stride, const LMatrix4f &matf);
+  static void table_xform_vector3f(unsigned char *datat, size_t num_rows, 
+                                   size_t stride, const LMatrix4f &matf);
+  static void table_xform_vecbase4f(unsigned char *datat, size_t num_rows, 
+                                    size_t stride, const LMatrix4f &matf);
 
   static PStatCollector _convert_pcollector;
   static PStatCollector _scale_color_pcollector;

+ 13 - 0
panda/src/gobj/geomVertexReader.I

@@ -218,6 +218,19 @@ get_array_handle() const {
   return _handle;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexReader::get_stride
+//       Access: Published
+//  Description: Returns the per-row stride (bytes between consecutive
+//               rows) of the underlying vertex array, as stored in
+//               this reader's _stride member.  This low-level detail
+//               is normally not needed to use the GeomVertexReader.
+////////////////////////////////////////////////////////////////////
+INLINE size_t GeomVertexReader::
+get_stride() const {
+  return _stride;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexReader::get_current_thread
 //       Access: Published

+ 1 - 0
panda/src/gobj/geomVertexReader.h

@@ -83,6 +83,7 @@ PUBLISHED:
   INLINE const GeomVertexData *get_vertex_data() const;
   INLINE const GeomVertexArrayData *get_array_data() const;
   INLINE const GeomVertexArrayDataHandle *get_array_handle() const;
+  INLINE size_t get_stride() const;
   INLINE Thread *get_current_thread() const;
 
   INLINE void set_force(bool force);

+ 27 - 0
panda/src/gobj/geomVertexRewriter.I

@@ -173,6 +173,33 @@ get_array_handle() const {
   return GeomVertexWriter::get_array_handle();
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexRewriter::get_stride
+//       Access: Published
+//  Description: Reports the byte distance between successive rows of
+//               the underlying vertex array.  The reader and writer
+//               halves are asserted to agree (failure value is 0).
+////////////////////////////////////////////////////////////////////
+INLINE size_t GeomVertexRewriter::
+get_stride() const {
+  const size_t writer_stride = GeomVertexWriter::get_stride();
+  nassertr(writer_stride == GeomVertexReader::get_stride(), 0);
+  return writer_stride;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexRewriter::get_current_thread
+//       Access: Published
+//  Description: Reports the Thread pointer held by the writer half,
+//               after asserting the reader half holds the same one.
+////////////////////////////////////////////////////////////////////
+INLINE Thread *GeomVertexRewriter::
+get_current_thread() const {
+  Thread *writer_thread = GeomVertexWriter::get_current_thread();
+  nassertr(writer_thread == GeomVertexReader::get_current_thread(), NULL);
+  return writer_thread;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexRewriter::set_column
 //       Access: Published

+ 2 - 0
panda/src/gobj/geomVertexRewriter.h

@@ -54,6 +54,8 @@ PUBLISHED:
   INLINE GeomVertexData *get_vertex_data() const;
   INLINE GeomVertexArrayData *get_array_data() const;
   INLINE GeomVertexArrayDataHandle *get_array_handle() const;
+  INLINE size_t get_stride() const;
+  INLINE Thread *get_current_thread() const;
 
   INLINE bool set_column(int column);
   INLINE bool set_column(const string &name);

+ 13 - 0
panda/src/gobj/geomVertexWriter.I

@@ -215,6 +215,19 @@ get_array_handle() const {
   return _handle;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GeomVertexWriter::get_stride
+//       Access: Published
+//  Description: Returns the per-row stride (bytes between consecutive
+//               rows) of the underlying vertex array, as stored in
+//               this writer's _stride member.  This low-level detail
+//               is normally not needed to use the GeomVertexWriter.
+////////////////////////////////////////////////////////////////////
+INLINE size_t GeomVertexWriter::
+get_stride() const {
+  return _stride;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GeomVertexWriter::get_current_thread
 //       Access: Published

+ 1 - 0
panda/src/gobj/geomVertexWriter.h

@@ -95,6 +95,7 @@ PUBLISHED:
   INLINE GeomVertexData *get_vertex_data() const;
   INLINE GeomVertexArrayData *get_array_data() const;
   INLINE GeomVertexArrayDataHandle *get_array_handle() const;
+  INLINE size_t get_stride() const;
   INLINE Thread *get_current_thread() const;
 
   INLINE bool set_column(int column);

+ 32 - 1
panda/src/linmath/lmat_ops_src.I

@@ -22,16 +22,28 @@ INLINE_LINMATH FLOATNAME(LVecBase3)
 operator * (const FLOATNAME(LVecBase3) &v, const FLOATNAME(LMatrix3) &m) {
   return m.xform(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVecBase3) &v, const FLOATNAME(LMatrix3) &m) {
+  m.xform_in_place(v);  // in-place form of v = v * m
+}
 
 INLINE_LINMATH FLOATNAME(LVector3)
 operator * (const FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix3) &m) {
   return m.xform(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix3) &m) {
+  m.xform_in_place(v);  // in-place form of v = v * m (full 3x3 product)
+}
 
 INLINE_LINMATH FLOATNAME(LPoint3)
 operator * (const FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix3) &m) {
   return m.xform(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix3) &m) {
+  m.xform_in_place(v);  // in-place form of v = v * m (full 3x3 product)
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: LVector2 times LMatrix3
@@ -42,6 +54,10 @@ INLINE_LINMATH FLOATNAME(LVector2)
 operator * (const FLOATNAME(LVector2) &v, const FLOATNAME(LMatrix3) &m) {
   return m.xform_vec(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector2) &v, const FLOATNAME(LMatrix3) &m) {
+  m.xform_vec_in_place(v);  // in-place v = v * m; 2-D vector, no translation
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: LPoint2 times LMatrix3
@@ -52,6 +68,10 @@ INLINE_LINMATH FLOATNAME(LPoint2)
 operator * (const FLOATNAME(LPoint2) &v, const FLOATNAME(LMatrix3) &m) {
   return m.xform_point(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint2) &v, const FLOATNAME(LMatrix3) &m) {
+  m.xform_point_in_place(v);  // in-place v = v * m; 2-D point, with translation
+}
 
 
 ////////////////////////////////////////////////////////////////////
@@ -63,12 +83,15 @@ INLINE_LINMATH FLOATNAME(LVecBase4)
 operator * (const FLOATNAME(LVecBase4) &v, const FLOATNAME(LMatrix4) &m) {
   return m.xform(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVecBase4) &v, const FLOATNAME(LMatrix4) &m) {
+  m.xform_in_place(v);  // in-place form of v = v * m (full 4x4 product)
+}
 
 INLINE_LINMATH FLOATNAME(LVector4)
 operator * (const FLOATNAME(LVector4) &v, const FLOATNAME(LMatrix4) &m) {
   return m.xform(v);
 }
-
 INLINE_LINMATH FLOATNAME(LPoint4)
 operator * (const FLOATNAME(LPoint4) &v, const FLOATNAME(LMatrix4) &m) {
   return m.xform(v);
@@ -84,6 +107,10 @@ INLINE_LINMATH FLOATNAME(LVector3)
 operator * (const FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix4) &m) {
   return m.xform_vec(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix4) &m) {
+  m.xform_vec_in_place(v);  // in-place v = v * m; 3-D vector, no translation
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: LPoint3 times LMatrix4
@@ -94,6 +121,10 @@ INLINE_LINMATH FLOATNAME(LPoint3)
 operator * (const FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix4) &m) {
   return m.xform_point(v);
 }
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix4) &m) {
+  m.xform_point_in_place(v);  // in-place v = v * m; 3-D point, with translation
+}
 
 
 ////////////////////////////////////////////////////////////////////

+ 20 - 0
panda/src/linmath/lmat_ops_src.h

@@ -18,30 +18,50 @@ BEGIN_PUBLISH
 
 INLINE_LINMATH FLOATNAME(LVecBase3)
 operator * (const FLOATNAME(LVecBase3) &v, const FLOATNAME(LMatrix3) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVecBase3) &v, const FLOATNAME(LMatrix3) &m);
 
 INLINE_LINMATH FLOATNAME(LVector3)
 operator * (const FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix3) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix3) &m);
 
 INLINE_LINMATH FLOATNAME(LPoint3)
 operator * (const FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix3) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix3) &m);
 
 INLINE_LINMATH FLOATNAME(LVector2)
 operator * (const FLOATNAME(LVector2) &v, const FLOATNAME(LMatrix3) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector2) &v, const FLOATNAME(LMatrix3) &m);
 
 INLINE_LINMATH FLOATNAME(LPoint2)
 operator * (const FLOATNAME(LPoint2) &v, const FLOATNAME(LMatrix3) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint2) &v, const FLOATNAME(LMatrix3) &m);
 
 
 // vector times matrix4
 
 INLINE_LINMATH FLOATNAME(LVecBase4)
 operator * (const FLOATNAME(LVecBase4) &v, const FLOATNAME(LMatrix4) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVecBase4) &v, const FLOATNAME(LMatrix4) &m);
+INLINE_LINMATH FLOATNAME(LPoint4)
+operator * (const FLOATNAME(LPoint4) &v, const FLOATNAME(LMatrix4) &m);
+INLINE_LINMATH FLOATNAME(LVector4)
+operator * (const FLOATNAME(LVector4) &v, const FLOATNAME(LMatrix4) &m);
 
 INLINE_LINMATH FLOATNAME(LVector3)
 operator * (const FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix4) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LVector3) &v, const FLOATNAME(LMatrix4) &m);
 
 INLINE_LINMATH FLOATNAME(LPoint3)
 operator * (const FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix4) &m);
+INLINE_LINMATH void
+operator *= (FLOATNAME(LPoint3) &v, const FLOATNAME(LMatrix4) &m);
 
 INLINE_LINMATH void
 generic_write_datagram(Datagram &dest, const FLOATNAME(LMatrix3) &value);

+ 83 - 0
panda/src/linmath/lmatrix3_src.I

@@ -680,6 +680,89 @@ xform_vec_general(const FLOATNAME(LVecBase3) &v) const {
 #endif  // HAVE_EIGEN
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix3::xform_in_place
+//       Access: Published
+//  Description: In-place v = v * M for a 3-component vector or point.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix3)::
+xform_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix3::xform_in_place(LVecBase3 &)", " ", TAU_USER);
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m;  // row vector times the 3x3 matrix
+#else  
+  v = xform(v);  // no Eigen: reuse the copying xform() and assign back
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix3::xform_point_in_place
+//       Access: Published
+//  Description: Transforms a 2-component point by the matrix in place,
+//               including the translation component.  This assumes
+//               the matrix is an affine transform.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix3)::
+xform_point_in_place(FLOATNAME(LVecBase2) &v) const {
+  TAU_PROFILE("void LMatrix3::xform_point_in_place(LVecBase2 &)", " ", TAU_USER);
+  // v's implicit third component is 1.0f here, so the result is the
+  // upper-left 2x2 block applied to v plus the translation row (row 2).
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.block<2, 2>(0, 0) + _m.block<1, 2>(2, 0);
+#else
+  v = xform_point(v);
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix3::xform_vec_in_place
+//       Access: Published
+//  Description: Transforms a 2-component vector by the matrix in place,
+//               without the translation component.  This assumes
+//               the matrix is an affine transform.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix3)::
+xform_vec_in_place(FLOATNAME(LVecBase2) &v) const {
+  TAU_PROFILE("void LMatrix3::xform_vec_in_place(LVecBase2 &)", " ", TAU_USER);
+  // v's implicit third component is 0.0f here, so only the upper-left
+  // 2x2 block applies and no translation is added.
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.block<2, 2>(0, 0);
+#else
+  v = xform_vec(v);
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix3::xform_vec_in_place
+//       Access: Published
+//  Description: The matrix transforms a 3-component vector in place.
+//               This assumes the matrix is an orthonormal transform.
+//
+//               This simply forwards to xform_in_place().
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix3)::
+xform_vec_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix3::xform_vec_in_place(LVecBase3 &)", " ", TAU_USER);
+  xform_in_place(v);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix3::xform_vec_general_in_place
+//       Access: Published
+//  Description: Transforms the 3-component vector v in place (without
+//               translation component), as a fully general operation.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix3)::
+xform_vec_general_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix3::xform_vec_general_in_place(LVecBase3 &)", " ", TAU_USER);
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.inverse().transpose();
+#else  // stay in step with the Eigen branch: general form, not xform_vec()
+  v = xform_vec_general(v);
+#endif  // HAVE_EIGEN
+}
+
 #define MATRIX3_PRODUCT(res, a, b)                   \
 res._m(0, 0) = a._m(0, 0)*b._m(0, 0) + a._m(0, 1)*b._m(1, 0) + a._m(0, 2)*b._m(2, 0);   \
 res._m(0, 1) = a._m(0, 0)*b._m(0, 1) + a._m(0, 1)*b._m(1, 1) + a._m(0, 2)*b._m(2, 1);   \

+ 15 - 0
panda/src/linmath/lmatrix3_src.h

@@ -140,6 +140,21 @@ PUBLISHED:
   INLINE_LINMATH FLOATNAME(LVecBase3)
   xform_vec_general(const FLOATNAME(LVecBase3) &v) const;
 
+  INLINE_LINMATH void
+  xform_in_place(FLOATNAME(LVecBase3) &v) const;
+
+  INLINE_LINMATH void
+  xform_point_in_place(FLOATNAME(LVecBase2) &v) const;
+
+  INLINE_LINMATH void
+  xform_vec_in_place(FLOATNAME(LVecBase2) &v) const;
+
+  INLINE_LINMATH void
+  xform_vec_in_place(FLOATNAME(LVecBase3) &v) const;
+
+  INLINE_LINMATH void
+  xform_vec_general_in_place(FLOATNAME(LVecBase3) &v) const;
+
   // this = other1 * other2
   INLINE_LINMATH void multiply(
     const FLOATNAME(LMatrix3) &other1, const FLOATNAME(LMatrix3) &other2);

+ 71 - 0
panda/src/linmath/lmatrix4_src.I

@@ -884,6 +884,77 @@ xform_vec_general(const FLOATNAME(LVecBase3) &v) const {
 #endif  // HAVE_EIGEN
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix4::xform_in_place
+//       Access: Public
+//  Description: In-place v = v * M for a 4-component vector or point.
+//               This is a fully general operation.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix4)::
+xform_in_place(FLOATNAME(LVecBase4) &v) const {
+  TAU_PROFILE("void LMatrix4::xform_in_place(LVecBase4 &)", " ", TAU_USER);
+
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m;  // row vector times the 4x4 matrix
+#else
+  v = xform(v);  // no Eigen: reuse the copying xform() and assign back
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix4::xform_point_in_place
+//       Access: Public
+//  Description: The matrix transforms a 3-component point in place
+//               (including translation component).  This assumes
+//               the matrix is an affine transform.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix4)::
+xform_point_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix4::xform_point_in_place(LVecBase3 &)", " ", TAU_USER);
+  // The implicit fourth component of v is treated as 1.0f, so the
+  // translation row (row 3) is added to the 3x3 product.
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.block<3, 3>(0, 0) + _m.block<1, 3>(3, 0);
+#else  
+  v = xform_point(v);
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix4::xform_vec_in_place
+//       Access: Public
+//  Description: The matrix transforms a 3-component vector in place
+//               (without translation component).  This assumes the
+//               matrix is an orthonormal transform.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix4)::
+xform_vec_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix4::xform_vec_in_place(LVecBase3 &)", " ", TAU_USER);
+  // The implicit fourth component of v is treated as 0.0f, so only
+  // the upper-left 3x3 block applies and no translation is added.
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.block<3, 3>(0, 0);
+#else  
+  v = xform_vec(v);
+#endif  // HAVE_EIGEN
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: LMatrix4::xform_vec_general_in_place
+//       Access: Public
+//  Description: Transforms the 3-component vector v in place (without
+//               translation component), as a fully general operation.
+////////////////////////////////////////////////////////////////////
+INLINE_LINMATH void FLOATNAME(LMatrix4)::
+xform_vec_general_in_place(FLOATNAME(LVecBase3) &v) const {
+  TAU_PROFILE("void LMatrix4::xform_vec_general_in_place(LVecBase3 &)", " ", TAU_USER);
+#ifdef HAVE_EIGEN
+  v._v = v._v * _m.block<3, 3>(0, 0).inverse().transpose();
+#else  // no Eigen: reuse the copying xform_vec_general() and assign back
+  v = xform_vec_general(v);
+#endif  // HAVE_EIGEN
+}
+
 #define MATRIX4_PRODUCT(res, a, b)                                          \
 res._m(0, 0) = a._m(0, 0)*b._m(0, 0) + a._m(0, 1)*b._m(1, 0) + a._m(0, 2)*b._m(2, 0) + a._m(0, 3)*b._m(3, 0);   \
 res._m(0, 1) = a._m(0, 0)*b._m(0, 1) + a._m(0, 1)*b._m(1, 1) + a._m(0, 2)*b._m(2, 1) + a._m(0, 3)*b._m(3, 1);   \

+ 12 - 0
panda/src/linmath/lmatrix4_src.h

@@ -138,6 +138,18 @@ PUBLISHED:
   INLINE_LINMATH FLOATNAME(LVecBase3)
   xform_vec_general(const FLOATNAME(LVecBase3) &v) const;
 
+  INLINE_LINMATH void
+  xform_in_place(FLOATNAME(LVecBase4) &v) const;
+
+  INLINE_LINMATH void
+  xform_point_in_place(FLOATNAME(LVecBase3) &v) const;
+
+  INLINE_LINMATH void
+  xform_vec_in_place(FLOATNAME(LVecBase3) &v) const;
+
+  INLINE_LINMATH void
+  xform_vec_general_in_place(FLOATNAME(LVecBase3) &v) const;
+
   // this = other1 * other2
   INLINE_LINMATH void multiply(const FLOATNAME(LMatrix4) &other1, const FLOATNAME(LMatrix4) &other2);