Browse Source

shaderpipeline: Sort out proper rules for type size/alignment

GL without UBOs is really the odd one out, being the only one not needing padding, so the `pad_rows` flag is renamed to `packed` and its meaning is inverted.

The std140 rules work well with DX9 as well, even though DX9 allows packing a vec3+float, since that only applies in the non-arrayed case and we specify separate struct members in DX9 in separate calls.
rdb 1 year ago
parent
commit
eb0c0f57b0

+ 85 - 82
panda/src/display/shaderInputBinding_impls.cxx

@@ -324,23 +324,23 @@ make_shader_input(const ShaderType *type, CPT_InternalName name) {
       uint32_t num_rows = matrix->get_num_rows();
       if (num_rows == 4) {
         if (matrix->get_scalar_type() == ShaderType::ST_double) {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, *(LMatrix4d *)into);
           });
         } else {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, *(LMatrix4f *)into);
           });
         }
       } else {
         if (matrix->get_scalar_type() == ShaderType::ST_double) {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4d tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
             memcpy(into, tmp.get_data(), num_rows * sizeof(double) * 4);
           });
         } else {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4f tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
             memcpy(into, tmp.get_data(), num_rows * sizeof(float) * 4);
@@ -353,20 +353,20 @@ make_shader_input(const ShaderType *type, CPT_InternalName name) {
       if (num_rows == 3) {
         // Short-cut for most common case
         if (matrix->get_scalar_type() == ShaderType::ST_double) {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4d tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
-            if (pad_rows) {
+            if (!packed) {
               memcpy(into, tmp.get_data(), sizeof(double) * 4 * 3);
             } else {
               *((LMatrix3d *)into) = tmp.get_upper_3();
             }
           });
         } else {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4f tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
-            if (pad_rows) {
+            if (!packed) {
               memcpy(into, tmp.get_data(), sizeof(float) * 4 * 3);
             } else {
               *((LMatrix3f *)into) = tmp.get_upper_3();
@@ -375,10 +375,10 @@ make_shader_input(const ShaderType *type, CPT_InternalName name) {
         }
       } else {
         if (matrix->get_scalar_type() == ShaderType::ST_double) {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4d tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
-            if (pad_rows) {
+            if (!packed) {
               memcpy(into, tmp.get_data(), num_rows * sizeof(double) * 4);
             } else {
               for (uint32_t i = 0; i < num_rows; ++i) {
@@ -387,10 +387,10 @@ make_shader_input(const ShaderType *type, CPT_InternalName name) {
             }
           });
         } else {
-          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool pad_rows) {
+          return ShaderInputBinding::make_data(dep, [=](const State &state, void *into, bool packed) {
             LMatrix4f tmp;
             state.gsg->get_target_shader_attrib()->get_shader_input_matrix(name, tmp);
-            if (pad_rows) {
+            if (!packed) {
               memcpy(into, tmp.get_data(), num_rows * sizeof(float) * 4);
             } else {
               for (uint32_t i = 0; i < num_rows; ++i) {
@@ -537,7 +537,7 @@ make_transform_table(const ShaderType *type, bool transpose) {
 
   if (num_rows == 4) {
     return ShaderInputBinding::make_data(Shader::D_vertex_data,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
 
       const TransformTable *table = state.gsg->get_data_reader()->get_transform_table();
       LMatrix4f *matrices = (LMatrix4f *)into;
@@ -564,7 +564,7 @@ make_transform_table(const ShaderType *type, bool transpose) {
     nassertr(transpose, nullptr);
 
     return ShaderInputBinding::make_data(Shader::D_vertex_data,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
 
       const TransformTable *table = state.gsg->get_data_reader()->get_transform_table();
       LVecBase4f *vectors = (LVecBase4f *)into;
@@ -608,7 +608,7 @@ make_slider_table(const ShaderType *type) {
   nassertr(element_type == ShaderType::float_type, nullptr);
 
   return ShaderInputBinding::make_data(Shader::D_vertex_data,
-                                       [=](const State &state, void *into, bool pad_rows) {
+                                       [=](const State &state, void *into, bool packed) {
 
     const SliderTable *table = state.gsg->get_data_reader()->get_slider_table();
     float *sliders = (float *)into;
@@ -630,13 +630,13 @@ static ShaderInputBinding *
 make_frame_time(const ShaderType *type) {
   if (type == ShaderType::float_type) {
     return ShaderInputBinding::make_data(Shader::D_frame,
-                                         [](const State &state, void *into, bool pad_rows) {
+                                         [](const State &state, void *into, bool packed) {
       *(float *)into = ClockObject::get_global_clock()->get_frame_time();
     });
   }
   if (type == ShaderType::double_type) {
     return ShaderInputBinding::make_data(Shader::D_frame,
-                                         [](const State &state, void *into, bool pad_rows) {
+                                         [](const State &state, void *into, bool packed) {
       *(double *)into = ClockObject::get_global_clock()->get_frame_time();
     });
   }
@@ -649,7 +649,7 @@ make_frame_time(const ShaderType *type) {
 static ShaderInputBinding *
 make_color(const ShaderType *type) {
   return ShaderInputBinding::make_data(Shader::D_color,
-                                       [](const State &state, void *into, bool pad_rows) {
+                                       [](const State &state, void *into, bool packed) {
 
     const ColorAttrib *target_color = (const ColorAttrib *)
       state.gsg->get_target_state()->get_attrib_def(ColorAttrib::get_class_slot());
@@ -667,7 +667,7 @@ make_color(const ShaderType *type) {
 static ShaderInputBinding *
 make_color_scale(const ShaderType *type) {
   return ShaderInputBinding::make_data(Shader::D_colorscale,
-                                       [](const State &state, void *into, bool pad_rows) {
+                                       [](const State &state, void *into, bool packed) {
 
     const ColorScaleAttrib *target_color_scale = (const ColorScaleAttrib *)
       state.gsg->get_target_state()->get_attrib_def(ColorScaleAttrib::get_class_slot());
@@ -733,7 +733,7 @@ make_texture_matrix(const ShaderType *type, size_t index, bool inverse, bool tra
   }
 
   return ShaderInputBinding::make_data(Shader::D_tex_matrix,
-                                       [=](const State &state, void *into, bool pad_rows) {
+                                       [=](const State &state, void *into, bool packed) {
 
     const TexMatrixAttrib *tma;
     const TextureAttrib *ta;
@@ -812,7 +812,7 @@ make_fog(const ShaderType *type) {
   }
 
   return ShaderInputBinding::make_data(Shader::D_fog | Shader::D_frame,
-                                       [=](const State &state, void *into, bool pad_rows) {
+                                       [=](const State &state, void *into, bool packed) {
 
     LVecBase4f color(1, 1, 1, 1);
     PN_stdfloat density = 0, start = 1, end = 1, scale = 1;
@@ -912,7 +912,7 @@ make_material(const ShaderType *type) {
   }
 
   return ShaderInputBinding::make_data(Shader::D_material | Shader::D_frame,
-                                       [=](const State &state, void *into, bool pad_rows) {
+                                       [=](const State &state, void *into, bool packed) {
 
     LVecBase4f base_color(0, 0, 0, 0);
     LVecBase4f ambient(1, 1, 1, 1);
@@ -957,7 +957,7 @@ make_material(const ShaderType *type) {
 static ShaderInputBinding *
 make_light_ambient(const ShaderType *type) {
   return ShaderInputBinding::make_data(Shader::D_frame | Shader::D_light,
-                                       [](const State &state, void *into, bool pad_rows) {
+                                       [](const State &state, void *into, bool packed) {
     const LightAttrib *target_light;
     if (state.gsg->get_target_state()->get_attrib(target_light) && target_light->has_any_on_light()) {
       *(LVecBase4f *)into = LCAST(float, target_light->get_ambient_contribution());
@@ -991,17 +991,17 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderMatrixBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   LMatrix4f m = LCAST(float, state.matrix_cache[_cache_index]);
   if (_transpose) {
     m.transpose_in_place();
   }
-  if (pad_rows || _num_cols == 4) {
-    memcpy(into, m.get_data(), _num_cols * 4 * sizeof(float));
-  } else {
+  if (packed && _num_cols != 4) {
     for (size_t i = 0; i < _num_rows; ++i) {
       memcpy((float *)into + i * _num_cols, m.get_data() + i * 4, _num_cols * sizeof(float));
     }
+  } else {
+    memcpy(into, m.get_data(), _num_cols * 4 * sizeof(float));
   }
 }
 
@@ -1029,19 +1029,19 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderMatrixComposeBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   LMatrix4f m;
   m.multiply(LCAST(float, state.matrix_cache[_cache_index0]),
              LCAST(float, state.matrix_cache[_cache_index1]));
   if (_transpose) {
     m.transpose_in_place();
   }
-  if (pad_rows || _num_cols == 4) {
-    memcpy(into, m.get_data(), _num_rows * 4 * sizeof(float));
-  } else {
+  if (packed && _num_cols != 4) {
     for (size_t i = 0; i < _num_rows; ++i) {
       memcpy((float *)into + i * _num_cols, m.get_data() + i * 4, _num_cols * sizeof(float));
     }
+  } else {
+    memcpy(into, m.get_data(), _num_rows * 4 * sizeof(float));
   }
 }
 
@@ -1071,7 +1071,7 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderPointParamsBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   const RenderModeAttrib *target_render_mode;
   state.gsg->get_target_state()->get_attrib_def(target_render_mode);
 
@@ -1112,7 +1112,7 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderPackedLightBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   // The light matrix contains COLOR, ATTENUATION, VIEWVECTOR, POSITION
   LVecBase4f *data = (LVecBase4f *)into;
 
@@ -1200,7 +1200,7 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderLegacyDirectionalLightBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   // The dlight matrix contains COLOR, SPECULAR, DIRECTION, PSEUDOHALFANGLE
   const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(_input);
   nassertv(!np.is_empty());
@@ -1230,7 +1230,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderLegacyPointLightBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   // The plight matrix contains COLOR, SPECULAR, POINT, ATTENUATION
   const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(_input);
   nassertv(!np.is_empty());
@@ -1257,7 +1257,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderLegacySpotlightBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   // The slight matrix contains COLOR, SPECULAR, POINT, DIRECTION
   const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(_input);
   nassertv(!np.is_empty());
@@ -1398,7 +1398,7 @@ setup(Shader *shader) {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderLightStructBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   if (_input != nullptr) {
     // Fetch shader input.
     if (state.gsg->get_target_shader_attrib()->has_shader_input(_input)) {
@@ -1803,15 +1803,15 @@ get_state_dep() const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderFloatBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   Shader::ShaderPtrData ptr_data;
   if (!state.gsg->get_target_shader_attrib()->get_shader_input_ptr(_input, ptr_data)) {
     return;
   }
 
   int total_rows = std::min(_num_elements * _num_rows, (int)ptr_data._size / _num_cols);
-  if (total_rows == 1) {
-    pad_rows = false;
+  if (total_rows == 1 || _num_cols == 4) {
+    packed = true;
   }
 
   float *data = (float *)into;
@@ -1819,7 +1819,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
   switch (ptr_data._type) {
   case ShaderType::ST_int:
     // Convert int data to float data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (float)(((int *)ptr_data._ptr)[i]);
       }
@@ -1835,7 +1835,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_uint:
     // Convert unsigned int data to float data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (float)(((unsigned int *)ptr_data._ptr)[i]);
       }
@@ -1851,7 +1851,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_double:
     // Downgrade double data to float data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (float)(((double *)ptr_data._ptr)[i]);
       }
@@ -1866,7 +1866,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
     return;
 
   case ShaderType::ST_float:
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       // No conversion needed.
       memcpy(data, ptr_data._ptr, total_rows * _num_cols * sizeof(float));
       return;
@@ -1899,15 +1899,15 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderDoubleBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   Shader::ShaderPtrData ptr_data;
   if (!state.gsg->get_target_shader_attrib()->get_shader_input_ptr(_input, ptr_data)) {
     return;
   }
 
   int total_rows = std::min(_num_elements * _num_rows, (int)ptr_data._size / _num_cols);
-  if (total_rows == 1) {
-    pad_rows = false;
+  if (total_rows == 1 || _num_cols == 4) {
+    packed = true;
   }
 
   double *data = (double *)into;
@@ -1915,7 +1915,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
   switch (ptr_data._type) {
   case ShaderType::ST_int:
     // Convert int data to double data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (double)(((int *)ptr_data._ptr)[i]);
       }
@@ -1931,7 +1931,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_uint:
     // Convert int data to double data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (double)(((unsigned int *)ptr_data._ptr)[i]);
       }
@@ -1946,7 +1946,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
     return;
 
   case ShaderType::ST_double:
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       // No conversion needed.
       //if (always_copy) {
         memcpy(data, ptr_data._ptr, total_rows * _num_cols * sizeof(double));
@@ -1966,7 +1966,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_float:
     // Upgrade float data to double data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (double)(((float *)ptr_data._ptr)[i]);
       }
@@ -1993,7 +1993,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderIntBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   Shader::ShaderPtrData ptr_data;
   if (!state.gsg->get_target_shader_attrib()->get_shader_input_ptr(_input, ptr_data)) {
     return;
@@ -2008,11 +2008,11 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
   }
 
   int total_rows = std::min(_num_elements * _num_rows, (int)ptr_data._size / _num_cols);
-  if (total_rows == 1) {
-    pad_rows = false;
+  if (total_rows == 1 || _num_cols == 4) {
+    packed = true;
   }
 
-  if (!pad_rows || _num_cols == 4) {
+  if (packed) {
     memcpy(into, ptr_data._ptr, total_rows * _num_cols * sizeof(int));
   } else {
     int *data = (int *)into;
@@ -2029,15 +2029,15 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderBoolBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
   Shader::ShaderPtrData ptr_data;
   if (!state.gsg->get_target_shader_attrib()->get_shader_input_ptr(_input, ptr_data)) {
     return;
   }
 
   int total_rows = std::min(_num_elements * _num_rows, (int)ptr_data._size / _num_cols);
-  if (total_rows == 1) {
-    pad_rows = false;
+  if (total_rows == 1 || _num_cols == 4) {
+    packed = true;
   }
 
   uint32_t *data = (uint32_t *)into;
@@ -2047,7 +2047,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
   case ShaderType::ST_uint:
   case ShaderType::ST_bool:
     // Convert int data to bool data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (uint32_t)(((unsigned int *)ptr_data._ptr)[i] != 0);
       }
@@ -2063,7 +2063,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_double:
     // Convert double data to bool data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (uint32_t)(((double *)ptr_data._ptr)[i] != 0.0);
       }
@@ -2079,7 +2079,7 @@ fetch_data(const State &state, void *into, bool pad_rows) const {
 
   case ShaderType::ST_float:
     // Convert float data to bool data.
-    if (!pad_rows || _num_cols == 4) {
+    if (packed) {
       for (int i = 0; i < total_rows * _num_cols; ++i) {
         data[i] = (uint32_t)(((float *)ptr_data._ptr)[i] != 0.0f);
       }
@@ -2115,9 +2115,12 @@ get_state_dep() const {
  * Fetches the part of the shader input that is plain numeric data.
  */
 void ShaderAggregateBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
+  // Note that the offsets are calculated for a non-packed layout.  That means
+  // we have too much padding if we want packed data.  It's probably not worth
+  // engineering a solution for that.
   for (const DataMember &member : _data_members) {
-    member._binding->fetch_data(state, (unsigned char *)into + member._offset, pad_rows);
+    member._binding->fetch_data(state, (unsigned char *)into + member._offset, packed);
   }
 }
 
@@ -2430,7 +2433,7 @@ make_binding_glsl(const InternalName *name, const ShaderType *type) {
       }
 
       return ShaderInputBinding::make_data(Shader::D_clip_planes | Shader::D_view_transform,
-                                           [=](const State &state, void *into, bool pad_rows) {
+                                           [=](const State &state, void *into, bool packed) {
 
         LPlanef *planes = (LPlanef *)into;
 
@@ -2468,7 +2471,7 @@ make_binding_glsl(const InternalName *name, const ShaderType *type) {
       type->unwrap_array(element_type, num_elements);
 
       return ShaderInputBinding::make_data(Shader::D_texture | Shader::D_frame,
-                                           [=](const State &state, void *into, bool pad_rows) {
+                                           [=](const State &state, void *into, bool packed) {
 
         const TextureAttrib *ta;
 
@@ -2593,13 +2596,13 @@ make_binding_glsl(const InternalName *name, const ShaderType *type) {
     else if (pieces[1] == "DeltaFrameTime") {
       if (type == ShaderType::float_type) {
         return ShaderInputBinding::make_data(Shader::D_frame,
-                                             [](const State &state, void *into, bool pad_rows) {
+                                             [](const State &state, void *into, bool packed) {
           *(float *)into = ClockObject::get_global_clock()->get_dt();
         });
       }
       else if (type == ShaderType::double_type) {
         return ShaderInputBinding::make_data(Shader::D_frame,
-                                             [](const State &state, void *into, bool pad_rows) {
+                                             [](const State &state, void *into, bool packed) {
           *(double *)into = ClockObject::get_global_clock()->get_dt();
         });
       }
@@ -2610,7 +2613,7 @@ make_binding_glsl(const InternalName *name, const ShaderType *type) {
     else if (pieces[1] == "FrameNumber") {
       if (type == ShaderType::int_type) {
         return ShaderInputBinding::make_data(Shader::D_frame,
-                                             [](const State &state, void *into, bool pad_rows) {
+                                             [](const State &state, void *into, bool packed) {
           *(int *)into = ClockObject::get_global_clock()->get_frame_count();
         });
       } else {
@@ -2915,7 +2918,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
         return nullptr;
       }
       return ShaderInputBinding::make_data(Shader::D_material | Shader::D_frame,
-                                           [=](const State &state, void *into, bool pad_rows) {
+                                           [=](const State &state, void *into, bool packed) {
 
         LVecBase4f &ambient = ((LVecBase4f *)into)[0];
         LVecBase4f &diffuse = ((LVecBase4f *)into)[1];
@@ -2955,7 +2958,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
         return nullptr;
       }
       return ShaderInputBinding::make_data(Shader::D_fog | Shader::D_frame,
-                                           [](const State &state, void *into, bool pad_rows) {
+                                           [](const State &state, void *into, bool packed) {
 
         LVecBase4f &params = *(LVecBase4f *)into;
 
@@ -2975,7 +2978,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
         return nullptr;
       }
       return ShaderInputBinding::make_data(Shader::D_fog | Shader::D_frame,
-                                           [](const State &state, void *into, bool pad_rows) {
+                                           [](const State &state, void *into, bool packed) {
 
         LVecBase4f &color = *(LVecBase4f *)into;
 
@@ -3007,7 +3010,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
         return nullptr;
       }
       return ShaderInputBinding::make_data(Shader::D_light | Shader::D_frame,
-                                           [=](const State &state, void *into, bool pad_rows) {
+                                           [=](const State &state, void *into, bool packed) {
 
         // We don't count ambient lights, which would be pretty silly to handle
         // via this mechanism.
@@ -3046,7 +3049,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
 
     CPT(InternalName) input = InternalName::make(pieces[1]);
     return ShaderInputBinding::make_data(Shader::D_shader_inputs | Shader::D_frame,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(input);
       nassertv(!np.is_empty());
       Light *light = np.node()->as_light();
@@ -3063,7 +3066,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
 
     CPT(InternalName) input = InternalName::make(pieces[1]);
     return ShaderInputBinding::make_data(Shader::D_shader_inputs | Shader::D_frame,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(input);
       nassertv(!np.is_empty());
       Light *light = np.node()->as_light();
@@ -3125,7 +3128,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
 
     int stage = atoi(pieces[1].c_str());
     return ShaderInputBinding::make_data(Shader::D_texture | Shader::D_tex_matrix,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
 
       const TextureAttrib *ta;
       const TexMatrixAttrib *tma;
@@ -3148,7 +3151,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     // state change
     int stage = atoi(pieces[1].c_str());
     return ShaderInputBinding::make_data(Shader::D_texture | Shader::D_frame,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
 
       const TextureAttrib *ta;
       if (state.gsg->get_target_state()->get_attrib(ta) && stage < ta->get_num_on_stages()) {
@@ -3170,7 +3173,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     // a state change
     int stage = atoi(pieces[1].c_str());
     return ShaderInputBinding::make_data(Shader::D_texture | Shader::D_tex_gen | Shader::D_frame,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
 
       const TextureAttrib *ta;
       const TexGenAttrib *tga;
@@ -3193,7 +3196,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     // state change
     CPT(InternalName) input = InternalName::make(pieces[1]);
     return ShaderInputBinding::make_data(Shader::D_frame | Shader::D_shader_inputs,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       const NodePath &np = state.gsg->get_target_shader_attrib()->get_shader_input_nodepath(name);
       nassertv(!np.is_empty());
       const PlaneNode *plane_node;
@@ -3212,7 +3215,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     // state change
     int index = atoi(pieces[1].c_str());
     return ShaderInputBinding::make_data(Shader::D_clip_planes | Shader::D_frame,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       const ClipPlaneAttrib *cpa;
       state.gsg->get_target_state()->get_attrib_def(cpa);
       if (index >= cpa->get_num_on_planes()) {
@@ -3244,7 +3247,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
         return nullptr;
       }
       return ShaderInputBinding::make_data(Shader::D_scene,
-                                           [=](const State &state, void *into, bool pad_rows) {
+                                           [=](const State &state, void *into, bool packed) {
         const DisplayRegion *region = state.gsg->get_current_display_region();
         *(LVecBase2f *)into = LCAST(float, region->get_pixel_size());
       });
@@ -3313,7 +3316,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     }
     CPT(InternalName) input = InternalName::make(pieces[1]);
     return ShaderInputBinding::make_data(Shader::D_frame | Shader::D_shader_inputs,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       Texture *tex = state.gsg->get_target_shader_attrib()->get_shader_input_texture(input);
       nassertv(tex != nullptr);
       int sx = tex->get_x_size() - tex->get_pad_x_size();
@@ -3333,7 +3336,7 @@ make_binding_cg(const InternalName *name, const ShaderType *type) {
     }
     CPT(InternalName) input = InternalName::make(pieces[1]);
     return ShaderInputBinding::make_data(Shader::D_frame | Shader::D_shader_inputs,
-                                         [=](const State &state, void *into, bool pad_rows) {
+                                         [=](const State &state, void *into, bool packed) {
       Texture *tex = state.gsg->get_target_shader_attrib()->get_shader_input_texture(input);
       nassertv(tex != nullptr);
       double px = 1.0 / tex->get_x_size();

+ 14 - 14
panda/src/display/shaderInputBinding_impls.h

@@ -31,7 +31,7 @@ public:
   virtual int get_state_dep() const override;
   virtual void setup(Shader *shader) override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
 protected:
   size_t _cache_index = 0;
@@ -57,7 +57,7 @@ public:
   virtual int get_state_dep() const override;
   virtual void setup(Shader *shader) override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
 private:
   size_t _cache_index0 = 0;
@@ -80,7 +80,7 @@ public:
   virtual int get_state_dep() const override;
   virtual void setup(Shader *shader) override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
 protected:
   size_t _cache_index = 0;
@@ -96,7 +96,7 @@ public:
   virtual int get_state_dep() const override;
   virtual void setup(Shader *shader) override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
 protected:
   size_t _index;
@@ -129,7 +129,7 @@ class EXPCL_PANDA_DISPLAY ShaderLegacyDirectionalLightBinding : public ShaderLeg
 public:
   using ShaderLegacyLightBinding::ShaderLegacyLightBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -139,7 +139,7 @@ class EXPCL_PANDA_DISPLAY ShaderLegacyPointLightBinding : public ShaderLegacyLig
 public:
   using ShaderLegacyLightBinding::ShaderLegacyLightBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -149,7 +149,7 @@ class EXPCL_PANDA_DISPLAY ShaderLegacySpotlightBinding : public ShaderLegacyLigh
 public:
   using ShaderLegacyLightBinding::ShaderLegacyLightBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -163,7 +163,7 @@ public:
   virtual int get_state_dep() const override;
   virtual void setup(Shader *shader) override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
   virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
   virtual PT(Texture) fetch_texture(const State &state,
@@ -258,7 +258,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override=0;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override=0;
 
 protected:
   CPT_InternalName _input;
@@ -274,7 +274,7 @@ class EXPCL_PANDA_DISPLAY ShaderFloatBinding : public ShaderDataBinding {
 public:
   using ShaderDataBinding::ShaderDataBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -284,7 +284,7 @@ class EXPCL_PANDA_DISPLAY ShaderDoubleBinding : public ShaderDataBinding {
 public:
   using ShaderDataBinding::ShaderDataBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -294,7 +294,7 @@ class EXPCL_PANDA_DISPLAY ShaderIntBinding : public ShaderDataBinding {
 public:
   using ShaderDataBinding::ShaderDataBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -304,7 +304,7 @@ class EXPCL_PANDA_DISPLAY ShaderBoolBinding : public ShaderDataBinding {
 public:
   using ShaderDataBinding::ShaderDataBinding;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 };
 
 /**
@@ -317,7 +317,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual void fetch_data(const State &state, void *into, bool pad_rows) const override;
+  virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
   virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
   virtual PT(Texture) fetch_texture(const State &state,

+ 5 - 3
panda/src/dxgsg9/dxShaderContext9.cxx

@@ -364,8 +364,10 @@ query_constants(const ShaderModule *module, DWORD *data) {
           _constant_deps |= binding._dep;
           _data_bindings.push_back(std::move(binding));
 
-          // Pad space to 16-byte boundary
-          uint32_t size = param._type->get_size_bytes(true);
+          // Pad space to 16-byte boundary, since DX9 wants everything as a
+          // vec4, and otherwise we may end up copying out-of-bounds data if
+          // the last field is smaller than a vec4.
+          uint32_t size = param._type->get_size_bytes();
           size = (size + 15) & ~15;
           _scratch_space_size += size;
         }
@@ -620,7 +622,7 @@ issue_parameters(GSG *gsg, int altered) {
 
     for (const Binding &binding : _data_bindings) {
       if (altered & binding._dep) {
-        binding._binding->fetch_data(state, scratch + binding._offset, true);
+        binding._binding->fetch_data(state, scratch + binding._offset, false);
       }
     }
 

+ 17 - 3
panda/src/glstuff/glShaderContext_src.cxx

@@ -131,8 +131,22 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext
       _uniform_data_deps |= block._dep;
       _uniform_blocks.push_back(std::move(block));
 
-      // Pad space to 16-byte boundary
-      uint32_t size = param._type->get_size_bytes();
+      // We ideally want the tightly packed size, since we are not using UBOs
+      // and the regular glUniform calls use tight packing.
+      uint32_t size;
+      ShaderType::ScalarType scalar_type;
+      uint32_t num_elements;
+      uint32_t num_rows;
+      uint32_t num_cols;
+      if (param._type->as_scalar_type(scalar_type, num_elements, num_rows, num_cols)) {
+        size = num_elements * num_rows * num_cols * ShaderType::get_scalar_size_bytes(scalar_type);
+      } else {
+        // If it's a struct, we just use the regular size.  It's too much, but
+        // since we're using the original offsets from the struct, I can't be
+        // bothered right now to write code to repack the entire struct.
+        size = param._type->get_size_bytes();
+      }
+
       size = (size + 15) & ~15;
       _scratch_space_size = std::max(_scratch_space_size, (size_t)size);
     }
@@ -1457,7 +1471,7 @@ issue_parameters(int altered) {
       }
 
       for (const UniformBlock::Binding &binding : block._bindings) {
-        binding._binding->fetch_data(state, scratch + binding._offset, false);
+        binding._binding->fetch_data(state, scratch + binding._offset, true);
       }
 
       for (const UniformBlock::Call &call : block._matrices) {

+ 2 - 2
panda/src/gobj/shaderInputBinding.I

@@ -29,8 +29,8 @@ make_data(int dep, Callable callable) {
       return _dep;
     }
 
-    virtual void fetch_data(const State &state, void *scratch, bool pad_rows) const override final {
-      _callable(state, scratch, pad_rows);
+    virtual void fetch_data(const State &state, void *into, bool packed) const override final {
+      _callable(state, into, packed);
     }
 
   private:

+ 3 - 1
panda/src/gobj/shaderInputBinding.cxx

@@ -54,9 +54,11 @@ setup(Shader *shader) {
 
 /**
  * Fetches the part of the shader input that is plain numeric data.
+ * If packed is true, the data is tightly packed, even if the type originally
+ * contained padding.
  */
 void ShaderInputBinding::
-fetch_data(const State &state, void *into, bool pad_rows) const {
+fetch_data(const State &state, void *into, bool packed) const {
 }
 
 /**

+ 1 - 1
panda/src/gobj/shaderInputBinding.h

@@ -52,7 +52,7 @@ public:
   };
 
   virtual void fetch_data(const State &state, void *into,
-                          bool pad_rows = false) const;
+                          bool packed = false) const;
 
   typedef uintptr_t ResourceId;
   virtual ResourceId get_resource_id(int index, const ShaderType *type) const;

+ 8 - 0
panda/src/gobj/shaderType.I

@@ -52,6 +52,14 @@ compare_to(const ShaderType &other) const {
   return (this_type > other_type) - (this_type < other_type);
 }
 
+/**
+ * Returns the size of the given scalar type, in bytes.
+ */
+INLINE constexpr uint32_t ShaderType::
+get_scalar_size_bytes(ScalarType scalar_type) {
+  return (scalar_type == ST_double) ? 8 : 4;
+}
+
 /**
  * Constructs a scalar type.
  */

+ 64 - 53
panda/src/gobj/shaderType.cxx

@@ -151,29 +151,6 @@ std::ostream &operator << (std::ostream &out, ShaderType::ScalarType scalar_type
 }
 
 #ifndef CPPPARSER
-/**
- * Returns the size in bytes of this type in memory, if applicable.  Opaque
- * types will return 0.
- */
-int ShaderType::
-get_size_bytes(bool pad_rows) const {
-  ScalarType type;
-  uint32_t dim[3];
-  if (as_scalar_type(type, dim[0], dim[1], dim[2])) {
-    if (pad_rows) {
-      // std140 array element padding rules, also used in DX9.
-      dim[2] = (dim[2] + 3) & ~3;
-    }
-    if (type == ST_double) {
-      return 8 * dim[0] * dim[1] * dim[2];
-    } else {
-      return 4 * dim[0] * dim[1] * dim[2];
-    }
-  } else {
-    return 0;
-  }
-}
-
 /**
  *
  */
@@ -257,9 +234,18 @@ compare_to_impl(const ShaderType &other) const {
 /**
  * Returns the alignment in bytes of this type in memory, if applicable.
  */
-int ShaderType::Scalar::
+uint32_t ShaderType::Scalar::
 get_align_bytes() const {
-  return (_scalar_type == ST_double) ? 8 : 4;
+  return get_scalar_size_bytes(_scalar_type);
+}
+
+/**
+ * Returns the size in bytes of this type in memory, if applicable.  Opaque
+ * types will return 0.
+ */
+uint32_t ShaderType::Scalar::
+get_size_bytes() const {
+  return get_scalar_size_bytes(_scalar_type);
 }
 
 /**
@@ -327,11 +313,7 @@ replace_scalar_type(ScalarType a, ScalarType b) const {
  */
 int ShaderType::Vector::
 get_num_interface_locations() const {
-  if (_scalar_type == ST_double && _num_components > 2) {
-    return 2;
-  } else {
-    return 1;
-  }
+  return (get_scalar_size_bytes(_scalar_type) * _num_components + 15) / 16;
 }
 
 /**
@@ -359,10 +341,20 @@ compare_to_impl(const ShaderType &other) const {
 /**
  * Returns the alignment in bytes of this type in memory, if applicable.
  */
-int ShaderType::Vector::
+uint32_t ShaderType::Vector::
 get_align_bytes() const {
-  int component_align = (_scalar_type == ST_double) ? 8 : 4;
-  return component_align * ((_num_components == 3) ? 4 : _num_components);
+  return get_scalar_size_bytes(_scalar_type) * ((_num_components == 3) ? 4 : _num_components);
+}
+
+/**
+ * Returns the size in bytes of this type in memory, if applicable.  Opaque
+ * types will return 0.
+ */
+uint32_t ShaderType::Vector::
+get_size_bytes() const {
+  // Notably, a vec3 is vec4-aligned but not padded!  It is permissible for a
+  // scalar to directly follow a vec3 in a struct.
+  return get_scalar_size_bytes(_scalar_type) * _num_components;
 }
 
 /**
@@ -454,11 +446,21 @@ compare_to_impl(const ShaderType &other) const {
 /**
  * Returns the alignment in bytes of this type in memory, if applicable.
  */
-int ShaderType::Matrix::
+uint32_t ShaderType::Matrix::
 get_align_bytes() const {
-  //TODO: needs to be checked
-  int row_align = (_scalar_type == ST_double) ? 32 : 16;
-  return row_align * _num_rows;
+  return get_scalar_size_bytes(_scalar_type) * 4;
+}
+
+/**
+ * Returns the size in bytes of this type in memory, if applicable.  Opaque
+ * types will return 0.
+ */
+uint32_t ShaderType::Matrix::
+get_size_bytes() const {
+  // Pad rows to 16 bytes (std140 rules, but DX9 also expects that)
+  uint32_t row_size = _num_columns * get_scalar_size_bytes(_scalar_type);
+  row_size = (row_size + 15) & ~15;
+  return _num_rows * row_size;
 }
 
 /**
@@ -508,7 +510,7 @@ add_member(const ShaderType *type, std::string name) {
   member.type = type;
   member.name = std::move(name);
   member.offset = _members.empty() ? 0 : _members.back().offset + _members.back().type->get_size_bytes();
-  int alignment = type->get_align_bytes();
+  uint32_t alignment = type->get_align_bytes();
   if (alignment > 0) {
     member.offset += alignment - ((member.offset + (alignment - 1)) % alignment) - 1;
   }
@@ -563,11 +565,12 @@ contains_scalar_type(ScalarType type) const {
 const ShaderType *ShaderType::Struct::
 replace_scalar_type(ScalarType a, ScalarType b) const {
   if (contains_scalar_type(a)) {
+    bool recompute_offsets = get_scalar_size_bytes(a) != get_scalar_size_bytes(b);
+
     ShaderType::Struct copy;
     for (const Member &member : _members) {
       const ShaderType *type = member.type->replace_scalar_type(a, b);
-      if ((a == ST_double) != (b == ST_double)) {
-        // Recompute offsets.
+      if (recompute_offsets) {
         copy.add_member(type, member.name);
       } else {
         copy.add_member(type, member.name, member.offset);
@@ -627,22 +630,24 @@ compare_to_impl(const ShaderType &other) const {
 /**
  * Returns the alignment in bytes of this type in memory, if applicable.
  */
-int ShaderType::Struct::
+uint32_t ShaderType::Struct::
 get_align_bytes() const {
-  int align = 16;
+  uint32_t align = 16;
   for (const Member &member : _members) {
     align = std::max(align, member.type->get_align_bytes());
   }
-  return align;
+  return (align + 15) & ~15;
 }
 
 /**
  * Returns the size in bytes of this type in memory, if applicable.  Opaque
  * types will return 0.
  */
-int ShaderType::Struct::
-get_size_bytes(bool pad_rows) const {
-  return _members.empty() ? 0 : _members.back().offset + _members.back().type->get_size_bytes();
+uint32_t ShaderType::Struct::
+get_size_bytes() const {
+  // Structs are padded to the base alignment of a vec4.
+  uint32_t size = _members.empty() ? 0 : _members.back().offset + _members.back().type->get_size_bytes();
+  return (size + 15) & ~15;
 }
 
 /**
@@ -830,26 +835,32 @@ compare_to_impl(const ShaderType &other) const {
 /**
  * Returns the array stride in bytes.
  */
-int ShaderType::Array::
+uint32_t ShaderType::Array::
 get_stride_bytes() const {
-  int element_size = _element_type->get_size_bytes(true);
-  return (element_size + 15) & ~15;
+  // Array stride is always (at least) 16 bytes in std140 / DX9, even though
+  // this is (indeed) incredibly wasteful for arrays of scalars.
+  uint32_t size = _element_type->get_size_bytes();
+  return (size + 15) & ~15;
 }
 
 /**
  * Returns the alignment in bytes of this type in memory, if applicable.
  */
-int ShaderType::Array::
+uint32_t ShaderType::Array::
 get_align_bytes() const {
-  return get_stride_bytes();
+  uint32_t align = _element_type->get_align_bytes();
+  return (align + 15) & ~15;
 }
 
 /**
  * Returns the size in bytes of this type in memory, if applicable.  Opaque
  * types will return 0.
  */
-int ShaderType::Array::
-get_size_bytes(bool pad_rows) const {
+uint32_t ShaderType::Array::
+get_size_bytes() const {
+  // Arrays have padding at the end so that the next member is aligned to a
+  // 16-byte boundary.  This implies that a float may directly follow a vec3,
+  // but not a vec3[1]!  I didn't make up these rules.
   return get_stride_bytes() * _num_elements;
 }
 

+ 15 - 10
panda/src/gobj/shaderType.h

@@ -35,8 +35,8 @@ public:
 
   virtual void output(std::ostream &out) const=0;
 
-  virtual int get_align_bytes() const { return 1; }
-  virtual int get_size_bytes(bool pad_rows = false) const;
+  virtual uint32_t get_align_bytes() const { return 1; }
+  virtual uint32_t get_size_bytes() const { return 0; }
   virtual int get_num_interface_locations() const { return 1; }
   virtual int get_num_parameter_locations() const { return 1; }
   virtual int get_num_resources() const { return 0; }
@@ -100,6 +100,8 @@ public:
                               uint32_t &num_columns) const { return false; }
   virtual const ShaderType *replace_scalar_type(ScalarType a, ScalarType b) const { return this; }
 
+  INLINE static constexpr uint32_t get_scalar_size_bytes(ScalarType scalar_type);
+
   virtual const Scalar *as_scalar() const { return nullptr; }
   virtual const Vector *as_vector() const { return nullptr; }
   virtual const Matrix *as_matrix() const { return nullptr; }
@@ -194,7 +196,8 @@ public:
 private:
   virtual int compare_to_impl(const ShaderType &other) const override;
 
-  virtual int get_align_bytes() const override;
+  virtual uint32_t get_align_bytes() const override;
+  virtual uint32_t get_size_bytes() const override;
 
   const ScalarType _scalar_type;
 
@@ -241,7 +244,8 @@ public:
 private:
   virtual int compare_to_impl(const ShaderType &other) const override;
 
-  virtual int get_align_bytes() const override;
+  virtual uint32_t get_align_bytes() const override;
+  virtual uint32_t get_size_bytes() const override;
 
   const ScalarType _scalar_type;
   const uint32_t _num_components;
@@ -289,7 +293,8 @@ public:
 private:
   virtual int compare_to_impl(const ShaderType &other) const override;
 
-  virtual int get_align_bytes() const override;
+  virtual uint32_t get_align_bytes() const override;
+  virtual uint32_t get_size_bytes() const override;
 
   const ScalarType _scalar_type;
   const uint32_t _num_rows;
@@ -328,8 +333,8 @@ public:
   virtual void output(std::ostream &out) const override;
   virtual int compare_to_impl(const ShaderType &other) const override;
 
-  virtual int get_align_bytes() const override;
-  virtual int get_size_bytes(bool pad_rows = false) const override;
+  virtual uint32_t get_align_bytes() const override;
+  virtual uint32_t get_size_bytes() const override;
   virtual int get_num_interface_locations() const override;
   virtual int get_num_parameter_locations() const override;
   virtual int get_num_resources() const override;
@@ -392,9 +397,9 @@ public:
   virtual void output(std::ostream &out) const override;
   virtual int compare_to_impl(const ShaderType &other) const override;
 
-  int get_stride_bytes() const;
-  virtual int get_align_bytes() const override;
-  virtual int get_size_bytes(bool pad_rows = false) const override;
+  uint32_t get_stride_bytes() const;
+  virtual uint32_t get_align_bytes() const override;
+  virtual uint32_t get_size_bytes() const override;
   virtual int get_num_interface_locations() const override;
   virtual int get_num_parameter_locations() const override;
   virtual int get_num_resources() const override;

+ 0 - 341
panda/src/pgraph/shaderAttrib.cxx

@@ -480,347 +480,6 @@ get_shader_input_ptr(const InternalName *id, Shader::ShaderPtrData &data) const
   }
 }
 
-/**
- * Extracts the shader input data according to the given type expected by the
- * shader.  Returns the number of bytes written to "into".
- */
-size_t ShaderAttrib::
-get_shader_input_data(const InternalName *id, void *into,
-                      const ShaderType *type, bool pad_rows) const {
-  ShaderType::ScalarType scalar_type;
-  uint32_t num_elements;
-  uint32_t num_rows;
-  uint32_t num_columns;
-  if (type->as_scalar_type(scalar_type, num_elements, num_rows, num_columns)) {
-    Shader::ShaderPtrData data;
-    get_shader_input_data(id, into, scalar_type, num_elements, num_rows, num_columns, pad_rows, true);
-    return num_elements * num_rows * (pad_rows ? 16 : num_columns * 4);
-  }
-  else if (const ShaderType::Array *array_type = type->as_array()) {
-    size_t basename_size = id->get_basename().size();
-    char *buffer = (char *)alloca(basename_size + 14);
-    memcpy(buffer, id->get_basename().c_str(), basename_size);
-
-    size_t total_size = 0;
-    for (size_t i = 0; i < array_type->get_num_elements(); ++i) {
-      sprintf(buffer + basename_size, "[%d]", (int)i);
-
-      size_t size = get_shader_input_data(id->get_parent()->append(buffer), into, array_type->get_element_type(), pad_rows);
-      into = (char *)into + size;
-      total_size += size;
-    }
-    return total_size;
-  }
-  else if (const ShaderType::Struct *struct_type = type->as_struct()) {
-    size_t total_size = 0;
-    for (size_t i = 0; i < struct_type->get_num_members(); ++i) {
-      const ShaderType::Struct::Member &member = struct_type->get_member(i);
-
-      size_t size = get_shader_input_data(((InternalName *)id)->append(member.name), (char *)into + member.offset, member.type, pad_rows);
-      total_size += size;
-    }
-    return total_size;
-  }
-  else {
-    return 0;
-  }
-}
-
-/**
- * Extracts the shader input data, converting it as necessary.  The scratch
- * pointer must be large enough to contain the data, but may or may not be
- * filled by this function (depending on whether conversion is needed), unless
- * always_copy is true.
- */
-void *ShaderAttrib::
-get_shader_input_data(const InternalName *id, void *scratch,
-                      ShaderType::ScalarType scalar_type, int num_elements,
-                      int num_rows, int num_columns, bool pad_rows,
-                      bool always_copy) const {
-  Shader::ShaderPtrData ptr_data;
-  if (!get_shader_input_ptr(id, ptr_data)) {
-    return nullptr;
-  }
-
-  int total_rows = std::min(num_elements * num_rows, (int)ptr_data._size / num_columns);
-  if (total_rows == 1) {
-    pad_rows = false;
-  }
-  switch (scalar_type) {
-  case ShaderType::ST_float:
-    {
-      float *data = (float *)scratch;
-
-      switch (ptr_data._type) {
-      case ShaderType::ST_int:
-        // Convert int data to float data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (float)(((int *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const int *from_data = (const int *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (float)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_uint:
-        // Convert unsigned int data to float data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (float)(((unsigned int *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const unsigned int *from_data = (const unsigned int *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (float)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_double:
-        // Downgrade double data to float data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (float)(((double *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const double *from_data = (const double *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (float)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_float:
-        if (!pad_rows || num_columns == 4) {
-          // No conversion needed.
-          if (always_copy) {
-            memcpy(data, ptr_data._ptr, total_rows * num_columns * sizeof(float));
-            return data;
-          } else {
-            return (float *)ptr_data._ptr;
-          }
-        } else {
-          const float *from_data = (const float *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (float)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      default:
-#ifndef NDEBUG
-        pgraph_cat.error()
-          << "Invalid ShaderPtrData type " << (int)ptr_data._type
-          << " for shader input '" << *id << "'\n";
-#endif
-        return nullptr;
-      }
-
-      return data;
-    }
-    break;
-
-  case ShaderType::ST_int:
-    if (ptr_data._type != ShaderType::ST_int &&
-        ptr_data._type != ShaderType::ST_uint &&
-        ptr_data._type != ShaderType::ST_bool) {
-      pgraph_cat.error()
-        << "Cannot pass floating-point data to integer shader input '" << *id << "'\n";
-      return nullptr;
-    }
-    else if (always_copy) {
-      memcpy(scratch, ptr_data._ptr, total_rows * num_columns * sizeof(int));
-      return scratch;
-    }
-    else {
-      return ptr_data._ptr;
-    }
-    break;
-
-  case ShaderType::ST_uint:
-    if (ptr_data._type != ShaderType::ST_uint &&
-        ptr_data._type != ShaderType::ST_int &&
-        ptr_data._type != ShaderType::ST_bool) {
-      pgraph_cat.error()
-        << "Cannot pass floating-point data to integer shader input '" << *id << "'\n";
-      return nullptr;
-    }
-    else if (always_copy) {
-      memcpy(scratch, ptr_data._ptr, total_rows * num_columns * sizeof(unsigned int));
-      return scratch;
-    }
-    else {
-      return ptr_data._ptr;
-    }
-    break;
-
-  case ShaderType::ST_double:
-    {
-      double *data = (double *)scratch;
-
-      switch (ptr_data._type) {
-      case ShaderType::ST_int:
-        // Convert int data to double data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (double)(((int *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const int *from_data = (const int *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (double)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_uint:
-        // Convert int data to double data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (double)(((unsigned int *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const int *from_data = (const int *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (double)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_double:
-        if (!pad_rows || num_columns == 4) {
-          // No conversion needed.
-          if (always_copy) {
-            memcpy(data, ptr_data._ptr, total_rows * num_columns * sizeof(double));
-            return data;
-          } else {
-            return (double *)ptr_data._ptr;
-          }
-        } else {
-          const double *from_data = (const double *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (double)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_float:
-        // Upgrade float data to double data.
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = (double)(((float *)ptr_data._ptr)[i]);
-          }
-        } else {
-          const float *from_data = (const float *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (double)*from_data++;
-            }
-          }
-        }
-        return data;
-
-      default:
-  #ifndef NDEBUG
-        pgraph_cat.error()
-          << "Invalid ShaderPtrData type " << (int)ptr_data._type
-          << " for shader input '" << *id << "'\n";
-  #endif
-        return nullptr;
-      }
-
-      return data;
-    }
-    break;
-
-  case ShaderType::ST_bool:
-    {
-      unsigned int *data = (unsigned int *)scratch;
-
-      switch (ptr_data._type) {
-      case ShaderType::ST_int:
-      case ShaderType::ST_uint:
-      case ShaderType::ST_bool:
-        if (!pad_rows || num_columns == 4) {
-          // No conversion needed.
-          if (always_copy) {
-            memcpy(data, ptr_data._ptr, total_rows * num_columns * sizeof(unsigned int));
-            return data;
-          } else {
-            return (unsigned int *)ptr_data._ptr;
-          }
-        } else {
-          // Pad out rows.
-          const unsigned int *from_data = (const unsigned int *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (*from_data++) != 0;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_double:
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = ((double *)ptr_data._ptr)[i] != 0.0;
-          }
-        } else {
-          const double *from_data = (const double *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (*from_data++) != 0.0;
-            }
-          }
-        }
-        return data;
-
-      case ShaderType::ST_float:
-        if (!pad_rows || num_columns == 4) {
-          for (int i = 0; i < total_rows * num_columns; ++i) {
-            data[i] = ((float *)ptr_data._ptr)[i] != 0.0f;
-          }
-        } else {
-          const float *from_data = (const float *)ptr_data._ptr;
-          for (int i = 0; i < total_rows; ++i) {
-            for (int c = 0; c < num_columns; ++c) {
-              data[i * 4 + c] = (*from_data++) != 0.0f;
-            }
-          }
-        }
-        return data;
-
-      default:
-        break;
-      }
-    }
-    break;
-
-  case ShaderType::ST_unknown:
-    break;
-  }
-
-  return nullptr;
-}
-
 /**
  * Returns the ShaderInput as a texture.  Assertion fails if there is none, or
  * if it is not a texture.

+ 0 - 6
panda/src/pgraph/shaderAttrib.h

@@ -124,12 +124,6 @@ PUBLISHED:
   bool get_shader_input_ptr(const InternalName *id, Shader::ShaderPtrData &data) const;
   const LMatrix4f &get_shader_input_matrix(const InternalName *id, LMatrix4f &matrix) const;
   const LMatrix4d &get_shader_input_matrix(const InternalName *id, LMatrix4d &matrix) const;
-  size_t get_shader_input_data(const InternalName *id, void *into,
-                               const ShaderType *type, bool pad_rows) const;
-  void *get_shader_input_data(const InternalName *id, void *scratch,
-                              ShaderType::ScalarType scalar_type, int num_elements,
-                              int num_rows, int num_columns, bool pad_rows,
-                              bool always_copy=false) const;
   ShaderBuffer *get_shader_input_buffer(const InternalName *id) const;
 
 PUBLISHED:

+ 5 - 3
panda/src/shaderpipeline/spirVResultDatabase.cxx

@@ -201,11 +201,13 @@ parse_instruction(spv::Op opcode, const uint32_t *args, uint32_t nargs, uint32_t
 
   case spv::OpTypeMatrix:
     {
-      const ShaderType::Vector *column_type;
-      DCAST_INTO_V(column_type, _defs[args[1]]._type);
+      // SPIR-V uses GLSL parlance, in which a column is a row and a row is a
+      // column, compared to Panda conventions.  We flip it around here.
+      const ShaderType::Vector *row_type;
+      DCAST_INTO_V(row_type, _defs[args[1]]._type);
       uint32_t num_rows = args[2];
       record_type(args[0], ShaderType::register_type(
-        ShaderType::Matrix(column_type->get_scalar_type(), num_rows, column_type->get_num_components())));
+        ShaderType::Matrix(row_type->get_scalar_type(), num_rows, row_type->get_num_components())));
       _defs[args[0]]._type_id = args[1];
     }
     break;

+ 1 - 1
panda/src/shaderpipeline/spirVTransformPass.cxx

@@ -1001,7 +1001,7 @@ r_annotate_struct_layout(uint32_t type_id) {
 
     if (const ShaderType::Matrix *matrix_type = base_type->as_matrix()) {
       // Matrix types need to be explicitly laid out.
-      uint32_t stride = (matrix_type->get_scalar_type() == ShaderType::ST_double) ? 32u : 16u;
+      uint32_t stride = ShaderType::get_scalar_size_bytes(matrix_type->get_scalar_type()) * 4;
       add_annotation(spv::OpMemberDecorate,
         {type_id, i, spv::DecorationMatrixStride, stride});
       add_annotation(spv::OpMemberDecorate,