Browse Source

dxgsg9: Support hardware instancing in Direct3D 9

Includes support for matrix vertex inputs and proper default values for transform_index and instance_matrix varying inputs
rdb 5 years ago
parent
commit
a2116ef353

+ 84 - 40
panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx

@@ -133,7 +133,8 @@ DXGraphicsStateGuardian9(GraphicsEngine *engine, GraphicsPipe *pipe) :
 
   _last_fvf = 0;
   _num_bound_streams = 0;
-  _color_vbuffer = nullptr;
+  _instancing_enabled = false;
+  _constant_vbuffer = nullptr;
 
   _vertex_shader_version_major = 0;
   _vertex_shader_version_minor = 0;
@@ -1051,8 +1052,12 @@ end_scene() {
   _d3d_device->SetVertexDeclaration(nullptr);
   for (int array_index = 0; array_index < _num_bound_streams; ++array_index) {
     _d3d_device->SetStreamSource(array_index, nullptr, 0, 0);
+    if (_instancing_enabled) {
+      _d3d_device->SetStreamSourceFreq(array_index, 1);
+    }
   }
   _num_bound_streams = 0;
+  _instancing_enabled = false;
 
   if (_texture_binding_shader_context != 0) {
     _texture_binding_shader_context->disable_shader_texture_bindings(this);
@@ -1253,6 +1258,13 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
       return false;
     }
 
+    if (num_instances == 1 && _instancing_enabled) {
+      // Reset the divisors.
+      for (size_t i = 0; i < _num_bound_streams; ++i) {
+        _d3d_device->SetStreamSourceFreq(i, 1);
+      }
+    }
+
     // Prepare and bind the vertex buffers.
     size_t num_arrays = _data_reader->get_num_arrays();
     size_t i;
@@ -1266,37 +1278,49 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
       // Get the vertex buffer for this array.
       DXVertexBufferContext9 *dvbc;
       if (!setup_array_data(dvbc, array_reader, force)) {
-        dxgsg9_cat.error() << "Unable to setup vertex buffer for array " << i << "\n";
+        dxgsg9_cat.error()
+          << "Unable to setup vertex buffer for array " << i << "\n";
         _d3d_device->SetStreamSource(i, nullptr, 0, 0);
         continue;
       }
 
-      // Bind this array as the data source for the corresponding stream.
+      // Determine stride and divisor.
       const GeomVertexArrayFormat *array_format = array_reader->get_array_format();
-      hr = device->SetStreamSource(i, dvbc->_vbuffer, 0, array_format->get_stride());
+      int stride = array_format->get_stride();
+      int divisor = array_format->get_divisor();
+      if (num_instances == 1) {
+        if (divisor != 0) {
+          // With only one instance, this is equivalent.
+          stride = 0;
+        }
+      }
+      else if (divisor == 0) {
+        _d3d_device->SetStreamSourceFreq(i, D3DSTREAMSOURCE_INDEXEDDATA | num_instances);
+      }
+      else {
+        _d3d_device->SetStreamSourceFreq(i, D3DSTREAMSOURCE_INSTANCEDATA | divisor);
+      }
+
+      // Bind this array as the data source for the corresponding stream.
+      hr = device->SetStreamSource(i, dvbc->_vbuffer, 0, stride);
       if (FAILED(hr)) {
         dxgsg9_cat.error() << "SetStreamSource failed" << D3DERRORSTRING(hr);
       }
     }
 
-    if (format->get_color_array_index() < 0 && _current_shader_context->uses_vertex_color()) {
+    // The bit after the last array is set if the shader context wants us to
+    // bind the vertex buffer containing constants.
+    if (used_streams.get_bit(i)) {
       // Has no vertex colors, so bind a vertex buffer with stride 0 and write
       // our desired color value to it.
-      LPDIRECT3DVERTEXBUFFER9 vbuffer = get_color_vbuffer();
-      D3DCOLOR *local_pointer;
-      hr = vbuffer->Lock(0, sizeof(D3DCOLOR), (void **) &local_pointer, D3DLOCK_DISCARD);
-      if (FAILED(hr)) {
-        dxgsg9_cat.error()
-          << "VertexBuffer::Lock failed" << D3DERRORSTRING(hr);
-        return false;
-      }
-      *local_pointer = LColor_to_D3DCOLOR(_scene_graph_color);
-      vbuffer->Unlock();
-
+      LPDIRECT3DVERTEXBUFFER9 vbuffer = get_constant_vbuffer(_scene_graph_color);
       hr = device->SetStreamSource(i, vbuffer, 0, 0);
       if (FAILED(hr)) {
         dxgsg9_cat.error() << "SetStreamSource failed" << D3DERRORSTRING(hr);
       }
+      if (num_instances != 1) {
+        _d3d_device->SetStreamSourceFreq(i, (D3DSTREAMSOURCE_INSTANCEDATA | 1));
+      }
       ++i;
     }
 
@@ -1306,6 +1330,7 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
     }
 
     _num_bound_streams = used_streams.get_highest_on_bit() + 1;
+    _instancing_enabled = (num_instances != 1);
 
     // Update transform/slider tables.
     _current_shader_context->update_tables(this, _data_reader);
@@ -2306,6 +2331,8 @@ reset() {
                            | ShaderModule::C_matrix_non_square
                            | ShaderModule::C_integer
                            | ShaderModule::C_texture_lod;
+
+    _supports_geometry_instancing = true;
   }
 
   _vertex_shader_profile = (char *) D3DXGetVertexShaderProfile (_d3d_device);
@@ -4551,9 +4578,9 @@ reset_d3d_device(D3DPRESENT_PARAMETERS *presentation_params,
     release_all_vertex_buffers();
     release_all_index_buffers();
 
-    if (_color_vbuffer != nullptr) {
-      _color_vbuffer->Release();
-      _color_vbuffer = nullptr;
+    if (_constant_vbuffer != nullptr) {
+      _constant_vbuffer->Release();
+      _constant_vbuffer = nullptr;
     }
 
     // must be called before reset
@@ -5394,36 +5421,53 @@ atexit_function(void) {
 }
 
 /**
- * Returns a vertex buffer containing only a full-white color.
+ * Returns the vertex buffer of constant default inputs, or nullptr on failure.
  */
 LPDIRECT3DVERTEXBUFFER9 DXGraphicsStateGuardian9::
-get_color_vbuffer() {
-  if (_color_vbuffer != nullptr) {
-    return _color_vbuffer;
-  }
+get_constant_vbuffer(const LColor &color) {
+  LPDIRECT3DVERTEXBUFFER9 vbuffer = _constant_vbuffer;
+
+  // The buffer consists of the following:
+  // 4 bytes of current vertex color
+  // 4 bytes of 0 1 2 3 (used for transform_index)
+  // 12 bytes of the identity matrix (used for instance_matrix)
+  // Last row of identity matrix is (0, 0, 0, 1), so doesn't need to be stored
+  static const size_t size = 4 + 4 + 4 * 3;
+
+  D3DCOLOR cval = LColor_to_D3DCOLOR(color);
 
-  LPDIRECT3DVERTEXBUFFER9 vbuffer;
   HRESULT hr;
-  hr = _screen->_d3d_device->CreateVertexBuffer(sizeof(D3DCOLOR), D3DUSAGE_WRITEONLY, D3DFVF_DIFFUSE, D3DPOOL_DEFAULT, &vbuffer, nullptr);
+  if (vbuffer == nullptr) {
+    hr = _screen->_d3d_device->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY, D3DFVF_DIFFUSE, D3DPOOL_DEFAULT, &vbuffer, nullptr);
 
-  if (FAILED(hr)) {
-    dxgsg9_cat.error()
-      << "CreateVertexBuffer failed" << D3DERRORSTRING(hr);
-    return nullptr;
-  }
+    if (FAILED(hr)) {
+      dxgsg9_cat.error()
+        << "CreateVertexBuffer failed" << D3DERRORSTRING(hr);
+      return nullptr;
+    }
 
-  D3DCOLOR *local_pointer;
-  hr = vbuffer->Lock(0, sizeof(D3DCOLOR), (void **) &local_pointer, D3DLOCK_DISCARD);
-  if (FAILED(hr)) {
-    dxgsg9_cat.error()
-      << "VertexBuffer::Lock failed" << D3DERRORSTRING(hr);
-    return false;
+    _constant_vbuffer = vbuffer;
+    _constant_vbuffer_color = ~cval;
   }
 
-  *local_pointer = D3DCOLOR_ARGB(255, 255, 255, 255);
+  if (cval != _constant_vbuffer_color) {
+    uint32_t *local_pointer;
+    hr = vbuffer->Lock(0, size, (void **)&local_pointer, D3DLOCK_DISCARD);
+    if (FAILED(hr)) {
+      dxgsg9_cat.error()
+        << "VertexBuffer::Lock failed" << D3DERRORSTRING(hr);
+      return nullptr;
+    }
+
+    local_pointer[0] = cval;
+    local_pointer[1] = 0x03020100;
+    local_pointer[2] = 0x000000ff;
+    local_pointer[3] = 0x0000ff00;
+    local_pointer[4] = 0x00ff0000;
+    vbuffer->Unlock();
 
-  vbuffer->Unlock();
-  _color_vbuffer = vbuffer;
+    _constant_vbuffer_color = cval;
+  }
   return vbuffer;
 }
 

+ 4 - 2
panda/src/dxgsg9/dxGraphicsStateGuardian9.h

@@ -165,7 +165,7 @@ public:
   void restore_gamma();
   static void atexit_function(void);
 
-  LPDIRECT3DVERTEXBUFFER9 get_color_vbuffer();
+  LPDIRECT3DVERTEXBUFFER9 get_constant_vbuffer(const LColor &color);
 
 protected:
   void do_issue_transform();
@@ -309,7 +309,9 @@ protected:
 
   DWORD _last_fvf;
   int _num_bound_streams;
-  LPDIRECT3DVERTEXBUFFER9 _color_vbuffer;
+  bool _instancing_enabled;
+  LPDIRECT3DVERTEXBUFFER9 _constant_vbuffer;
+  D3DCOLOR _constant_vbuffer_color;
 
   // Cache the data necessary to bind each particular light each frame, so if
   // we bind a given light multiple times, we only have to compute its data

+ 0 - 8
panda/src/dxgsg9/dxShaderContext9.I

@@ -11,14 +11,6 @@
  * @date 2006-01
  */
 
-/**
- * Returns true if the shader needs vertex colors.
- */
-INLINE bool DXShaderContext9::
-uses_vertex_color() {
-  return _uses_vertex_color;
-}
-
 /**
  * Returns true if the shader is "valid", ie, if the compilation was
  * successful.  The compilation could fail if there is a syntax error in the

+ 99 - 60
panda/src/dxgsg9/dxShaderContext9.cxx

@@ -84,6 +84,7 @@ compile_module(const ShaderModule *module, DWORD *&data) {
   spirv_cross::CompilerHLSL compiler(std::vector<uint32_t>(spv->get_data(), spv->get_data() + spv->get_data_size()));
   spirv_cross::CompilerHLSL::Options options;
   options.shader_model = 30;
+  options.flatten_matrix_vertex_input_semantics = true;
   compiler.set_hlsl_options(options);
 
   // Bind certain known attributes to specific semantics.
@@ -110,14 +111,16 @@ compile_module(const ShaderModule *module, DWORD *&data) {
     }
     else if (spec._name == InternalName::get_color()) {
       compiler.add_vertex_attribute_remap({idx, "COLOR"});
-      _uses_vertex_color = true;
     }
     else {
       // The rest gets mapped to TEXCOORD + location.
-      char buffer[16];
-      sprintf(buffer, "TEXCOORD%d", (int)texcoord_index);
-      compiler.add_vertex_attribute_remap({idx, buffer});
-      ++texcoord_index;
+      for (size_t i = 0; i < spec._elements; ++i) {
+        char buffer[16];
+        sprintf(buffer, "TEXCOORD%d", (int)texcoord_index);
+        compiler.add_vertex_attribute_remap({idx, buffer});
+        ++texcoord_index;
+        ++idx;
+      }
     }
   }
 
@@ -1107,106 +1110,139 @@ get_vertex_declaration(GSG *gsg, const GeomVertexFormat *format, BitMask32 &used
   }
 
   used_streams = 0;
-
-  D3DVERTEXELEMENT9 *elements = (D3DVERTEXELEMENT9 *)
-    alloca(sizeof(D3DVERTEXELEMENT9) * (_shader->_var_spec.size() + 1));
-
   int texcoord_index = 0;
+  size_t const num_arrays = format->get_num_arrays();
+  std::vector<D3DVERTEXELEMENT9> elements;
 
-  size_t i = 0;
   for (const Shader::ShaderVarSpec &spec : _shader->_var_spec) {
-    elements[i].Method = D3DDECLMETHOD_DEFAULT;
-    elements[i].UsageIndex = 0;
-
+    D3DDECLUSAGE usage;
     if (spec._name == InternalName::get_vertex()) {
-      elements[i].Usage = D3DDECLUSAGE_POSITION;
+      usage = D3DDECLUSAGE_POSITION;
     }
     else if (spec._name == InternalName::get_transform_weight()) {
-      elements[i].Usage = D3DDECLUSAGE_BLENDWEIGHT;
+      usage = D3DDECLUSAGE_BLENDWEIGHT;
     }
     else if (spec._name == InternalName::get_transform_index()) {
-      elements[i].Usage = D3DDECLUSAGE_BLENDINDICES;
+      usage = D3DDECLUSAGE_BLENDINDICES;
     }
     else if (spec._name == InternalName::get_normal()) {
-      elements[i].Usage = D3DDECLUSAGE_NORMAL;
+      usage = D3DDECLUSAGE_NORMAL;
     }
     else if (spec._name == InternalName::get_tangent()) {
-      elements[i].Usage = D3DDECLUSAGE_TANGENT;
+      usage = D3DDECLUSAGE_TANGENT;
     }
     else if (spec._name == InternalName::get_binormal()) {
-      elements[i].Usage = D3DDECLUSAGE_BINORMAL;
+      usage = D3DDECLUSAGE_BINORMAL;
     }
     else if (spec._name == InternalName::get_color()) {
-      elements[i].Usage = D3DDECLUSAGE_COLOR;
+      usage = D3DDECLUSAGE_COLOR;
     }
     else {
-      elements[i].Usage = D3DDECLUSAGE_TEXCOORD;
-      elements[i].UsageIndex = texcoord_index++;
+      usage = D3DDECLUSAGE_TEXCOORD;
     }
 
     int array_index;
     const GeomVertexColumn *column;
     if (!format->get_array_info(spec._name, array_index, column)) {
+      // Certain missing ones need to be gotten from the "constant vbuffer",
+      // rather than receive the default value of (0, 0, 0, 1).
+      if (spec._name == InternalName::get_color()) {
+        elements.push_back({
+          (WORD)num_arrays,
+          (WORD)0,
+          (BYTE)D3DDECLTYPE_D3DCOLOR,
+          (BYTE)D3DDECLMETHOD_DEFAULT,
+          (BYTE)D3DDECLUSAGE_COLOR,
+          (BYTE)0,
+        });
+        used_streams.set_bit(num_arrays);
+      }
+      else if (spec._name == InternalName::get_transform_index()) {
+        elements.push_back({
+          (WORD)num_arrays,
+          (WORD)4,
+          (BYTE)D3DDECLTYPE_UBYTE4,
+          (BYTE)D3DDECLMETHOD_DEFAULT,
+          (BYTE)D3DDECLUSAGE_BLENDINDICES,
+          (BYTE)0,
+        });
+        used_streams.set_bit(num_arrays);
+      }
+      else if (spec._name == InternalName::get_instance_matrix()) {
+        // Binding the last row isn't necessary; the default is (0, 0, 0, 1)
+        for (size_t ei = 0; ei < 3 && ei < spec._elements; ++ei) {
+          elements.push_back({
+            (WORD)num_arrays,
+            (WORD)(8 + 4 * ei),
+            (BYTE)D3DDECLTYPE_UBYTE4N,
+            (BYTE)D3DDECLMETHOD_DEFAULT,
+            (BYTE)D3DDECLUSAGE_TEXCOORD,
+            (BYTE)(texcoord_index + ei),
+          });
+        }
+        used_streams.set_bit(num_arrays);
+        texcoord_index += spec._elements;
+      }
+      else if (usage == D3DDECLUSAGE_TEXCOORD) {
+        texcoord_index += spec._elements;
+      }
       continue;
     }
+    used_streams.set_bit(array_index);
 
-    elements[i].Stream = array_index;
-    elements[i].Offset = column->get_start();
-
+    if (column->get_contents() == GeomEnums::C_clip_point &&
+        column->get_name() == InternalName::get_vertex()) {
+      usage = D3DDECLUSAGE_POSITIONT;
+    }
+    size_t offset = column->get_start();
     bool normalized = (column->get_contents() == GeomEnums::C_color);
     int num_components = column->get_num_components();
+
+    D3DDECLTYPE type;
     switch (column->get_numeric_type()) {
     case GeomEnums::NT_uint8:
-      elements[i].Type = normalized ? D3DDECLTYPE_UBYTE4N : D3DDECLTYPE_UBYTE4;
+      type = normalized ? D3DDECLTYPE_UBYTE4N : D3DDECLTYPE_UBYTE4;
       break;
     case GeomEnums::NT_uint16:
-      elements[i].Type = (num_components > 2) ? D3DDECLTYPE_USHORT4N : D3DDECLTYPE_USHORT2N;
+      type = (num_components > 2) ? D3DDECLTYPE_USHORT4N : D3DDECLTYPE_USHORT2N;
       break;
     case GeomEnums::NT_packed_dcba:
-      elements[i].Type = normalized ? D3DDECLTYPE_UBYTE4N : D3DDECLTYPE_UBYTE4;
+      type = normalized ? D3DDECLTYPE_UBYTE4N : D3DDECLTYPE_UBYTE4;
       break;
     case GeomEnums::NT_packed_dabc:
-      elements[i].Type = normalized ? D3DDECLTYPE_D3DCOLOR : D3DDECLTYPE_UBYTE4;
+      type = normalized ? D3DDECLTYPE_D3DCOLOR : D3DDECLTYPE_UBYTE4;
       break;
 #ifndef STDFLOAT_DOUBLE
     case GeomEnums::NT_stdfloat:
 #endif
     case GeomEnums::NT_float32:
-      elements[i].Type = D3DDECLTYPE_FLOAT1 + num_components - 1;
+      type = (D3DDECLTYPE)(D3DDECLTYPE_FLOAT1 + num_components - 1);
       break;
     case GeomEnums::NT_int16:
-      elements[i].Type = (num_components > 2)
+      type = (num_components > 2)
         ? (normalized ? D3DDECLTYPE_SHORT4N : D3DDECLTYPE_SHORT4)
         : (normalized ? D3DDECLTYPE_SHORT2N : D3DDECLTYPE_SHORT2);
       break;
     default:
+      dxgsg9_cat.error()
+        << "Unsupported numeric type " << column->get_numeric_type()
+        << " for vertex column " << *spec._name << "\n";
       continue;
     }
 
-    if (column->get_contents() == GeomEnums::C_clip_point &&
-        column->get_name() == InternalName::get_vertex()) {
-      elements[i].Usage = D3DDECLUSAGE_POSITIONT;
+    for (size_t ei = 0; ei < spec._elements; ++ei) {
+      elements.push_back({
+        (WORD)array_index,
+        (WORD)offset,
+        (BYTE)type,
+        (BYTE)D3DDECLMETHOD_DEFAULT,
+        (BYTE)usage,
+        (BYTE)((usage == D3DDECLUSAGE_TEXCOORD) ? texcoord_index++ : 0),
+      });
+      offset += column->get_element_stride();
     }
-
-    used_streams.set_bit(array_index);
-    ++i;
-  }
-
-  if (format->get_color_array_index() < 0 && _uses_vertex_color) {
-    // This format lacks a vertex color column, so we make room for an extra
-    // stream that contains the vertex color.
-    elements[i].Stream = format->get_num_arrays();
-    elements[i].Offset = 0;
-    elements[i].Type = D3DDECLTYPE_D3DCOLOR;
-    elements[i].Method = D3DDECLMETHOD_DEFAULT;
-    elements[i].Usage = D3DDECLUSAGE_COLOR;
-    elements[i].UsageIndex = 0;
-    used_streams.set_bit(format->get_num_arrays());
-    ++i;
   }
 
-  elements[i] = D3DDECL_END();
-
   // Sort the elements, as D3D seems to require them to be in order.
   struct less_than {
     bool operator () (const D3DVERTEXELEMENT9 &a, const D3DVERTEXELEMENT9 &b) {
@@ -1216,7 +1252,7 @@ get_vertex_declaration(GSG *gsg, const GeomVertexFormat *format, BitMask32 &used
       return a.Offset < b.Offset;
     }
   };
-  std::sort(elements, elements + i, less_than());
+  std::sort(elements.begin(), elements.end(), less_than());
 
   // CreateVertexDeclaration is fickle so it helps to have some good debugging
   // info here.
@@ -1226,20 +1262,23 @@ get_vertex_declaration(GSG *gsg, const GeomVertexFormat *format, BitMask32 &used
     dxgsg9_cat.debug()
       << "Creating vertex declaration for format " << *format << ":\n";
 
-    for (D3DVERTEXELEMENT9 *element = elements; element->Stream != 0xFF; ++element) {
+    for (const D3DVERTEXELEMENT9 &element : elements) {
       dxgsg9_cat.debug()
-        << "  {" << element->Stream << ", " << element->Offset << ", "
-        << "D3DDECLTYPE_" << types[element->Type] << ", "
+        << "  {" << element.Stream << ", " << element.Offset << ", "
+        << "D3DDECLTYPE_" << types[element.Type] << ", "
         << "D3DDECLMETHOD_DEFAULT, "
-        << "D3DDECLUSAGE_" << usages[element->Usage] << ", "
-        << (int)element->UsageIndex << "}\n";
+        << "D3DDECLUSAGE_" << usages[element.Usage] << ", "
+        << (int)element.UsageIndex << "}\n";
     }
   }
 
+  elements.push_back(D3DDECL_END());
+
   LPDIRECT3DVERTEXDECLARATION9 decl;
-  HRESULT result = gsg->_d3d_device->CreateVertexDeclaration(elements, &decl);
+  HRESULT result = gsg->_d3d_device->CreateVertexDeclaration(elements.data(), &decl);
   if (FAILED(result)) {
-    dxgsg9_cat.error() << "CreateVertexDeclaration failed" << D3DERRORSTRING(result);
+    dxgsg9_cat.error()
+      << "CreateVertexDeclaration failed" << D3DERRORSTRING(result);
     return nullptr;
   }
 

+ 0 - 1
panda/src/dxgsg9/dxShaderContext9.h

@@ -65,7 +65,6 @@ private:
     UINT count = 0;
   };
 
-  bool _uses_vertex_color = false;
   int _half_pixel_register = -1;
   pvector<ConstantRegister> _register_map;