Przeglądaj źródła

Merge from 1.9, cleanup DX9/shader, fix built-in shadow mapping in DX9

rdb 9 lat temu
rodzic
commit
bfe7a7b1d3

+ 4 - 0
doc/ReleaseNotes

@@ -21,6 +21,10 @@ This issue fixes several bugs that were still found in 1.9.2.
 * Fix cull issue when rendering cube map (or any multi-lens setup)
 * Fix crash rendering with the same camera to different contexts
 * Fix compile error when making static build with DX9 renderer
+* Fix assertion when using aux render targets in DX9
+* Work around Cg bug generating invalid ASM for saturated tex loads
+* Fix issues with certain Cg shader inputs in DX9
+* Support uint8 index buffers in DX9
 
 ------------------------  RELEASE 1.9.2  ------------------------
 

+ 1 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -235,6 +235,7 @@ GraphicsStateGuardian(CoordinateSystem internal_coordinate_system,
   _supports_basic_shaders = false;
   _supports_geometry_shaders = false;
   _supports_tessellation_shaders = false;
+  _supports_compute_shaders = false;
   _supports_glsl = false;
   _supports_hlsl = false;
 

+ 55 - 28
panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx

@@ -403,10 +403,35 @@ extract_texture_data(Texture *tex) {
  */
 ShaderContext *DXGraphicsStateGuardian9::
 prepare_shader(Shader *se) {
+  PStatTimer timer(_prepare_shader_pcollector);
+
+  switch (se->get_language()) {
+  case Shader::SL_GLSL:
+    dxgsg9_cat.error()
+      << "Tried to load GLSL shader, but GLSL shaders not supported by Direct3D 9.\n";
+    return NULL;
+
+  case Shader::SL_Cg:
 #ifdef HAVE_CG
-  CLP(ShaderContext) *result = new CLP(ShaderContext)(se, this);
-  return result;
+    if (_supports_basic_shaders) {
+      return new DXShaderContext9(se, this);
+    } else {
+      dxgsg9_cat.error()
+        << "Tried to load Cg shader, but basic shaders not supported.\n";
+      return NULL;
+    }
+#else
+    dxgsg9_cat.error()
+      << "Tried to load Cg shader, but Cg support not compiled in.\n";
+    return NULL;
 #endif
+
+  default:
+    dxgsg9_cat.error()
+      << "Tried to load shader with unsupported shader language!\n";
+    return NULL;
+  }
+
   return NULL;
 }
 
@@ -415,7 +440,7 @@ prepare_shader(Shader *se) {
  */
 void DXGraphicsStateGuardian9::
 release_shader(ShaderContext *sc) {
-  CLP(ShaderContext) *gsc = DCAST(CLP(ShaderContext), sc);
+  DXShaderContext9 *gsc = DCAST(DXShaderContext9, sc);
   delete gsc;
 }
 
@@ -429,9 +454,9 @@ release_shader(ShaderContext *sc) {
  * This function should not be called directly to prepare a buffer.  Instead,
  * call Geom::prepare().
  */
-VertexBufferContext *CLP(GraphicsStateGuardian)::
+VertexBufferContext *DXGraphicsStateGuardian9::
 prepare_vertex_buffer(GeomVertexArrayData *data) {
-  CLP(VertexBufferContext) *dvbc = new CLP(VertexBufferContext)(this, _prepared_objects, data);
+  DXVertexBufferContext9 *dvbc = new DXVertexBufferContext9(this, _prepared_objects, data);
 
   DWORD usage;
   D3DPOOL pool;
@@ -458,7 +483,7 @@ prepare_vertex_buffer(GeomVertexArrayData *data) {
 
   if (!FAILED(hr)) {
     #if 0
-    if (dxgsg9_cat.is_debug() && CLP(debug_buffers)) {
+    if (dxgsg9_cat.is_debug() && DXdebug_buffers9) {
       dxgsg9_cat.debug()
         << "creating vertex buffer " << dvbc->_vbuffer << ": "
         << data->get_num_rows() << " vertices "
@@ -481,16 +506,16 @@ prepare_vertex_buffer(GeomVertexArrayData *data) {
  * Updates the vertex buffer with the current data, and makes it the current
  * vertex buffer for rendering.
  */
-bool CLP(GraphicsStateGuardian)::
+bool DXGraphicsStateGuardian9::
 apply_vertex_buffer(VertexBufferContext *vbc,
                     const GeomVertexArrayDataHandle *reader, bool force ) {
 
-  CLP(VertexBufferContext) *dvbc = DCAST(CLP(VertexBufferContext), vbc);
+  DXVertexBufferContext9 *dvbc = DCAST(DXVertexBufferContext9, vbc);
 
   if (dvbc->was_modified(reader)) {
     int num_bytes = reader->get_data_size_bytes();
     #if 0
-    if (dxgsg9_cat.is_debug() && CLP(debug_buffers)) {
+    if (dxgsg9_cat.is_debug() && DXdebug_buffers9) {
       dxgsg9_cat.debug()
         << "copying " << num_bytes
         << " bytes into vertex buffer " << dvbc->_vbuffer << "\n";
@@ -544,13 +569,13 @@ apply_vertex_buffer(VertexBufferContext *vbc,
  * should never be called directly; instead, call Data::release() (or simply
  * let the Data destruct).
  */
-void CLP(GraphicsStateGuardian)::
+void DXGraphicsStateGuardian9::
 release_vertex_buffer(VertexBufferContext *vbc) {
 
-  CLP(VertexBufferContext) *dvbc = DCAST(CLP(VertexBufferContext), vbc);
+  DXVertexBufferContext9 *dvbc = DCAST(DXVertexBufferContext9, vbc);
 
   #if 0
-  if (dxgsg9_cat.is_debug() && CLP(debug_buffers)) {
+  if (dxgsg9_cat.is_debug() && DXdebug_buffers9) {
     dxgsg9_cat.debug()
       << "deleting vertex buffer " << dvbc->_vbuffer << "\n";
   }
@@ -575,8 +600,8 @@ release_vertex_buffer(VertexBufferContext *vbc) {
  * If force is not true, the function may return false indicating the data is
  * not currently available.
  */
-bool CLP(GraphicsStateGuardian)::
-setup_array_data(CLP(VertexBufferContext)*& dvbc,
+bool DXGraphicsStateGuardian9::
+setup_array_data(DXVertexBufferContext9*& dvbc,
                  const GeomVertexArrayDataHandle* array_reader,
                  bool force) {
 
@@ -587,7 +612,7 @@ setup_array_data(CLP(VertexBufferContext)*& dvbc,
     return false;
   }
 
-  dvbc = (CLP(VertexBufferContext)*)vbc;
+  dvbc = (DXVertexBufferContext9*)vbc;
   return true;
 }
 
@@ -1028,17 +1053,17 @@ end_scene() {
   if (_vertex_array_shader_context != 0) {
     _vertex_array_shader_context->disable_shader_vertex_arrays(this);
     _vertex_array_shader = (Shader *)NULL;
-    _vertex_array_shader_context = (CLP(ShaderContext) *)NULL;
+    _vertex_array_shader_context = (DXShaderContext9 *)NULL;
   }
   if (_texture_binding_shader_context != 0) {
     _texture_binding_shader_context->disable_shader_texture_bindings(this);
     _texture_binding_shader = (Shader *)NULL;
-    _texture_binding_shader_context = (CLP(ShaderContext) *)NULL;
+    _texture_binding_shader_context = (DXShaderContext9 *)NULL;
   }
   if (_current_shader_context != 0) {
     _current_shader_context->unbind(this);
     _current_shader = (Shader *)NULL;
-    _current_shader_context = (CLP(ShaderContext) *)NULL;
+    _current_shader_context = (DXShaderContext9 *)NULL;
   }
 
   _dlights.clear();
@@ -1242,7 +1267,7 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
  * pipeline is about to be used - dxShaderContexts are responsible for setting
  * up their own vertex arrays.
  */
-bool CLP(GraphicsStateGuardian)::
+bool DXGraphicsStateGuardian9::
 update_standard_vertex_arrays(bool force) {
 
   int fvf = 0;
@@ -1257,7 +1282,7 @@ update_standard_vertex_arrays(bool force) {
     }
 
     // Get the vertex buffer for this array.
-    CLP(VertexBufferContext)* dvbc;
+    DXVertexBufferContext9* dvbc;
     if (!setup_array_data(dvbc, array_reader, force)) {
       dxgsg9_cat.error() << "Unable to setup vertex buffer for array " << array_index << "\n";
       return false;
@@ -1290,7 +1315,7 @@ update_standard_vertex_arrays(bool force) {
  * so, the standard streams need to be disabled to get them "out of the way."
  * Called only from begin_draw_primitives.
  */
-void CLP(GraphicsStateGuardian)::
+void DXGraphicsStateGuardian9::
 disable_standard_vertex_arrays() {
   for ( int array_index = 0; array_index < _num_bound_streams; ++array_index )
   {
@@ -2383,7 +2408,7 @@ reset() {
   // OVERRIDE SUPPORT SINCE IT DOES NOT WORK WELL
   _screen->_supports_automatic_mipmap_generation = false;
 
-  this -> reset_render_states ( );
+  reset_render_states();
 
   _max_vertices_per_array = d3d_caps.MaxVertexIndex;
   _max_vertices_per_primitive = d3d_caps.MaxPrimitiveCount;
@@ -2646,11 +2671,11 @@ reset() {
   set_render_state(D3DRS_BLENDOP, D3DBLENDOP_ADD);
 
   _current_shader = (Shader *)NULL;
-  _current_shader_context = (CLP(ShaderContext) *)NULL;
+  _current_shader_context = (DXShaderContext9 *)NULL;
   _vertex_array_shader = (Shader *)NULL;
-  _vertex_array_shader_context = (CLP(ShaderContext) *)NULL;
+  _vertex_array_shader_context = (DXShaderContext9 *)NULL;
   _texture_binding_shader = (Shader *)NULL;
-  _texture_binding_shader_context = (CLP(ShaderContext) *)NULL;
+  _texture_binding_shader_context = (DXShaderContext9 *)NULL;
 
   PRINT_REFCNT(dxgsg9, _d3d_device);
 
@@ -2775,13 +2800,13 @@ do_issue_alpha_test() {
 void DXGraphicsStateGuardian9::
 do_issue_shader() {
 
-  CLP(ShaderContext) *context = 0;
+  DXShaderContext9 *context = 0;
   Shader *shader = 0;
   if (_target_shader) {
     shader = (Shader *)(_target_shader->get_shader());
   }
   if (shader) {
-    context = (CLP(ShaderContext) *)(shader->prepare_now(get_prepared_objects(), this));
+    context = (DXShaderContext9 *)(shader->prepare_now(get_prepared_objects(), this));
   }
 
   if (context == 0 || (context && context -> valid (this) == false)) {
@@ -3395,6 +3420,8 @@ bind_light(Spotlight *light_obj, const NodePath &light, int light_id) {
 D3DFORMAT DXGraphicsStateGuardian9::
 get_index_type(Geom::NumericType numeric_type) {
   switch (numeric_type) {
+  // NT_uint8 is automatically promoted to uint16.
+  case Geom::NT_uint8:
   case Geom::NT_uint16:
     return D3DFMT_INDEX16;
 
@@ -4539,7 +4566,7 @@ reset_d3d_device(D3DPRESENT_PARAMETERS *presentation_params,
       }
     }
 
-    this -> mark_new();
+    mark_new();
     hr = _d3d_device->Reset(&_presentation_reset);
     if (FAILED(hr) && hr != D3DERR_DEVICELOST) {
       return hr;

+ 7 - 7
panda/src/dxgsg9/dxGraphicsStateGuardian9.h

@@ -79,7 +79,7 @@ public:
                            bool force);
   virtual void release_vertex_buffer(VertexBufferContext *vbc);
 
-  bool setup_array_data(CLP(VertexBufferContext)*& vbc,
+  bool setup_array_data(DXVertexBufferContext9 *&vbc,
                         const GeomVertexArrayDataHandle* data,
                         bool force);
 
@@ -295,12 +295,12 @@ protected:
   CullFaceAttrib::Mode _cull_face_mode;
   RenderModeAttrib::Mode _current_fill_mode;  //point/wireframe/solid
 
-  PT(Shader)  _current_shader;
-  CLP(ShaderContext)  *_current_shader_context;
-  PT(Shader)  _vertex_array_shader;
-  CLP(ShaderContext)  *_vertex_array_shader_context;
-  PT(Shader)  _texture_binding_shader;
-  CLP(ShaderContext)  *_texture_binding_shader_context;
+  PT(Shader) _current_shader;
+  DXShaderContext9 *_current_shader_context;
+  PT(Shader) _vertex_array_shader;
+  DXShaderContext9 *_vertex_array_shader_context;
+  PT(Shader) _texture_binding_shader;
+  DXShaderContext9 *_texture_binding_shader_context;
 
   const DXIndexBufferContext9 *_active_ibuffer;
 

+ 30 - 23
panda/src/dxgsg9/dxIndexBufferContext9.cxx

@@ -27,9 +27,8 @@ TypeHandle DXIndexBufferContext9::_type_handle;
 DXIndexBufferContext9::
 DXIndexBufferContext9(PreparedGraphicsObjects *pgo, GeomPrimitive *data) :
   IndexBufferContext(pgo, data),
-  _ibuffer(NULL)
-{
-  _managed = -1;
+  _ibuffer(NULL),
+  _managed(-1) {
 }
 
 /**
@@ -37,8 +36,7 @@ DXIndexBufferContext9(PreparedGraphicsObjects *pgo, GeomPrimitive *data) :
  */
 DXIndexBufferContext9::
 ~DXIndexBufferContext9() {
-
-  this -> free_ibuffer ( );
+  free_ibuffer();
 }
 
 /**
@@ -70,13 +68,10 @@ free_ibuffer(void) {
         << "deleting index buffer " << _ibuffer << "\n";
     }
 
-    if (DEBUG_INDEX_BUFFER)
-    {
+    if (DEBUG_INDEX_BUFFER) {
       RELEASE(_ibuffer, dxgsg9, "index buffer", RELEASE_ONCE);
-    }
-    else
-    {
-      _ibuffer -> Release ( );
+    } else {
+      _ibuffer->Release();
     }
 
     _ibuffer = NULL;
@@ -99,6 +94,11 @@ allocate_ibuffer(DXScreenData &scrn,
 
   data_size = reader->get_data_size_bytes();
 
+  if (reader->get_index_type() == GeomEnums::NT_uint8) {
+    // We widen 8-bit indices to 16 bits.
+    data_size *= 2;
+  }
+
   _managed = scrn._managed_index_buffers;
   if (_managed)
   {
@@ -147,16 +147,12 @@ create_ibuffer(DXScreenData &scrn,
   nassertv(reader->get_object() == get_data());
   Thread *current_thread = reader->get_current_thread();
 
-  this -> free_ibuffer ( );
+  free_ibuffer();
 
   PStatTimer timer(GraphicsStateGuardian::_create_index_buffer_pcollector,
                    current_thread);
 
-  int data_size;
-
-  data_size = reader->get_data_size_bytes();
-
-  this -> allocate_ibuffer(scrn, reader);
+  allocate_ibuffer(scrn, reader);
 }
 
 /**
@@ -175,6 +171,11 @@ upload_data(const GeomPrimitivePipelineReader *reader, bool force) {
   }
   int data_size = reader->get_data_size_bytes();
 
+  if (reader->get_index_type() == GeomEnums::NT_uint8) {
+    // We widen 8-bit indices to 16 bits.
+    data_size *= 2;
+  }
+
   if (dxgsg9_cat.is_spam()) {
     dxgsg9_cat.spam()
       << "copying " << data_size
@@ -186,12 +187,9 @@ upload_data(const GeomPrimitivePipelineReader *reader, bool force) {
   HRESULT hr;
   BYTE *local_pointer;
 
-  if (_managed)
-  {
+  if (_managed) {
     hr = _ibuffer->Lock(0, data_size, (void **) &local_pointer, 0);
-  }
-  else
-  {
+  } else {
     hr = _ibuffer->Lock(0, data_size, (void **) &local_pointer, D3DLOCK_DISCARD);
   }
   if (FAILED(hr)) {
@@ -201,7 +199,16 @@ upload_data(const GeomPrimitivePipelineReader *reader, bool force) {
   }
 
   GraphicsStateGuardian::_data_transferred_pcollector.add_level(data_size);
-  memcpy(local_pointer, data_pointer, data_size);
+
+  if (reader->get_index_type() == GeomEnums::NT_uint8) {
+    // Widen to 16 bits, as DirectX doesn't support 8-bit indices.
+    uint16_t *ptr = (uint16_t *)local_pointer;
+    for (size_t i = 0; i < data_size; i += 2) {
+      *ptr++ = (uint16_t)*data_pointer++;
+    }
+  } else {
+    memcpy(local_pointer, data_pointer, data_size);
+  }
 
   _ibuffer->Unlock();
   return true;

+ 1 - 1
panda/src/dxgsg9/dxOcclusionQueryContext9.h

@@ -41,7 +41,7 @@ public:
   }
   static void init_type() {
     OcclusionQueryContext::init_type();
-    register_type(_type_handle, CLASSPREFIX_QUOTED "OcclusionQueryContext",
+    register_type(_type_handle, "DXOcclusionQueryContext9",
                   OcclusionQueryContext::get_class_type());
   }
   virtual TypeHandle get_type() const {

+ 1 - 1
panda/src/dxgsg9/dxShaderContext9.I

@@ -17,7 +17,7 @@
  * shader, or if the current video card isn't shader-capable, or if no shader
  * languages are compiled into panda.
  */
-INLINE bool CLP(ShaderContext)::
+INLINE bool DXShaderContext9::
 valid(GSG *gsg) {
 #ifdef HAVE_CG
   return (_cg_program != 0);

+ 180 - 221
panda/src/dxgsg9/dxShaderContext9.cxx

@@ -28,13 +28,13 @@
 
 #define DEBUG_SHADER 0
 
-TypeHandle CLP(ShaderContext)::_type_handle;
+TypeHandle DXShaderContext9::_type_handle;
 
 /**
  * xyz
  */
-CLP(ShaderContext)::
-CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
+DXShaderContext9::
+DXShaderContext9(Shader *s, GSG *gsg) : ShaderContext(s) {
   _vertex_element_array = NULL;
   _vertex_declaration = NULL;
 
@@ -46,7 +46,6 @@ CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
   CGcontext context = DCAST(DXGraphicsStateGuardian9, gsg)->_cg_context;
 
   if (s->get_language() == Shader::SL_Cg) {
-
     // Ask the shader to compile itself for us and to give us the resulting Cg
     // program objects.
     if (!s->cg_compile_for(gsg->_shader_caps, context,
@@ -63,7 +62,7 @@ CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
     HRESULT hr;
     bool success = true;
     hr = cgD3D9LoadProgram(_cg_program, FALSE, assembly_flags);
-    if (FAILED (hr)) {
+    if (FAILED(hr)) {
       dxgsg9_cat.error()
         << "cgD3D9LoadProgram failed " << D3DERRORSTRING(hr);
 
@@ -80,46 +79,26 @@ CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
 /**
  * xyz
  */
-CLP(ShaderContext)::
-~CLP(ShaderContext)() {
+DXShaderContext9::
+~DXShaderContext9() {
   release_resources();
 
-  if ( _vertex_declaration != NULL ) {
+  if (_vertex_declaration != NULL) {
     _vertex_declaration->Release();
     _vertex_declaration = NULL;
   }
 
-  if ( _vertex_element_array != NULL ) {
+  if (_vertex_element_array != NULL) {
     delete _vertex_element_array;
     _vertex_element_array = NULL;
   }
 }
 
-/*
- * int save_file (int size, void *data, char *file_path) { int state; int
- * file_handle; state = false; file_handle = _open (file_path, _O_CREAT |
- * _O_RDWR | _O_TRUNC, _S_IREAD | _S_IWRITE); if (file_handle != -1) { if
- * (_write (file_handle, data, size) == size) { state = true; } _close
- * (file_handle); } return state; } if (dxgsg9_cat.is_debug()) { DEBUG: output
- * the generated program const char *vertex_program; const char
- * *pixel_program; vertex_program = cgGetProgramString (_cg_program[0],
- * CG_COMPILED_PROGRAM); pixel_program = cgGetProgramString (_cg_program[1],
- * CG_COMPILED_PROGRAM); dxgsg9_cat.debug() << vertex_program << "\n";
- * dxgsg9_cat.debug() << pixel_program << "\n"; save the generated program to
- * a file int size; char file_path [512]; char drive[_MAX_DRIVE]; char
- * dir[_MAX_DIR]; char fname[_MAX_FNAME]; char ext[_MAX_EXT]; _splitpath
- * (_name.c_str ( ), drive, dir, fname, ext); size = strlen (vertex_program);
- * sprintf (file_path, "%s.vasm", fname); save_file (size, (void *)
- * vertex_program, file_path); size = strlen (pixel_program); sprintf
- * (file_path, "%s.pasm", fname); save_file (size, (void *) pixel_program,
- * file_path); }
- */
-
 /**
  * Should deallocate all system resources (such as vertex program handles or
  * Cg contexts).
  */
-void CLP(ShaderContext)::
+void DXShaderContext9::
 release_resources() {
 #ifdef HAVE_CG
   if (_cg_program) {
@@ -139,9 +118,8 @@ release_resources() {
  * This function is to be called to enable a new shader.  It also initializes
  * all of the shader's input parameters.
  */
-bool CLP(ShaderContext)::
+bool DXShaderContext9::
 bind(GSG *gsg) {
-
   bool bind_state = false;
 
 #ifdef HAVE_CG
@@ -149,7 +127,7 @@ bind(GSG *gsg) {
     // clear the last cached FVF to make sure the next SetFVF call goes
     // through
 
-    gsg -> _last_fvf = 0;
+    gsg->_last_fvf = 0;
 
     // Pass in k-parameters and transform-parameters
     issue_parameters(gsg, Shader::SSD_general);
@@ -159,7 +137,7 @@ bind(GSG *gsg) {
     // Bind the shaders.
     bind_state = true;
     hr = cgD3D9BindProgram(_cg_program);
-    if (FAILED (hr)) {
+    if (FAILED(hr)) {
       dxgsg9_cat.error() << "cgD3D9BindProgram failed " << D3DERRORSTRING(hr);
 
       CGerror error = cgGetError();
@@ -178,9 +156,8 @@ bind(GSG *gsg) {
 /**
  * This function disables a currently-bound shader.
  */
-void CLP(ShaderContext)::
+void DXShaderContext9::
 unbind(GSG *gsg) {
-
 #ifdef HAVE_CG
   if (_cg_program) {
     HRESULT hr;
@@ -211,48 +188,59 @@ InternalName *global_internal_name_0 = 0;
 InternalName *global_internal_name_1 = 0;
 #endif
 
-void CLP(ShaderContext)::
+void DXShaderContext9::
 issue_parameters(GSG *gsg, int altered) {
 #ifdef HAVE_CG
   if (_cg_program) {
 
-  // Iterate through _ptr parameters
-    for (int i=0; i<(int)_shader->_ptr_spec.size(); i++) {
-      if(altered & (_shader->_ptr_spec[i]._dep[0] | _shader->_ptr_spec[i]._dep[1])){
-#ifdef HAVE_CG
-        const Shader::ShaderPtrSpec& _ptr = _shader->_ptr_spec[i];
-        Shader::ShaderPtrData* _ptr_data =
-          const_cast< Shader::ShaderPtrData*>(gsg->fetch_ptr_parameter(_ptr));
+    // Iterate through _ptr parameters
+    for (size_t i = 0; i < _shader->_ptr_spec.size(); ++i) {
+      const Shader::ShaderPtrSpec &spec = _shader->_ptr_spec[i];
+
+      if (altered & (spec._dep[0] | spec._dep[1])) {
+        const Shader::ShaderPtrData *ptr_data = gsg->fetch_ptr_parameter(spec);
 
-        if (_ptr_data == NULL){ //the input is not contained in ShaderPtrData
+        if (ptr_data == NULL) { //the input is not contained in ShaderPtrData
           release_resources();
           return;
         }
 
-        CGparameter p = _cg_parameter_map[_ptr._id._seqno];
+        // Calculate how many elements to transfer; no more than it expects,
+        // but certainly no more than we have.
+        int input_size = min(abs(spec._dim[0] * spec._dim[1] * spec._dim[2]), ptr_data->_size);
+
+        CGparameter p = _cg_parameter_map[spec._id._seqno];
+        switch (ptr_data->_type) {
+        case Shader::SPT_int:
+          cgSetParameterValueic(p, input_size, (int *)ptr_data->_ptr);
+          break;
+
+        case Shader::SPT_double:
+          cgSetParameterValuedc(p, input_size, (double *)ptr_data->_ptr);
+          break;
 
-        switch(_ptr_data->_type) {
         case Shader::SPT_float:
-          cgD3D9SetUniform(p, (PN_stdfloat*)_ptr_data->_ptr);
+          cgSetParameterValuefc(p, input_size, (float *)ptr_data->_ptr);
           break;
 
         default:
           dxgsg9_cat.error()
-            << _ptr._id._name << ":" << "unrecognized parameter type\n";
+            << spec._id._name << ": unrecognized parameter type\n";
           release_resources();
           return;
         }
       }
-#endif
     }
 
-    for (int i=0; i<(int)_shader->_mat_spec.size(); i++) {
-      if (altered & (_shader->_mat_spec[i]._dep[0] | _shader->_mat_spec[i]._dep[1])) {
-        CGparameter p = _cg_parameter_map[_shader->_mat_spec[i]._id._seqno];
+    for (size_t i = 0; i < _shader->_mat_spec.size(); ++i) {
+      Shader::ShaderMatSpec &spec = _shader->_mat_spec[i];
+
+      if (altered & (spec._dep[0] | spec._dep[1])) {
+        CGparameter p = _cg_parameter_map[spec._id._seqno];
         if (p == NULL) {
           continue;
         }
-        const LMatrix4 *val = gsg->fetch_specified_value(_shader->_mat_spec[i], altered);
+        const LMatrix4 *val = gsg->fetch_specified_value(spec, altered);
         if (val) {
           HRESULT hr;
           PN_stdfloat v [4];
@@ -263,83 +251,79 @@ issue_parameters(GSG *gsg, int altered) {
           const float *data;
           data = temp_matrix.get_data();
 
-          #if DEBUG_SHADER
+#if DEBUG_SHADER
           // DEBUG
-          global_data = (PN_stdfloat *) data;
-          global_shader_mat_spec = &_shader->_mat_spec[i];
-          global_internal_name_0 = global_shader_mat_spec -> _arg [0];
-          global_internal_name_1 = global_shader_mat_spec -> _arg [1];
-          #endif
+          global_data = (PN_stdfloat *)data;
+          global_shader_mat_spec = &spec;
+          global_internal_name_0 = global_shader_mat_spec->_arg[0];
+          global_internal_name_1 = global_shader_mat_spec->_arg[1];
+#endif
 
-          switch (_shader->_mat_spec[i]._piece) {
+          switch (spec._piece) {
           case Shader::SMP_whole:
             // TRANSPOSE REQUIRED
             temp_matrix.transpose_in_place();
             data = temp_matrix.get_data();
 
-            hr = cgD3D9SetUniform (p, data);
+            hr = cgD3D9SetUniform(p, data);
             break;
 
           case Shader::SMP_transpose:
             // NO TRANSPOSE REQUIRED
-            hr = cgD3D9SetUniform (p, data);
+            hr = cgD3D9SetUniform(p, data);
             break;
 
           case Shader::SMP_row0:
-            hr = cgD3D9SetUniform (p, data + 0);
+            hr = cgD3D9SetUniform(p, data + 0);
             break;
           case Shader::SMP_row1:
-            hr = cgD3D9SetUniform (p, data + 4);
+            hr = cgD3D9SetUniform(p, data + 4);
             break;
           case Shader::SMP_row2:
-            hr = cgD3D9SetUniform (p, data + 8);
+            hr = cgD3D9SetUniform(p, data + 8);
             break;
           case Shader::SMP_row3x1:
           case Shader::SMP_row3x2:
           case Shader::SMP_row3x3:
           case Shader::SMP_row3:
-            hr = cgD3D9SetUniform (p, data + 12);
+            hr = cgD3D9SetUniform(p, data + 12);
             break;
 
           case Shader::SMP_col0:
             v[0] = data[0]; v[1] = data[4]; v[2] = data[8]; v[3] = data[12];
-            hr = cgD3D9SetUniform (p, v);
+            hr = cgD3D9SetUniform(p, v);
             break;
           case Shader::SMP_col1:
             v[0] = data[1]; v[1] = data[5]; v[2] = data[9]; v[3] = data[13];
-            hr = cgD3D9SetUniform (p, v);
+            hr = cgD3D9SetUniform(p, v);
             break;
           case Shader::SMP_col2:
             v[0] = data[2]; v[1] = data[6]; v[2] = data[10]; v[3] = data[14];
-            hr = cgD3D9SetUniform (p, v);
+            hr = cgD3D9SetUniform(p, v);
             break;
           case Shader::SMP_col3:
             v[0] = data[3]; v[1] = data[7]; v[2] = data[11]; v[3] = data[15];
-            hr = cgD3D9SetUniform (p, v);
+            hr = cgD3D9SetUniform(p, v);
             break;
 
           default:
             dxgsg9_cat.error()
-              << "issue_parameters ( ) SMP parameter type not implemented " << _shader->_mat_spec[i]._piece << "\n";
+              << "issue_parameters () SMP parameter type not implemented " << spec._piece << "\n";
             break;
           }
 
-          if (FAILED (hr)) {
-
+          if (FAILED(hr)) {
             string name = "unnamed";
 
-            if (_shader->_mat_spec[i]._arg [0]) {
-              name = _shader->_mat_spec[i]._arg [0] -> get_basename ( );
+            if (spec._arg[0]) {
+              name = spec._arg[0]->get_basename();
             }
 
             dxgsg9_cat.error()
-              << "NAME  " << name << "\n"
-              << "MAT TYPE  "
-              << _shader->_mat_spec[i]._piece
-              << " cgD3D9SetUniform failed "
-              << D3DERRORSTRING(hr);
+              << "NAME  " << name << "\n" << "MAT TYPE  " << spec._piece
+              << " cgD3D9SetUniform failed " << D3DERRORSTRING(hr);
 
-            CGerror error = cgGetError ();
+            CGerror error = cgGetError();
             if (error != CG_NO_ERROR) {
               dxgsg9_cat.error() << "  CG ERROR: " << cgGetErrorString(error) << "\n";
             }
@@ -354,13 +338,12 @@ issue_parameters(GSG *gsg, int altered) {
 /**
  * Disable all the vertex arrays used by this shader.
  */
-void CLP(ShaderContext)::
+void DXShaderContext9::
 disable_shader_vertex_arrays(GSG *gsg) {
   LPDIRECT3DDEVICE9 device = gsg->_screen->_d3d_device;
 
-  for ( int array_index = 0; array_index < _num_bound_streams; ++array_index )
-  {
-    device->SetStreamSource( array_index, NULL, 0, 0 );
+  for (int array_index = 0; array_index < _num_bound_streams; ++array_index) {
+    device->SetStreamSource(array_index, NULL, 0, 0);
   }
   _num_bound_streams = 0;
 }
@@ -372,8 +355,8 @@ disable_shader_vertex_arrays(GSG *gsg) {
  * because it may unnecessarily disable arrays then immediately reenable them.
  * We may optimize this someday.
  */
-bool CLP(ShaderContext)::
-update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
+bool DXShaderContext9::
+update_shader_vertex_arrays(DXShaderContext9 *prev, GSG *gsg, bool force) {
   if (prev) prev->disable_shader_vertex_arrays(gsg);
 #ifdef HAVE_CG
   if (!_cg_program) {
@@ -394,7 +377,7 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
 
     // Discard and recreate the VertexElementArray.  This thrashes pretty
     // bad....
-    if ( _vertex_element_array != NULL ) {
+    if (_vertex_element_array != NULL) {
       delete _vertex_element_array;
     }
     _vertex_element_array = new VertexElementArray(nvarying + 2);
@@ -408,17 +391,17 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
     // out only those for a single stream.
 
     int number_of_arrays = gsg->_data_reader->get_num_arrays();
-    for ( int array_index = 0; array_index < number_of_arrays; ++array_index ) {
+    for (int array_index = 0; array_index < number_of_arrays; ++array_index) {
       const GeomVertexArrayDataHandle* array_reader =
-        gsg->_data_reader->get_array_reader( array_index );
-      if ( array_reader == NULL ) {
+        gsg->_data_reader->get_array_reader(array_index);
+      if (array_reader == NULL) {
         dxgsg9_cat.error() << "Unable to get reader for array " << array_index << "\n";
         continue;
       }
 
-      for ( int var_index = 0; var_index < nvarying; ++var_index ) {
+      for (int var_index = 0; var_index < nvarying; ++var_index) {
         CGparameter p = _cg_parameter_map[_shader->_var_spec[var_index]._id._seqno];
-        if ( p == NULL ) {
+        if (p == NULL) {
           dxgsg9_cat.info() <<
             "No parameter in map for parameter " << var_index <<
             " (probably optimized away)\n";
@@ -440,14 +423,12 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
           }
         }
 
-        const GeomVertexArrayDataHandle* param_array_reader;
+        const GeomVertexArrayDataHandle *param_array_reader;
         Geom::NumericType numeric_type;
-        int num_values;
-        int start;
-        int stride;
-        if ( gsg->_data_reader->get_array_info( name,
-                                                param_array_reader, num_values, numeric_type,
-                                                start, stride ) == false ) {
+        int num_values, start, stride;
+        if (!gsg->_data_reader->get_array_info(name, param_array_reader,
+                                               num_values, numeric_type,
+                                               start, stride)) {
           // This is apparently not an error (actually I think it is, just not
           // a fatal one). The GL implementation fails silently in this case,
           // but the net result is that we end up not supplying input for a
@@ -458,105 +439,105 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
         }
 
         // If not associated with the array we're working on, move on.
-        if ( param_array_reader != array_reader ) {
+        if (param_array_reader != array_reader) {
           continue;
         }
 
-        const char* semantic = cgGetParameterSemantic( p );
-        if ( semantic == NULL ) {
+        const char *semantic = cgGetParameterSemantic(p);
+        if (semantic == NULL) {
           dxgsg9_cat.error() << "Unable to retrieve semantic for parameter " << var_index << "\n";
           continue;
         }
 
-        if ( strncmp( semantic, "POSITION", strlen( "POSITION" ) ) == 0 ) {
+        if (strncmp(semantic, "POSITION", strlen("POSITION")) == 0) {
           if (numeric_type == Geom::NT_float32) {
             switch (num_values) {
-              case 3:
-                vertex_element_array->add_position_xyz_vertex_element(array_index, start);
-                break;
-              case 4:
-                vertex_element_array->add_position_xyzw_vertex_element(array_index, start);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid number of vertex coordinate elements " << num_values << "\n";
-                break;
+            case 3:
+              vertex_element_array->add_position_xyz_vertex_element(array_index, start);
+              break;
+            case 4:
+              vertex_element_array->add_position_xyzw_vertex_element(array_index, start);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid number of vertex coordinate elements " << num_values << "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid vertex type " << numeric_type << "\n";
           }
-        } else if ( strncmp( semantic, "TEXCOORD", strlen( "TEXCOORD" ) ) == 0 ) {
-          int slot = atoi( semantic + strlen( "TEXCOORD" ) );
+        } else if (strncmp(semantic, "TEXCOORD", strlen("TEXCOORD")) == 0) {
+          int slot = atoi(semantic + strlen("TEXCOORD"));
           if (numeric_type == Geom::NT_float32) {
             switch (num_values) {
-              case 1:
-                vertex_element_array->add_u_vertex_element(array_index, start, slot);
-                break;
-              case 2:
-                vertex_element_array->add_uv_vertex_element(array_index, start, slot);
-                break;
-              case 3:
-                vertex_element_array->add_uvw_vertex_element(array_index, start, slot);
-                break;
-              case 4:
-                vertex_element_array->add_xyzw_vertex_element(array_index, start, slot);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid number of vertex texture coordinate elements " << num_values <<  "\n";
-                break;
+            case 1:
+              vertex_element_array->add_u_vertex_element(array_index, start, slot);
+              break;
+            case 2:
+              vertex_element_array->add_uv_vertex_element(array_index, start, slot);
+              break;
+            case 3:
+              vertex_element_array->add_uvw_vertex_element(array_index, start, slot);
+              break;
+            case 4:
+              vertex_element_array->add_xyzw_vertex_element(array_index, start, slot);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid number of vertex texture coordinate elements " << num_values <<  "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid texture coordinate type " << numeric_type << "\n";
           }
-        } else if ( strncmp( semantic, "COLOR", strlen( "COLOR" ) ) == 0 ) {
+        } else if (strncmp(semantic, "COLOR", strlen("COLOR")) == 0) {
           if (numeric_type == Geom::NT_packed_dcba ||
               numeric_type == Geom::NT_packed_dabc ||
               numeric_type == Geom::NT_uint8) {
             switch (num_values) {
-              case 4:
-                vertex_element_array->add_diffuse_color_vertex_element(array_index, start);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid color coordinates " << num_values << "\n";
-                break;
+            case 4:
+              vertex_element_array->add_diffuse_color_vertex_element(array_index, start);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid color coordinates " << num_values << "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid color type " << numeric_type << "\n";
           }
-        } else if ( strncmp( semantic, "NORMAL", strlen( "NORMAL" ) ) == 0 ) {
+        } else if (strncmp(semantic, "NORMAL", strlen("NORMAL")) == 0) {
           if (numeric_type == Geom::NT_float32) {
             switch (num_values) {
-              case 3:
-                vertex_element_array->add_normal_vertex_element(array_index, start);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid number of normal coordinate elements " << num_values << "\n";
-                break;
+            case 3:
+              vertex_element_array->add_normal_vertex_element(array_index, start);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid number of normal coordinate elements " << num_values << "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid normal type " << numeric_type << "\n";
           }
-        } else if ( strncmp( semantic, "BINORMAL", strlen( "BINORMAL" ) ) == 0 ) {
+        } else if (strncmp(semantic, "BINORMAL", strlen("BINORMAL")) == 0) {
           if (numeric_type == Geom::NT_float32) {
             switch (num_values) {
-              case 3:
-                vertex_element_array->add_binormal_vertex_element(array_index, start);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid number of binormal coordinate elements " << num_values << "\n";
-                break;
+            case 3:
+              vertex_element_array->add_binormal_vertex_element(array_index, start);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid number of binormal coordinate elements " << num_values << "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid binormal type " << numeric_type << "\n";
           }
-        } else if ( strncmp( semantic, "TANGENT", strlen( "TANGENT" ) ) == 0 ) {
+        } else if (strncmp(semantic, "TANGENT", strlen("TANGENT")) == 0) {
           if (numeric_type == Geom::NT_float32) {
             switch (num_values) {
-              case 3:
-                vertex_element_array->add_tangent_vertex_element(array_index, start);
-                break;
-              default:
-                dxgsg9_cat.error() << "VE ERROR: invalid number of tangent coordinate elements " << num_values << "\n";
-                break;
+            case 3:
+              vertex_element_array->add_tangent_vertex_element(array_index, start);
+              break;
+            default:
+              dxgsg9_cat.error() << "VE ERROR: invalid number of tangent coordinate elements " << num_values << "\n";
+              break;
             }
           } else {
             dxgsg9_cat.error() << "VE ERROR: invalid tangent type " << numeric_type << "\n";
@@ -567,15 +548,15 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
       }
 
       // Get the vertex buffer for this array.
-      CLP(VertexBufferContext)* dvbc;
+      DXVertexBufferContext9 *dvbc;
       if (!gsg->setup_array_data(dvbc, array_reader, force)) {
         dxgsg9_cat.error() << "Unable to setup vertex buffer for array " << array_index << "\n";
         continue;
       }
 
       // Bind this array as the data source for the corresponding stream.
-      const GeomVertexArrayFormat* array_format = array_reader->get_array_format();
-      hr = device->SetStreamSource( array_index, dvbc->_vbuffer, 0, array_format->get_stride() );
+      const GeomVertexArrayFormat *array_format = array_reader->get_array_format();
+      hr = device->SetStreamSource(array_index, dvbc->_vbuffer, 0, array_format->get_stride());
       if (FAILED(hr)) {
         dxgsg9_cat.error() << "SetStreamSource failed" << D3DERRORSTRING(hr);
       }
@@ -583,8 +564,8 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
 
     _num_bound_streams = number_of_arrays;
 
-    if (( _vertex_element_array != NULL ) &&
-        ( _vertex_element_array->add_end_vertex_element() != false )) {
+    if (_vertex_element_array != NULL &&
+        _vertex_element_array->add_end_vertex_element()) {
       if (dxgsg9_cat.is_debug()) {
         // Note that the currently generated vertex declaration works but
         // never validates.  My theory is that this is due to the shader
@@ -599,17 +580,17 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
       }
 
       // Discard the old VertexDeclaration.  This thrashes pretty bad....
-      if ( _vertex_declaration != NULL ) {
+      if (_vertex_declaration != NULL) {
         _vertex_declaration->Release();
         _vertex_declaration = NULL;
       }
 
-      hr = device->CreateVertexDeclaration( _vertex_element_array->_vertex_element_array,
-                                            &_vertex_declaration );
-      if (FAILED (hr)) {
+      hr = device->CreateVertexDeclaration(_vertex_element_array->_vertex_element_array,
+                                           &_vertex_declaration);
+      if (FAILED(hr)) {
         dxgsg9_cat.error() << "CreateVertexDeclaration failed" << D3DERRORSTRING(hr);
       } else {
-        hr = device->SetVertexDeclaration( _vertex_declaration );
+        hr = device->SetVertexDeclaration(_vertex_declaration);
         if (FAILED(hr)) {
           dxgsg9_cat.error() << "SetVertexDeclaration failed" << D3DERRORSTRING(hr);
         }
@@ -626,12 +607,11 @@ update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg, bool force) {
 /**
  * Disable all the texture bindings used by this shader.
  */
-void CLP(ShaderContext)::
-disable_shader_texture_bindings(GSG *gsg)
-{
+void DXShaderContext9::
+disable_shader_texture_bindings(GSG *gsg) {
 #ifdef HAVE_CG
   if (_cg_program) {
-    for (int i=0; i<(int)_shader->_tex_spec.size(); i++) {
+    for (size_t i = 0; i < _shader->_tex_spec.size(); ++i) {
       CGparameter p = _cg_parameter_map[_shader->_tex_spec[i]._id._seqno];
       if (p == NULL) {
         continue;
@@ -640,12 +620,10 @@ disable_shader_texture_bindings(GSG *gsg)
 
       HRESULT hr;
 
-      hr = gsg -> _d3d_device -> SetTexture (texunit, NULL);
-      if (FAILED (hr)) {
+      hr = gsg->_d3d_device->SetTexture(texunit, NULL);
+      if (FAILED(hr)) {
         dxgsg9_cat.error()
-          << "SetTexture ("
-          << texunit
-          << ", NULL) failed "
+          << "SetTexture(" << texunit << ", NULL) failed "
           << D3DERRORSTRING(hr);
       }
     }
@@ -660,60 +638,44 @@ disable_shader_texture_bindings(GSG *gsg)
  * because it may unnecessarily disable textures then immediately reenable
  * them.  We may optimize this someday.
  */
-void CLP(ShaderContext)::
-update_shader_texture_bindings(CLP(ShaderContext) *prev, GSG *gsg)
-{
-  if (prev) prev->disable_shader_texture_bindings(gsg);
+void DXShaderContext9::
+update_shader_texture_bindings(DXShaderContext9 *prev, GSG *gsg) {
+  if (prev) {
+    prev->disable_shader_texture_bindings(gsg);
+  }
 
 #ifdef HAVE_CG
   if (_cg_program) {
-
-    for (int i=0; i<(int)_shader->_tex_spec.size(); i++) {
-      CGparameter p = _cg_parameter_map[_shader->_tex_spec[i]._id._seqno];
+    for (size_t i = 0; i < _shader->_tex_spec.size(); ++i) {
+      Shader::ShaderTexSpec &spec = _shader->_tex_spec[i];
+      CGparameter p = _cg_parameter_map[spec._id._seqno];
       if (p == NULL) {
         continue;
       }
-      Texture *tex = NULL;
+
       int view = gsg->get_current_tex_view_offset();
-      InternalName *id = _shader->_tex_spec[i]._name;
       SamplerState sampler;
 
-      if (id != NULL) {
-        const ShaderInput *input = gsg->_target_shader->get_shader_input(id);
-        tex = input->get_texture();
-        sampler = input->get_sampler();
-
-      } else {
-        // We get the TextureAttrib directly from the _target_rs, not the
-        // filtered TextureAttrib in _target_texture.
-        const TextureAttrib *texattrib = DCAST(TextureAttrib, gsg->_target_rs->get_attrib_def(TextureAttrib::get_class_slot()));
-        nassertv(texattrib != (TextureAttrib *)NULL);
-
-        if (_shader->_tex_spec[i]._stage >= texattrib->get_num_on_stages()) {
-          continue;
-        }
-        TextureStage *stage = texattrib->get_on_stage(_shader->_tex_spec[i]._stage);
-        tex = texattrib->get_on_texture(stage);
-        sampler = texattrib->get_on_sampler(stage);
-        view += stage->get_tex_view_offset();
+      PT(Texture) tex = gsg->fetch_specified_texture(spec, sampler, view);
+      if (tex.is_null()) {
+        continue;
       }
-      if (_shader->_tex_spec[i]._suffix != 0) {
+
+      if (spec._suffix != 0) {
         // The suffix feature is inefficient.  It is a temporary hack.
-        if (tex == 0) {
-          continue;
-        }
-        tex = tex->load_related(_shader->_tex_spec[i]._suffix);
+        tex = tex->load_related(spec._suffix);
       }
-      if ((tex == 0) || (tex->get_texture_type() != _shader->_tex_spec[i]._desired_type)) {
+
+      if (tex->get_texture_type() != spec._desired_type) {
         continue;
       }
+
       TextureContext *tc = tex->prepare_now(view, gsg->_prepared_objects, gsg);
       if (tc == (TextureContext*)NULL) {
         continue;
       }
 
       int texunit = cgGetParameterResourceIndex(p);
-
       gsg->apply_texture(texunit, tc, sampler);
     }
   }
@@ -721,8 +683,7 @@ update_shader_texture_bindings(CLP(ShaderContext) *prev, GSG *gsg)
 }
 
 // DEBUG CODE TO TEST ASM CODE GENERATED BY Cg
-void assemble_shader_test(char *file_path)
-{
+void assemble_shader_test(char *file_path) {
   int flags;
   D3DXMACRO *defines;
   LPD3DXINCLUDE include;
@@ -735,17 +696,15 @@ void assemble_shader_test(char *file_path)
   shader = 0;
   error_messages = 0;
 
-  D3DXAssembleShaderFromFile (file_path, defines, include, flags, &shader, &error_messages);
-  if (error_messages)
-  {
+  D3DXAssembleShaderFromFile(file_path, defines, include, flags, &shader, &error_messages);
+  if (error_messages) {
     char *error_message;
 
-    error_message = (char *) (error_messages -> GetBufferPointer ( ));
-    if (error_message)
-    {
+    error_message = (char *)error_messages->GetBufferPointer();
+    if (error_message) {
       dxgsg9_cat.error() << error_message;
     }
 
-    error_messages -> Release ( );
+    error_messages->Release();
   }
 }

+ 36 - 23
panda/src/dxgsg9/dxShaderContext9.h

@@ -21,33 +21,46 @@
 #include "shader.h"
 #include "shaderContext.h"
 
-#define CLP(name) DX##name##9
-#define CLASSPREFIX_QUOTED "DX"
-
 class VertexElementArray;
-class CLP(GraphicsStateGuardian);
-
-/*
- * Caution: adding HLSL support is going to be tricky, as the parsing needs to
- * be done in the cull thread, which cannot use the DX API.  - Josh typedef
- * struct { int vertex_shader; int total_constant_descriptions;
- * D3DXCONSTANT_DESC *constant_description_array; } DX_PARAMETER; typedef
- * struct { int state; union { DIRECT_3D_VERTEX_SHADER
- * direct_3d_vertex_shader; DIRECT_3D_PIXEL_SHADER direct_3d_pixel_shader; };
- * LPD3DXCONSTANTTABLE constant_table; D3DXCONSTANTTABLE_DESC
- * constant_table_description; int total_semantics; D3DXSEMANTIC
- * *semantic_array; } DIRECT_3D_SHADER;
- */
+class DXGraphicsStateGuardian9;
+
+// Caution: adding HLSL support is going to be tricky, as the parsing needs to
+// be done in the cull thread, which cannot use the DX API.  - Josh
+//
+//
+// typedef struct
+// {
+//   int vertex_shader;
+//   int total_constant_descriptions;
+//   D3DXCONSTANT_DESC *constant_description_array;
+// }
+// DX_PARAMETER;
+//
+// typedef struct
+// {
+//   int state;
+//   union
+//   {
+//     DIRECT_3D_VERTEX_SHADER direct_3d_vertex_shader;
+//     DIRECT_3D_PIXEL_SHADER direct_3d_pixel_shader;
+//   };
+//   LPD3DXCONSTANTTABLE constant_table;
+//   D3DXCONSTANTTABLE_DESC constant_table_description;
+//
+//   int total_semantics;
+//   D3DXSEMANTIC *semantic_array;
+// }
+// DIRECT_3D_SHADER;
 
 /**
  * xyz
  */
-class EXPCL_PANDADX CLP(ShaderContext) : public ShaderContext {
+class EXPCL_PANDADX DXShaderContext9 : public ShaderContext {
 public:
-  typedef CLP(GraphicsStateGuardian) GSG;
+  typedef DXGraphicsStateGuardian9 GSG;
 
-  CLP(ShaderContext)(Shader *s, GSG *gsg);
-  ~CLP(ShaderContext)();
+  DXShaderContext9(Shader *s, GSG *gsg);
+  ~DXShaderContext9();
 
   INLINE bool valid(GSG *gsg);
   bool bind(GSG *gsg);
@@ -55,10 +68,10 @@ public:
   void issue_parameters(GSG *gsg, int altered);
   void issue_transform(GSG *gsg);
   void disable_shader_vertex_arrays(GSG *gsg);
-  bool update_shader_vertex_arrays(CLP(ShaderContext) *prev, GSG *gsg,
+  bool update_shader_vertex_arrays(DXShaderContext9 *prev, GSG *gsg,
                                    bool force);
   void disable_shader_texture_bindings(GSG *gsg);
-  void update_shader_texture_bindings(CLP(ShaderContext) *prev, GSG *gsg);
+  void update_shader_texture_bindings(DXShaderContext9 *prev, GSG *gsg);
 
   class VertexElementArray* _vertex_element_array;
   LPDIRECT3DVERTEXDECLARATION9 _vertex_declaration;
@@ -83,7 +96,7 @@ public:
   }
   static void init_type() {
     TypedObject::init_type();
-    register_type(_type_handle, CLASSPREFIX_QUOTED "ShaderContext",
+    register_type(_type_handle, "DXShaderContext9",
                   TypedObject::get_class_type());
   }
   virtual TypeHandle get_type() const {

+ 5 - 5
panda/src/dxgsg9/dxVertexBufferContext9.cxx

@@ -26,10 +26,10 @@ TypeHandle DXVertexBufferContext9::_type_handle;
 /**
  *
  */
-CLP(VertexBufferContext)::
-CLP(VertexBufferContext)(CLP(GraphicsStateGuardian) *dxgsg,
-                         PreparedGraphicsObjects *pgo,
-                         GeomVertexArrayData *data) :
+DXVertexBufferContext9::
+DXVertexBufferContext9(DXGraphicsStateGuardian9 *dxgsg,
+                       PreparedGraphicsObjects *pgo,
+                       GeomVertexArrayData *data) :
   VertexBufferContext(pgo, data),
   _vbuffer(NULL)
 {
@@ -170,7 +170,7 @@ CLP(VertexBufferContext)(CLP(GraphicsStateGuardian) *dxgsg,
  * epoch), or requeue itself on the tail of the queue (in which case the
  * eviction will be requested again much later).
  */
-void CLP(VertexBufferContext)::
+void DXVertexBufferContext9::
 evict_lru() {
   dequeue_lru();
 

+ 4 - 4
panda/src/dxgsg9/dxVertexBufferContext9.h

@@ -19,17 +19,17 @@
 #include "vertexBufferContext.h"
 #include "deletedChain.h"
 
-class CLP(GraphicsStateGuardian);
+class DXGraphicsStateGuardian9;
 
 /**
  * Caches a GeomVertexArrayData in the DirectX device as a vertex buffer.
  */
-class EXPCL_PANDADX CLP(VertexBufferContext) : public VertexBufferContext {
+class EXPCL_PANDADX DXVertexBufferContext9 : public VertexBufferContext {
 public:
-  CLP(VertexBufferContext)(CLP(GraphicsStateGuardian) *dxgsg,
+  DXVertexBufferContext9(DXGraphicsStateGuardian9 *dxgsg,
                            PreparedGraphicsObjects *pgo,
                            GeomVertexArrayData *data);
-  ALLOC_DELETED_CHAIN(CLP(VertexBufferContext));
+  ALLOC_DELETED_CHAIN(DXVertexBufferContext9);
 
   virtual void evict_lru();
 

+ 4 - 2
panda/src/dxgsg9/wdxGraphicsBuffer9.cxx

@@ -312,16 +312,18 @@ rebuild_bitplanes() {
         case RTP_aux_float_3:
           {
             CDWriter cdataw(_cycler, cdata, false);
-            nassertr(cdata->_textures.size() == cdataw->_textures.size(), false);
             cdataw->_textures[i]._rtm_mode = RTM_none;
           }
+          // Creating the CDWriter invalidated the CDLockedReader.
+          cdata = CDLockedReader(_cycler);
           break;
         default:
           {
             CDWriter cdataw(_cycler, cdata, false);
-            nassertr(cdata->_textures.size() == cdataw->_textures.size(), false);
             cdataw->_textures[i]._rtm_mode = RTM_copy_texture;
           }
+          // Creating the CDWriter invalidated the CDLockedReader.
+          cdata = CDLockedReader(_cycler);
           break;
         }
       }

+ 141 - 42
panda/src/gobj/shader.cxx

@@ -1765,6 +1765,69 @@ cg_compile_shader(const ShaderCaps &caps, CGcontext context) {
     return false;
   }
 
+  // This is present to work around a bug in the Cg compiler for Direct3D 9.
+  // It generates "texld_sat" instructions that result in a
+  // D3DXERR_INVALIDDATA error when trying to load the shader, since the _sat
+  // modifier may not be used on tex* instructions.
+  if (_cg_fprofile == CG_PROFILE_PS_2_0 ||
+      _cg_fprofile == CG_PROFILE_PS_2_X ||
+      _cg_fprofile == CG_PROFILE_PS_3_0) {
+    vector_string lines;
+    tokenize(cgGetProgramString(_cg_fprogram, CG_COMPILED_PROGRAM), lines, "\n");
+
+    ostringstream out;
+    int num_modified = 0;
+
+    for (size_t i = 0; i < lines.size(); ++i) {
+      const string &line = lines[i];
+
+      size_t space = line.find(' ');
+      if (space == string::npos) {
+        out << line << '\n';
+        continue;
+      }
+
+      string instr = line.substr(0, space);
+
+      // Look for a texld instruction with _sat modifier.
+      if (instr.compare(0, 5, "texld") == 0 &&
+          instr.compare(instr.size() - 4, 4, "_sat") == 0) {
+        // Which destination register are we operating on?
+        string reg = line.substr(space + 1, line.find(',', space) - space - 1);
+
+        // Move the saturation operation to a separate instruction.
+        instr.resize(instr.size() - 4);
+        out << instr << ' ' << line.substr(space + 1) << '\n';
+        out << "mov_sat " << reg << ", " << reg << '\n';
+        ++num_modified;
+      } else {
+        out << line << '\n';
+      }
+    }
+
+    if (num_modified > 0) {
+      string result = out.str();
+      CGprogram new_program;
+      new_program = cgCreateProgram(context, CG_OBJECT, result.c_str(),
+                                    (CGprofile)_cg_fprofile, "fshader",
+                                    (const char**)NULL);
+      if (new_program) {
+        cgDestroyProgram(_cg_fprogram);
+        _cg_fprogram = new_program;
+
+        if (shader_cat.is_debug()) {
+          shader_cat.debug()
+            << "Replaced " << num_modified << " invalid texld_sat instruction"
+            << ((num_modified == 1) ? "" : "s") << " in compiled shader\n";
+        }
+      } else {
+        shader_cat.warning()
+          << "Failed to load shader with fixed texld_sat instructions: "
+          << cgGetErrorString(cgGetError()) << "\n";
+      }
+    }
+  }
+
   // DEBUG: output the generated program
   if (shader_cat.is_debug()) {
     const char *vertex_program;
@@ -1881,59 +1944,95 @@ cg_analyze_shader(const ShaderCaps &caps) {
   // Assign sequence numbers to all parameters.  GLCgShaderContext relies on
   // the fact that the varyings start at seqno 0.
   int seqno = 0;
-  for (int i=0; i<(int)_var_spec.size(); i++) {
+  for (size_t i = 0; i < _var_spec.size(); ++i) {
     _var_spec[i]._id._seqno = seqno++;
   }
-  for (int i=0; i<(int)_mat_spec.size(); i++) {
+  for (size_t i = 0; i < _mat_spec.size(); ++i) {
     _mat_spec[i]._id._seqno = seqno++;
   }
-  for (int i=0; i<(int)_tex_spec.size(); i++) {
+  for (size_t i = 0; i < _tex_spec.size(); ++i) {
     _tex_spec[i]._id._seqno = seqno++;
   }
 
-  for (int i=0; i<(int)_ptr_spec.size(); i++) {
+  for (size_t i = 0; i < _ptr_spec.size(); ++i) {
     _ptr_spec[i]._id._seqno = seqno++;
     _ptr_spec[i]._info._id = _ptr_spec[i]._id;
   }
 
-/*
- * The following code is present to work around a bug in the Cg compiler.  It
- * does not generate correct code for shadow map lookups when using arbfp1.
- * This is a particularly onerous limitation, given that arbfp1 is the only Cg
- * target that works on radeons.  I suspect this is an intentional omission on
- * nvidia's part.  The following code fetches the output listing, detects the
- * error, repairs the code, and resumbits the repaired code to Cg.  if
- * ((_cg_fprofile == CG_PROFILE_ARBFP1) && (gsghint->_supports_shadow_filter))
- * { bool shadowunit[32]; bool anyshadow = false; memset(shadowunit, 0,
- * sizeof(shadowunit)); vector_string lines;
- * tokenize(cgGetProgramString(_cg_program[SHADER_type_frag],
- * CG_COMPILED_PROGRAM), lines, "\n"); figure out which texture units contain
- * shadow maps.  for (int lineno=0; lineno<(int)lines.size(); lineno++) { if
- * (lines[lineno].compare(0,21,"#var sampler2DSHADOW ")) { continue; }
- * vector_string fields; tokenize(lines[lineno], fields, ":"); if
- * (fields.size()!=5) { continue; } vector_string words;
- * tokenize(trim(fields[2]), words, " "); if (words.size()!=2) { continue; }
- * int unit = atoi(words[1].c_str()); if ((unit < 0)||(unit >= 32)) {
- * continue; } anyshadow = true; shadowunit[unit] = true; } modify all TEX
- * statements that use the relevant texture units.  if (anyshadow) { for (int
- * lineno=0; lineno<(int)lines.size(); lineno++) { if
- * (lines[lineno].compare(0,4,"TEX ")) { continue; } vector_string fields;
- * tokenize(lines[lineno], fields, ","); if
- * ((fields.size()!=4)||(trim(fields[3]) != "2D;")) { continue; }
- * vector_string texunitf; tokenize(trim(fields[2]), texunitf, "[]"); if
- * ((texunitf.size()!=3)||(texunitf[0] != "texture")||(texunitf[2]!="")) {
- * continue; } int unit = atoi(texunitf[1].c_str()); if ((unit < 0) || (unit
- * >= 32) || (shadowunit[unit]==false)) { continue; } lines[lineno] =
- * fields[0]+","+fields[1]+","+fields[2]+", SHADOW2D;"; } string result =
- * "!!ARBfp1.0\nOPTION ARB_fragment_program_shadow;\n"; for (int lineno=1;
- * lineno<(int)lines.size(); lineno++) { result += (lines[lineno] + "\n"); }
- * _cg_program[2] = _cg_program[SHADER_type_frag];
- * _cg_program[SHADER_type_frag] = cgCreateProgram(_cg_context, CG_OBJECT,
- * result.c_str(), _cg_profile[SHADER_type_frag], "fshader", (const
- * char**)NULL); cg_report_errors(s->get_name(), _cg_context); if
- * (_cg_program[SHADER_type_frag]==0) { release_resources(); return false; } }
- * }
- */
+  /*
+  // The following code is present to work around a bug in the Cg compiler.
+  // It does not generate correct code for shadow map lookups when using arbfp1.
+  // This is a particularly onerous limitation, given that arbfp1 is the only
+  // Cg target that works on radeons.  I suspect this is an intentional
+  // omission on nvidia's part.  The following code fetches the output listing,
+  // detects the error, repairs the code, and resubmits the repaired code to Cg.
+  if ((_cg_fprofile == CG_PROFILE_ARBFP1) && (gsghint->_supports_shadow_filter)) {
+    bool shadowunit[32];
+    bool anyshadow = false;
+    memset(shadowunit, 0, sizeof(shadowunit));
+    vector_string lines;
+    tokenize(cgGetProgramString(_cg_program[SHADER_type_frag],
+                                CG_COMPILED_PROGRAM), lines, "\n");
+    // figure out which texture units contain shadow maps.
+    for (int lineno=0; lineno<(int)lines.size(); lineno++) {
+      if (lines[lineno].compare(0,21,"#var sampler2DSHADOW ")) {
+        continue;
+      }
+      vector_string fields;
+      tokenize(lines[lineno], fields, ":");
+      if (fields.size()!=5) {
+        continue;
+      }
+      vector_string words;
+      tokenize(trim(fields[2]), words, " ");
+      if (words.size()!=2) {
+        continue;
+      }
+      int unit = atoi(words[1].c_str());
+      if ((unit < 0)||(unit >= 32)) {
+        continue;
+      }
+      anyshadow = true;
+      shadowunit[unit] = true;
+    }
+    // modify all TEX statements that use the relevant texture units.
+    if (anyshadow) {
+      for (int lineno=0; lineno<(int)lines.size(); lineno++) {
+        if (lines[lineno].compare(0,4,"TEX ")) {
+          continue;
+        }
+        vector_string fields;
+        tokenize(lines[lineno], fields, ",");
+        if ((fields.size()!=4)||(trim(fields[3]) != "2D;")) {
+          continue;
+        }
+        vector_string texunitf;
+        tokenize(trim(fields[2]), texunitf, "[]");
+        if ((texunitf.size()!=3)||(texunitf[0] != "texture")||(texunitf[2]!="")) {
+          continue;
+        }
+        int unit = atoi(texunitf[1].c_str());
+        if ((unit < 0) || (unit >= 32) || (shadowunit[unit]==false)) {
+          continue;
+        }
+        lines[lineno] = fields[0]+","+fields[1]+","+fields[2]+", SHADOW2D;";
+      }
+      string result = "!!ARBfp1.0\nOPTION ARB_fragment_program_shadow;\n";
+      for (int lineno=1; lineno<(int)lines.size(); lineno++) {
+        result += (lines[lineno] + "\n");
+      }
+      _cg_program[2] = _cg_program[SHADER_type_frag];
+      _cg_program[SHADER_type_frag] =
+        cgCreateProgram(_cg_context, CG_OBJECT, result.c_str(),
+                        _cg_profile[SHADER_type_frag], "fshader", (const char**)NULL);
+      cg_report_errors(s->get_name(), _cg_context);
+      if (_cg_program[SHADER_type_frag]==0) {
+        release_resources();
+        return false;
+      }
+    }
+  }
+  */
 
   cg_release_resources();
   return true;

+ 9 - 3
panda/src/pgraph/shaderInput.cxx

@@ -100,9 +100,15 @@ get_texture() const {
  */
 const SamplerState &ShaderInput::
 get_sampler() const {
-  return (_type == M_texture_sampler)
-    ? DCAST(ParamTextureSampler, _value)->get_sampler()
-    : get_texture()->get_default_sampler();
+  if (_type == M_texture_sampler) {
+    return DCAST(ParamTextureSampler, _value)->get_sampler();
+
+  } else if (!_value.is_null()) {
+    return get_texture()->get_default_sampler();
+
+  } else {
+    return SamplerState::get_default();
+  }
 }
 
 /**