Browse Source

Improvements for ShaderGenerator-based programs:
* Disable state munger, which isn't needed
* Reflect updates to TextureStage properties (Fixes #178)
* Also respect combine mode and rgb scale changes
* Allow rehashing and clearing generated shader cache

State munger is now disabled whenever any shader is applied. Panda assumes that any custom shader will apply color scale by itself.

rdb 8 years ago
parent
commit
410049fd4b

+ 50 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -3039,6 +3039,19 @@ determine_target_texture() {
   nassertv(_target_texture->get_num_on_stages() <= max_texture_stages);
 }
 
+/**
+ * Picks the ShaderAttrib to render with for the current _target_rs.  A
+ * generated shader cached on the state takes precedence; when there is none,
+ * the result of get_attrib_def() for the ShaderAttrib slot is used instead.
+ */
+void GraphicsStateGuardian::
+determine_target_shader() {
+  const RenderAttrib *attrib = _target_rs->_generated_shader.p();
+  if (attrib == nullptr) {
+    attrib = _target_rs->get_attrib_def(ShaderAttrib::get_class_slot());
+  }
+  _target_shader = (const ShaderAttrib *)attrib;
+}
+
 /**
  * Frees some memory that was explicitly allocated within the glgsg.
  */
@@ -3382,6 +3395,43 @@ make_shadow_buffer(const NodePath &light_np, GraphicsOutputBase *host) {
   return tex;
 }
 
+/**
+ * Ensures that an appropriate shader has been generated for the given state.
+ * This is stored in the _generated_shader field on the RenderState.
+ *
+ * Nothing happens unless the state's ShaderAttrib requests the auto shader.
+ * The ShaderGenerator is created on first use; if basic shaders are not
+ * supported, no generator is created and the state is left untouched.  A
+ * cached shader is regenerated when the state's _generated_shader_seq no
+ * longer matches the GSG's _generated_shader_seq.
+ *
+ * When HAVE_CG is not defined, this function does nothing.
+ */
+void GraphicsStateGuardian::
+ensure_generated_shader(const RenderState *state) {
+#ifdef HAVE_CG
+  const ShaderAttrib *shader_attrib;
+  state->get_attrib_def(shader_attrib);
+
+  if (!shader_attrib->auto_shader()) {
+    return;
+  }
+
+  if (_shader_generator == nullptr) {
+    if (!_supports_basic_shaders) {
+      return;
+    }
+    _shader_generator = new ShaderGenerator(this);
+  }
+
+  if (state->_generated_shader != nullptr &&
+      state->_generated_shader_seq == _generated_shader_seq) {
+    // The shader cached on the state is still fresh.
+    return;
+  }
+
+  GeomVertexAnimationSpec spec;
+
+  // Currently we overload this flag to request vertex animation for the
+  // shader generator.  The ShaderAttrib fetched above is reused here instead
+  // of being looked up a second time.
+  if (shader_attrib->get_flag(ShaderAttrib::F_hardware_skinning)) {
+    spec.set_hardware(4, true);
+  }
+
+  // Cache the generated ShaderAttrib on the shader state, and remember which
+  // sequence value it was generated for.
+  state->_generated_shader = _shader_generator->synthesize_shader(state, spec);
+  state->_generated_shader_seq = _generated_shader_seq;
+#endif
+}
+
 /**
  * Returns true if the GSG implements the extension identified by the given
  * string.  This currently is only implemented by the OpenGL back-end.

+ 3 - 0
panda/src/display/graphicsStateGuardian.h

@@ -426,6 +426,8 @@ public:
   PT(Texture) get_dummy_shadow_map(Texture::TextureType texture_type) const;
   PT(Texture) make_shadow_buffer(const NodePath &light_np, GraphicsOutputBase *host);
 
+  virtual void ensure_generated_shader(const RenderState *state);
+
 #ifdef DO_PSTATS
   static void init_frame_pstats();
 #endif
@@ -446,6 +448,7 @@ protected:
   virtual void end_bind_clip_planes();
 
   void determine_target_texture();
+  void determine_target_shader();
 
   virtual void free_pointers();
   virtual void close_gsg();

+ 13 - 38
panda/src/display/standardMunger.cxx

@@ -38,7 +38,17 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state,
   _auto_shader(false),
   _shader_skinning(false)
 {
-  if (!get_gsg()->get_runtime_color_scale()) {
+  const ShaderAttrib *shader_attrib;
+  state->get_attrib_def(shader_attrib);
+#ifdef HAVE_CG
+  _auto_shader = shader_attrib->auto_shader();
+#endif
+  if (shader_attrib->get_flag(ShaderAttrib::F_hardware_skinning)) {
+    _shader_skinning = true;
+  }
+
+  if (!get_gsg()->get_runtime_color_scale() && !_auto_shader &&
+      shader_attrib->get_shader() == nullptr) {
     // We might need to munge the colors.
     const ColorAttrib *color_attrib;
     const ColorScaleAttrib *color_scale_attrib;
@@ -60,6 +70,7 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state,
                      _color[3] * cs[3]);
         }
         _munge_color = true;
+        _should_munge_state = true;
       }
 
     } else if (state->get_attrib(color_scale_attrib) &&
@@ -74,6 +85,7 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state,
       if ((color_scale_attrib->has_rgb_scale() && !get_gsg()->get_color_scale_via_lighting()) ||
           (color_scale_attrib->has_alpha_scale() && !get_gsg()->get_alpha_scale_via_texture(tex_attrib))) {
         _munge_color_scale = true;
+        _should_munge_state = true;
       }
 
       // Known bug: if there is a material on an object that would obscure the
@@ -82,15 +94,6 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state,
       // effort to detect this contrived situation and handle it correctly.
     }
   }
-
-  const ShaderAttrib *shader_attrib = (const ShaderAttrib *)
-    state->get_attrib_def(ShaderAttrib::get_class_slot());
-  if (shader_attrib->auto_shader()) {
-    _auto_shader = true;
-  }
-  if (shader_attrib->get_flag(ShaderAttrib::F_hardware_skinning)) {
-    _shader_skinning = true;
-  }
 }
 
 /**
@@ -341,33 +344,5 @@ munge_state_impl(const RenderState *state) {
     munged_state = munged_state->remove_attrib(ColorScaleAttrib::get_class_slot());
   }
 
-#ifdef HAVE_CG
-  if (_auto_shader) {
-    GraphicsStateGuardian *gsg = get_gsg();
-    ShaderGenerator *shader_generator = gsg->get_shader_generator();
-    if (shader_generator == nullptr) {
-      shader_generator = new ShaderGenerator(gsg);
-      gsg->set_shader_generator(shader_generator);
-    }
-    if (munged_state->_generated_shader == nullptr) {
-      // Cache the generated ShaderAttrib on the shader state.
-      GeomVertexAnimationSpec spec;
-
-      // Currently we overload this flag to request vertex animation for the
-      // shader generator.
-      const ShaderAttrib *sattr;
-      munged_state->get_attrib_def(sattr);
-      if (sattr->get_flag(ShaderAttrib::F_hardware_skinning)) {
-        spec.set_hardware(4, true);
-      }
-
-      munged_state->_generated_shader = shader_generator->synthesize_shader(munged_state, spec);
-    }
-    if (munged_state->_generated_shader != nullptr) {
-      munged_state = munged_state->set_attrib(munged_state->_generated_shader);
-    }
-  }
-#endif
-
   return munged_state;
 }

+ 1 - 1
panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx

@@ -3074,7 +3074,7 @@ set_state_and_transform(const RenderState *target,
   }
   _target_rs = target;
 
-  _target_shader = DCAST(ShaderAttrib, _target_rs->get_attrib_def(ShaderAttrib::get_class_slot()));
+  determine_target_shader();
 
   int alpha_test_slot = AlphaTestAttrib::get_class_slot();
   if (_target_rs->get_attrib(alpha_test_slot) != _state_rs->get_attrib(alpha_test_slot) ||

+ 6 - 3
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -174,7 +174,7 @@ static const string default_vshader =
   "void main(void) {\n"
   "  gl_Position = p3d_ModelViewProjectionMatrix * p3d_Vertex;\n"
   "  texcoord = p3d_MultiTexCoord0;\n"
-  "  color = p3d_Color;\n"
+  "  color = p3d_Color * p3d_ColorScale;\n"
   "}\n";
 
 static const string default_fshader =
@@ -3127,6 +3127,10 @@ reset() {
   }
 #endif
 
+  // Do we guarantee that we can apply the color scale via a shader?  We set
+  // this false if there is a chance that the fixed-function pipeline is used.
+  _runtime_color_scale = !has_fixed_function_pipeline();
+
 #ifndef OPENGLES
   if (_gl_shadlang_ver_major >= 4 || has_extension("GL_NV_gpu_program5")) {
     // gp5fp - OpenGL fragment profile for GeForce 400 Series and up
@@ -10341,8 +10345,7 @@ set_state_and_transform(const RenderState *target,
   _target_rs = target;
 
 #ifndef OPENGLES_1
-  _target_shader = (const ShaderAttrib *)
-    _target_rs->get_attrib_def(ShaderAttrib::get_class_slot());
+  determine_target_shader();
   _instance_count = _target_shader->get_instance_count();
 
   if (_target_shader != _state_shader) {

+ 60 - 13
panda/src/gobj/textureStage.I

@@ -15,7 +15,7 @@
  * Initialize the texture stage from other
  */
 INLINE TextureStage::
-TextureStage(TextureStage &copy) {
+TextureStage(const TextureStage &copy) {
   (*this) = copy;
 }
 
@@ -52,6 +52,10 @@ set_sort(int sort) {
   // Update the global flag to indicate that all TextureAttribs in the world
   // must now re-sort their lists.
   _sort_seq++;
+
+  if (_used_by_auto_shader) {
+    GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+  }
 }
 
 /**
@@ -80,6 +84,10 @@ set_priority(int priority) {
   // Update the global flag to indicate that all TextureAttribs in the world
   // must now re-sort their lists.
   _sort_seq++;
+
+  if (_used_by_auto_shader) {
+    GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+  }
 }
 
 /**
@@ -99,7 +107,13 @@ get_priority() const {
  */
 INLINE void TextureStage::
 set_texcoord_name(InternalName *name) {
-  _texcoord_name = name;
+  if (name != _texcoord_name) {
+    _texcoord_name = name;
+
+    if (_used_by_auto_shader) {
+      GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+    }
+  }
 }
 
 /**
@@ -108,7 +122,7 @@ set_texcoord_name(InternalName *name) {
  */
 INLINE void TextureStage::
 set_texcoord_name(const string &name) {
-  _texcoord_name = InternalName::get_texcoord_name(name);
+  set_texcoord_name(InternalName::get_texcoord_name(name));
 }
 
 /**
@@ -150,13 +164,16 @@ get_binormal_name() const {
  */
 INLINE void TextureStage::
 set_mode(TextureStage::Mode mode) {
-  _mode = mode;
+  if (mode != _mode) {
+    _mode = mode;
 
-  if (_mode != M_combine) {
-    _num_combine_rgb_operands = 0;
-    _num_combine_alpha_operands = 0;
+    if (_mode != M_combine) {
+      _num_combine_rgb_operands = 0;
+      _num_combine_alpha_operands = 0;
+    }
+
+    update_color_flags();
   }
-  update_color_flags();
 }
 
 /**
@@ -202,8 +219,14 @@ get_color() const {
  */
 INLINE void TextureStage::
 set_rgb_scale(int rgb_scale) {
-  nassertv(rgb_scale == 1 || rgb_scale == 2 || rgb_scale == 4);
-  _rgb_scale = rgb_scale;
+  if (rgb_scale != _rgb_scale) {
+    nassertv(rgb_scale == 1 || rgb_scale == 2 || rgb_scale == 4);
+    _rgb_scale = rgb_scale;
+
+    if (_used_by_auto_shader) {
+      GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+    }
+  }
 }
 
 /**
@@ -222,8 +245,14 @@ get_rgb_scale() const {
  */
 INLINE void TextureStage::
 set_alpha_scale(int alpha_scale) {
-  nassertv(alpha_scale == 1 || alpha_scale == 2 || alpha_scale == 4);
-  _alpha_scale = alpha_scale;
+  if (alpha_scale != _alpha_scale) {
+    nassertv(alpha_scale == 1 || alpha_scale == 2 || alpha_scale == 4);
+    _alpha_scale = alpha_scale;
+
+    if (_used_by_auto_shader) {
+      GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+    }
+  }
 }
 
 /**
@@ -247,7 +276,13 @@ get_alpha_scale() const {
  */
 INLINE void TextureStage::
 set_saved_result(bool saved_result) {
-  _saved_result = saved_result;
+  if (saved_result != _saved_result) {
+    _saved_result = saved_result;
+
+    if (_used_by_auto_shader) {
+      GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+    }
+  }
 }
 
 /**
@@ -641,6 +676,14 @@ get_sort_seq() {
   return _sort_seq;
 }
 
+/**
+ * Marks this TextureStage as having been used by the auto shader.  Once the
+ * flag is set, the property setters that check _used_by_auto_shader call
+ * GraphicsStateGuardianBase::mark_rehash_generated_shaders() when they change
+ * the stage.  This method is const; the flag it sets is a mutable member.
+ */
+INLINE void TextureStage::
+mark_used_by_auto_shader() const {
+  _used_by_auto_shader = true;
+}
+
 /**
  * Updates _uses_color, _involves_color_scale, _uses_primary_color and
  * _uses_last_saved_result appropriately.
@@ -684,6 +727,10 @@ update_color_flags() {
        _combine_alpha_source0 == CS_last_saved_result ||
        _combine_alpha_source1 == CS_last_saved_result ||
        _combine_alpha_source2 == CS_last_saved_result));
+
+  if (_used_by_auto_shader) {
+    GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+  }
 }
 
 INLINE ostream &

+ 3 - 1
panda/src/gobj/textureStage.cxx

@@ -25,7 +25,7 @@ TypeHandle TextureStage::_type_handle;
  * Initialize the texture stage at construction
  */
 TextureStage::
-TextureStage(const string &name) {
+TextureStage(const string &name) : _used_by_auto_shader(false) {
   _name = name;
   _sort = 0;
   _priority = 0;
@@ -90,6 +90,8 @@ operator = (const TextureStage &other) {
 
   _uses_color = other._uses_color;
   _involves_color_scale = other._involves_color_scale;
+
+  _used_by_auto_shader = false;
 }
 
 /**

+ 6 - 1
panda/src/gobj/textureStage.h

@@ -21,6 +21,7 @@
 #include "typedWritableReferenceCount.h"
 #include "updateSeq.h"
 #include "luse.h"
+#include "graphicsStateGuardianBase.h"
 
 class FactoryParams;
 
@@ -34,7 +35,7 @@ class FactoryParams;
 class EXPCL_PANDA_GOBJ TextureStage : public TypedWritableReferenceCount {
 PUBLISHED:
   explicit TextureStage(const string &name);
-  INLINE TextureStage(TextureStage &copy);
+  INLINE TextureStage(const TextureStage &copy);
   void operator = (const TextureStage &copy);
 
   virtual ~TextureStage();
@@ -206,6 +207,8 @@ PUBLISHED:
 public:
   INLINE static UpdateSeq get_sort_seq();
 
+  INLINE void mark_used_by_auto_shader() const;
+
 private:
   INLINE void update_color_flags();
 
@@ -249,6 +252,8 @@ private:
   static PT(TextureStage) _default_stage;
   static UpdateSeq _sort_seq;
 
+  mutable bool _used_by_auto_shader;
+
 public:
   // Datagram stuff
   static void register_with_read_factory();

+ 1 - 0
panda/src/gsgbase/graphicsStateGuardianBase.cxx

@@ -16,6 +16,7 @@
 #include <algorithm>
 
 AtomicAdjust::Pointer GraphicsStateGuardianBase::_gsg_list;
+UpdateSeq GraphicsStateGuardianBase::_generated_shader_seq;
 TypeHandle GraphicsStateGuardianBase::_type_handle;
 
 /**

+ 11 - 0
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -223,6 +223,14 @@ public:
   virtual void bind_light(Spotlight *light_obj, const NodePath &light,
                           int light_id) { }
 
+  virtual void ensure_generated_shader(const RenderState *state)=0;
+
+  static void mark_rehash_generated_shaders() {  // Requests a rehash of generated shaders.
+#ifdef HAVE_CG
+    ++_generated_shader_seq;  // Bump the seq that cached shaders are checked against.
+#endif
+  }
+
 PUBLISHED:
   static GraphicsStateGuardianBase *get_default_gsg();
   static void set_default_gsg(GraphicsStateGuardianBase *default_gsg);
@@ -247,6 +255,9 @@ private:
   };
   static AtomicAdjust::Pointer _gsg_list;
 
+protected:
+  static UpdateSeq _generated_shader_seq;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

+ 1 - 1
panda/src/pgraph/config_pgraph.cxx

@@ -227,7 +227,7 @@ ConfigVariableBool uniquify_states
           "are pointerwise equal.  This may improve caching performance, "
           "but also adds additional overhead to maintain the cache, "
           "including the need to check for a composition cycle in "
-          "the cache."));
+          "the cache.  It is highly recommended to keep this on."));
 
 ConfigVariableBool uniquify_attribs
 ("uniquify-attribs", true,

+ 8 - 3
panda/src/pgraph/cullableObject.cxx

@@ -142,10 +142,15 @@ munge_geom(GraphicsStateGuardianBase *gsg, GeomMunger *munger,
           DCAST(ShaderAttrib, ShaderAttrib::make())->set_flag(ShaderAttrib::F_hardware_skinning, true));
         _state = _state->compose(state);
       }
-    }
 
-    StateMunger *state_munger = (StateMunger *)munger;
-    _state = state_munger->munge_state(_state);
+      gsg->ensure_generated_shader(_state);
+    } else {
+      // We may need to munge the state for the fixed-function pipeline.
+      StateMunger *state_munger = (StateMunger *)munger;
+      if (state_munger->should_munge_state()) {
+        _state = state_munger->munge_state(_state);
+      }
+    }
 
     // If there is any animation left in the vertex data after it has been
     // munged--that is, we couldn't arrange to handle the animation in

+ 2 - 0
panda/src/pgraph/renderState.h

@@ -219,6 +219,7 @@ public:
   // declare this as a ShaderAttrib because that would create a circular
   // include-file dependency problem.  Aaargh.
   mutable CPT(RenderAttrib) _generated_shader;
+  mutable UpdateSeq _generated_shader_seq;
 
 private:
   // This mutex protects _states.  It also protects any modification to the
@@ -363,6 +364,7 @@ private:
   friend class GraphicsStateGuardian;
   friend class RenderAttribRegistry;
   friend class Extension<RenderState>;
+  friend class ShaderGenerator;
   friend class StateMunger;
 };
 

+ 10 - 1
panda/src/pgraph/stateMunger.I

@@ -16,6 +16,15 @@
  */
 INLINE StateMunger::
 StateMunger(GraphicsStateGuardianBase *gsg) :
-  GeomMunger(gsg)
+  GeomMunger(gsg),
+  _should_munge_state(false)
 {
 }
+
+/**
+ * Returns true if this munger has something interesting to do to the state.
+ * The underlying flag is initialized to false by the StateMunger constructor;
+ * it is a protected member, so a derived munger can set it when it decides
+ * that munge_state() needs to be called.
+ */
+INLINE bool StateMunger::
+should_munge_state() const {
+  return _should_munge_state;
+}

+ 3 - 0
panda/src/pgraph/stateMunger.h

@@ -30,9 +30,12 @@ public:
   virtual ~StateMunger();
   CPT(RenderState) munge_state(const RenderState *state);
 
+  INLINE bool should_munge_state() const;
+
 protected:
   virtual CPT(RenderState) munge_state_impl(const RenderState *state);
 
+  bool _should_munge_state;
 
 public:
   static TypeHandle get_class_type() {

+ 209 - 118
panda/src/pgraphnodes/shaderGenerator.cxx

@@ -51,6 +51,14 @@ TypeHandle ShaderGenerator::_type_handle;
 
 #ifdef HAVE_CG
 
+// Packs the three (source, operand) pairs of a combine mode into one integer,
+// five bits per pair: the low three bits hold the CombineSource and the next
+// two bits hold the CombineOperand minus one.  Since the operand is stored
+// minus one and masked to two bits, only operand values 1 through 4 survive a
+// round trip through UNPACK_COMBINE_OP.  Every argument is parenthesized so
+// that arbitrary expressions can be passed safely, and each source is masked
+// to the three bits that UNPACK_COMBINE_SRC reads back.
+#define PACK_COMBINE(src0, op0, src1, op1, src2, op2) ( \
+  ((uint16_t)(src0) & 7u) | ((((uint16_t)(op0) - 1u) & 3u) << 3u) | \
+  (((uint16_t)(src1) & 7u) << 5u) | ((((uint16_t)(op1) - 1u) & 3u) << 8u) | \
+  (((uint16_t)(src2) & 7u) << 10u) | ((((uint16_t)(op2) - 1u) & 3u) << 13u))
+
+// Extract the n-th (0, 1 or 2) source or operand from a packed value.
+#define UNPACK_COMBINE_SRC(from, n) ((TextureStage::CombineSource)(((from) >> ((uint16_t)(n) * 5u)) & 7u))
+#define UNPACK_COMBINE_OP(from, n) ((TextureStage::CombineOperand)((((from) >> (((uint16_t)(n) * 5u) + 3u)) & 3u) + 1u))
+
 static PStatCollector lookup_collector("*:Munge:ShaderGen:Lookup");
 static PStatCollector synthesize_collector("*:Munge:ShaderGen:Synthesize");
 
@@ -60,7 +68,7 @@ static PStatCollector synthesize_collector("*:Munge:ShaderGen:Synthesize");
  * shader generator belongs.
  */
 ShaderGenerator::
-ShaderGenerator(GraphicsStateGuardianBase *gsg) {
+ShaderGenerator(const GraphicsStateGuardianBase *gsg) {
   // The ATTR# input semantics seem to map to generic vertex attributes in
   // both arbvp1 and glslv, which behave more consistently.  However, they
   // don't exist in Direct3D 9.  Use this silly little check for now.
@@ -298,10 +306,17 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) {
     Texture *tex = texture->get_on_texture(stage);
     nassertd(tex != nullptr) continue;
 
+    // Mark this TextureStage as having been used by the shader generator, so
+    // that the next time its properties change, it will cause the state to be
+    // rehashed to ensure that the shader is regenerated if needed.
+    stage->mark_used_by_auto_shader();
+
     ShaderKey::TextureInfo info;
     info._type = tex->get_texture_type();
     info._mode = stage->get_mode();
     info._flags = 0;
+    info._combine_rgb = 0u;
+    info._combine_alpha = 0u;
 
     // While we look at the mode, determine whether we need to change the mode
     // in order to reflect disabled features.
@@ -357,6 +372,40 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) {
         info._flags = ShaderKey::TF_map_normal | ShaderKey::TF_map_gloss;
       }
       break;
+
+    case TextureStage::M_combine:
+      // If we have this rare, special mode, we encode all these extra
+      // parameters as flags to prevent bloating the shader key.
+      info._flags |= (uint32_t)stage->get_combine_rgb_mode() << ShaderKey::TF_COMBINE_RGB_MODE_SHIFT;
+      info._flags |= (uint32_t)stage->get_combine_alpha_mode() << ShaderKey::TF_COMBINE_ALPHA_MODE_SHIFT;
+      if (stage->get_rgb_scale() == 2) {
+        info._flags |= ShaderKey::TF_rgb_scale_2;
+      }
+      if (stage->get_rgb_scale() == 4) {
+        info._flags |= ShaderKey::TF_rgb_scale_4;
+      }
+      if (stage->get_alpha_scale() == 2) {
+        info._flags |= ShaderKey::TF_alpha_scale_2;
+      }
+      if (stage->get_alpha_scale() == 4) {
+        info._flags |= ShaderKey::TF_alpha_scale_4;
+      }
+      info._combine_rgb = PACK_COMBINE(
+        stage->get_combine_rgb_source0(), stage->get_combine_rgb_operand0(),
+        stage->get_combine_rgb_source1(), stage->get_combine_rgb_operand1(),
+        stage->get_combine_rgb_source2(), stage->get_combine_rgb_operand2());
+      info._combine_alpha = PACK_COMBINE(
+        stage->get_combine_alpha_source0(), stage->get_combine_alpha_operand0(),
+        stage->get_combine_alpha_source1(), stage->get_combine_alpha_operand1(),
+        stage->get_combine_alpha_source2(), stage->get_combine_alpha_operand2());
+
+      if (stage->uses_primary_color()) {
+        info._flags |= ShaderKey::TF_uses_primary_color;
+      }
+      if (stage->uses_last_saved_result()) {
+        info._flags |= ShaderKey::TF_uses_last_saved_result;
+      }
+      break;
     }
 
     // In fact, perhaps this stage should be disabled altogether?
@@ -417,11 +466,9 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) {
       info._gen_mode = TexGenAttrib::M_off;
     }
 
-    // If we have this rare, special mode, just include a pointer to the
-    // TextureStage object, because I can't be bothered to bloat the shader
-    // key with all these extra relevant properties.
-    if (stage->get_mode() == TextureStage::M_combine) {
-      info._stage = stage;
+    // Does this stage require saving its result?
+    if (stage->get_saved_result()) {
+      info._flags |= ShaderKey::TF_saved_result;
     }
 
     // Does this stage need a texcolor_# input?
@@ -433,6 +480,17 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) {
     key._texture_flags |= info._flags;
   }
 
+  // Does nothing use the saved result?  If so, don't bother saving it.
+  if ((key._texture_flags & ShaderKey::TF_uses_last_saved_result) == 0 &&
+      (key._texture_flags & ShaderKey::TF_saved_result) != 0) {
+
+    pvector<ShaderKey::TextureInfo>::iterator it;
+    for (it = key._textures.begin(); it != key._textures.end(); ++it) {
+      (*it)._flags &= ~ShaderKey::TF_saved_result;
+    }
+    key._texture_flags &= ~ShaderKey::TF_saved_result;
+  }
+
   // Decide whether to separate ambient and diffuse calculations.
   if (have_ambient) {
     if (key._material_flags & Material::F_ambient) {
@@ -468,6 +526,74 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) {
   }
 }
 
+/**
+ * Walks every state in RenderState::_states that carries a generated shader,
+ * re-analyzes it, and either swaps in the shader already generated for its
+ * new key or drops the stale one so that it is generated again later.
+ *
+ * Call this if certain state has changed in such a way as to require a rerun
+ * of the shader generator.  This should be rare because in most cases, the
+ * shader generator will automatically regenerate shaders as necessary.
+ */
+INLINE void ShaderGenerator::
+rehash_generated_shaders() {
+  LightReMutexHolder holder(*RenderState::_states_lock);
+
+  // With uniquify-states turned on, we can actually go through all the states
+  // and check whether their generated shader is still OK.
+  const size_t num_states = RenderState::_states->get_num_entries();
+  for (size_t index = 0; index < num_states; ++index) {
+    const RenderState *state = RenderState::_states->get_key(index);
+    if (state->_generated_shader == nullptr) {
+      // No shader was generated for this state, so nothing can be stale.
+      continue;
+    }
+
+    ShaderKey key;
+    analyze_renderstate(key, state);
+
+    GeneratedShaders::const_iterator found = _generated_shaders.find(key);
+    if (found == _generated_shaders.end()) {
+      // We have not yet generated a shader for this modified state.
+      state->_generated_shader.clear();
+      state->_munged_states.clear();
+    } else if (found->second != state->_generated_shader) {
+      // A shader for the new key already exists; switch over to it.
+      state->_generated_shader = found->second;
+      state->_munged_states.clear();
+    }
+  }
+
+  // If we don't have uniquify-states, however, the above list won't contain
+  // all the states.  We can change a global seq value to require Panda to
+  // rehash the states the next time it tries to render an object with it.
+  if (!uniquify_states) {
+    GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+  }
+}
+
+/**
+ * Drops all previously generated shaders, both the one cached on each state
+ * in RenderState::_states and the contents of _generated_shaders, requiring
+ * all shaders to be regenerated.  Does not clear cache of compiled shaders.
+ */
+INLINE void ShaderGenerator::
+clear_generated_shaders() {
+  LightReMutexHolder holder(*RenderState::_states_lock);
+
+  const size_t num_states = RenderState::_states->get_num_entries();
+  for (size_t index = 0; index < num_states; ++index) {
+    const RenderState *cached_state = RenderState::_states->get_key(index);
+    cached_state->_generated_shader.clear();
+  }
+
+  _generated_shaders.clear();
+
+  // Without uniquify-states, the loop above cannot reach every state that has
+  // a ShaderAttrib cached on it, but bumping the global seq has the same
+  // effect the next time such a state is used.
+  if (!uniquify_states) {
+    GraphicsStateGuardianBase::mark_rehash_generated_shaders();
+  }
+}
+
 /**
  * This is the routine that implements the next-gen fixed function pipeline by
  * synthesizing a shader.  It also takes care of setting up any buffers needed
@@ -1256,24 +1382,12 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
     }
   }
 
-  // Loop first to see if something is using primary_color or
-  // last_saved_result.
-  bool have_saved_result = false;
-  bool have_primary_color = false;
-  for (size_t i = 0; i < key._textures.size(); ++i) {
-    const ShaderKey::TextureInfo &tex = key._textures[i];
-    if (tex._stage == nullptr) {
-      continue;
-    }
-
-    if (tex._stage->uses_primary_color() && !have_primary_color) {
-      text << "\t float4 primary_color = result;\n";
-      have_primary_color = true;
-    }
-    if (tex._stage->uses_last_saved_result() && !have_saved_result) {
-      text << "\t float4 last_saved_result = result;\n";
-      have_saved_result = true;
-    }
+  // Store these if any stages will use it.
+  if (key._texture_flags & ShaderKey::TF_uses_primary_color) {
+    text << "\t float4 primary_color = result;\n";
+  }
+  if (key._texture_flags & ShaderKey::TF_uses_last_saved_result) {
+    text << "\t float4 last_saved_result = result;\n";
   }
 
   // Now loop through the textures to compose our magic blending formulas.
@@ -1315,24 +1429,21 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
       }
       break;
     case TextureStage::M_combine:
-      // Only in the case of M_combine have we filled in the _stage pointer.
       text << "\t result.rgb = ";
-      if (tex._stage->get_combine_rgb_mode() != TextureStage::CM_undefined) {
-        text << combine_mode_as_string(tex._stage, tex._stage->get_combine_rgb_mode(), false, i);
-      } else {
-        text << "tex" << i << ".rgb";
+      text << combine_mode_as_string(tex, (TextureStage::CombineMode)((tex._flags & ShaderKey::TF_COMBINE_RGB_MODE_MASK) >> ShaderKey::TF_COMBINE_RGB_MODE_SHIFT), false, i);
+      if (tex._flags & ShaderKey::TF_rgb_scale_2) {
+        text << " * 2";
       }
-      if (tex._stage->get_rgb_scale() != 1) {
-        text << " * " << tex._stage->get_rgb_scale();
+      if (tex._flags & ShaderKey::TF_rgb_scale_4) {
+        text << " * 4";
       }
       text << ";\n\t result.a = ";
-      if (tex._stage->get_combine_alpha_mode() != TextureStage::CM_undefined) {
-        text << combine_mode_as_string(tex._stage, tex._stage->get_combine_alpha_mode(), true, i);
-      } else {
-        text << "tex" << i << ".a";
+      // Pass alpha=true so that the alpha sources and operands are used for
+      // result.a, as the code being replaced did.
+      text << combine_mode_as_string(tex, (TextureStage::CombineMode)((tex._flags & ShaderKey::TF_COMBINE_ALPHA_MODE_MASK) >> ShaderKey::TF_COMBINE_ALPHA_MODE_SHIFT), true, i);
+      if (tex._flags & ShaderKey::TF_alpha_scale_2) {
+        text << " * 2";
       }
-      if (tex._stage->get_alpha_scale() != 1) {
-        text << " * " << tex._stage->get_alpha_scale();
+      if (tex._flags & ShaderKey::TF_alpha_scale_4) {
+        text << " * 4";
       }
       text << ";\n";
       break;
@@ -1342,7 +1453,7 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
     default:
       break;
     }
-    if ((tex._flags & ShaderKey::TF_saved_result) != 0 && have_saved_result) {
+    if (tex._flags & ShaderKey::TF_saved_result) {
       text << "\t last_saved_result = result;\n";
     }
   }
@@ -1469,53 +1580,53 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
  * This 'synthesizes' a combine mode into a string.
  */
 const string ShaderGenerator::
-combine_mode_as_string(CPT(TextureStage) stage, TextureStage::CombineMode c_mode, bool alpha, short texindex) {
+combine_mode_as_string(const ShaderKey::TextureInfo &info, TextureStage::CombineMode c_mode, bool alpha, short texindex) {
   ostringstream text;
   switch (c_mode) {
-    case TextureStage::CM_modulate:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " * ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_add:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_add_signed:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      if (alpha) {
-        text << " - 0.5";
-      } else {
-        text << " - float3(0.5, 0.5, 0.5)";
-      }
-      break;
-    case TextureStage::CM_interpolate:
-      text << "lerp(";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      text << ", ";
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << ", ";
-      text << combine_source_as_string(stage, 2, alpha, true, texindex);
-      text << ")";
-      break;
-    case TextureStage::CM_subtract:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_dot3_rgb:
-      pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGB not yet supported in per-pixel mode.\n";
-      break;
-    case TextureStage::CM_dot3_rgba:
-      pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGBA not yet supported in per-pixel mode.\n";
-      break;
-    case TextureStage::CM_replace:
-    default: // Not sure if this is correct as default value.
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      break;
+  case TextureStage::CM_modulate:
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " * ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_add:
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " + ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_add_signed:
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " + ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    if (alpha) {
+      text << " - 0.5";
+    } else {
+      text << " - float3(0.5, 0.5, 0.5)";
+    }
+    break;
+  case TextureStage::CM_interpolate:
+    text << "lerp(";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    text << ", ";
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << ", ";
+    text << combine_source_as_string(info, 2, alpha, true, texindex);
+    text << ")";
+    break;
+  case TextureStage::CM_subtract:
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " - ";  // Subtract is source 0 minus source 1; " + " duplicated CM_add.
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_dot3_rgb:
+    pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGB not yet supported in per-pixel mode.\n";
+    break;
+  case TextureStage::CM_dot3_rgba:
+    pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGBA not yet supported in per-pixel mode.\n";
+    break;
+  case TextureStage::CM_replace:
+  default: // Not sure if this is correct as default value.
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    break;
   }
   return text.str();
 }
@@ -1524,39 +1635,15 @@ combine_mode_as_string(CPT(TextureStage) stage, TextureStage::CombineMode c_mode
  * This 'synthesizes' a combine source into a string.
  */
 const string ShaderGenerator::
-combine_source_as_string(CPT(TextureStage) stage, short num, bool alpha, bool single_value, short texindex) {
-  TextureStage::CombineSource c_src = TextureStage::CS_undefined;
-  TextureStage::CombineOperand c_op = TextureStage::CO_undefined;
-  if (alpha) {
-    switch (num) {
-      case 0:
-        c_src = stage->get_combine_alpha_source0();
-        c_op = stage->get_combine_alpha_operand0();
-        break;
-      case 1:
-        c_src = stage->get_combine_alpha_source1();
-        c_op = stage->get_combine_alpha_operand1();
-        break;
-      case 2:
-        c_src = stage->get_combine_alpha_source2();
-        c_op = stage->get_combine_alpha_operand2();
-        break;
-    }
+combine_source_as_string(const ShaderKey::TextureInfo &info, short num, bool alpha, bool single_value, short texindex) {
+  TextureStage::CombineSource c_src;
+  TextureStage::CombineOperand c_op;
+  if (!alpha) {
+    c_src = UNPACK_COMBINE_SRC(info._combine_rgb, num);
+    c_op = UNPACK_COMBINE_OP(info._combine_rgb, num);
   } else {
-    switch (num) {
-      case 0:
-        c_src = stage->get_combine_rgb_source0();
-        c_op = stage->get_combine_rgb_operand0();
-        break;
-      case 1:
-        c_src = stage->get_combine_rgb_source1();
-        c_op = stage->get_combine_rgb_operand1();
-        break;
-      case 2:
-        c_src = stage->get_combine_rgb_source2();
-        c_op = stage->get_combine_rgb_operand2();
-        break;
-    }
+    c_src = UNPACK_COMBINE_SRC(info._combine_alpha, num);
+    c_op = UNPACK_COMBINE_OP(info._combine_alpha, num);
   }
   ostringstream csource;
   if (c_op == TextureStage::CO_one_minus_src_color ||
@@ -1688,8 +1775,11 @@ operator < (const ShaderKey &other) const {
     if (tex._flags != other_tex._flags) {
       return tex._flags < other_tex._flags;
     }
-    if (tex._stage != other_tex._stage) {
-      return tex._stage < other_tex._stage;
+    if (tex._combine_rgb != other_tex._combine_rgb) {
+      return tex._combine_rgb < other_tex._combine_rgb;
+    }
+    if (tex._combine_alpha != other_tex._combine_alpha) {
+      return tex._combine_alpha < other_tex._combine_alpha;
     }
   }
   if (_lights.size() != other._lights.size()) {
@@ -1763,7 +1853,8 @@ operator == (const ShaderKey &other) const {
         tex._mode != other_tex._mode ||
         tex._gen_mode != other_tex._gen_mode ||
         tex._flags != other_tex._flags ||
-        tex._stage != other_tex._stage) {
+        tex._combine_rgb != other_tex._combine_rgb ||
+        tex._combine_alpha != other_tex._combine_alpha) {
       return false;
     }
   }

+ 34 - 20
panda/src/pgraphnodes/shaderGenerator.h

@@ -65,18 +65,15 @@ class GeomVertexAnimationSpec;
  */
 class EXPCL_PANDA_PGRAPHNODES ShaderGenerator : public TypedReferenceCount {
 PUBLISHED:
-  ShaderGenerator(GraphicsStateGuardianBase *gsg);
+  ShaderGenerator(const GraphicsStateGuardianBase *gsg);
   virtual ~ShaderGenerator();
   virtual CPT(ShaderAttrib) synthesize_shader(const RenderState *rs,
                                               const GeomVertexAnimationSpec &anim);
 
-protected:
-  static const string combine_mode_as_string(CPT(TextureStage) stage,
-                      TextureStage::CombineMode c_mode, bool alpha, short texindex);
-  static const string combine_source_as_string(CPT(TextureStage) stage,
-                         short num, bool alpha, bool single_value, short texindex);
-  static const string texture_type_as_string(Texture::TextureType ttype);
+  INLINE void rehash_generated_shaders();
+  INLINE void clear_generated_shaders();
 
+protected:
   // Shader register allocation:
 
   bool _use_generic_attr;
@@ -101,16 +98,28 @@ protected:
 
     GeomVertexAnimationSpec _anim_spec;
     enum TextureFlags { // Single-bit flags occupy bits 0-15; two 4-bit combine-mode fields sit at bits 16-19 and 20-23.
-      TF_has_rgb = 1,
-      TF_has_alpha = 2,
-      TF_has_texscale = 4,
-      TF_has_texmat = 8,
-      TF_saved_result = 16,
-      TF_map_normal = 32,
-      TF_map_height = 64,
-      TF_map_glow = 128,
-      TF_map_gloss = 256,
-      TF_uses_color = 512,
+      TF_has_rgb      = 0x001,
+      TF_has_alpha    = 0x002,
+      TF_has_texscale = 0x004,
+      TF_has_texmat   = 0x008,
+      TF_saved_result = 0x010,
+      TF_map_normal   = 0x020,
+      TF_map_height   = 0x040,
+      TF_map_glow     = 0x080,
+      TF_map_gloss    = 0x100,
+      TF_uses_color   = 0x200,
+      TF_uses_primary_color = 0x400,
+      TF_uses_last_saved_result = 0x800,
+
+      TF_rgb_scale_2 = 0x1000, // NOTE(review): presumably marks a TextureStage RGB scale of 2 -- confirm against analyze_renderstate.
+      TF_rgb_scale_4 = 0x2000, // NOTE(review): presumably marks a TextureStage RGB scale of 4 -- confirm.
+      TF_alpha_scale_2 = 0x4000, // NOTE(review): presumably the alpha-scale counterpart of TF_rgb_scale_2 -- confirm.
+      TF_alpha_scale_4 = 0x8000, // NOTE(review): presumably the alpha-scale counterpart of TF_rgb_scale_4 -- confirm.
+
+      TF_COMBINE_RGB_MODE_SHIFT = 16, // Bit offset of the field selected by TF_COMBINE_RGB_MODE_MASK.
+      TF_COMBINE_RGB_MODE_MASK = 0x0000f0000, // Equals 0xf << 16, i.e. bits 16-19.
+      TF_COMBINE_ALPHA_MODE_SHIFT = 20, // Bit offset of the field selected by TF_COMBINE_ALPHA_MODE_MASK.
+      TF_COMBINE_ALPHA_MODE_MASK = 0x000f00000, // Equals 0xf << 20, i.e. bits 20-23.
     };
 
     ColorAttrib::Type _color_type;
@@ -123,9 +132,8 @@ protected:
       TextureStage::Mode _mode;
       TexGenAttrib::Mode _gen_mode;
       int _flags;
-
-      // Stored only if combine modes / blend color is used
-      CPT(TextureStage) _stage;
+      uint16_t _combine_rgb;
+      uint16_t _combine_alpha;
     };
     pvector<TextureInfo> _textures;
 
@@ -160,6 +168,12 @@ protected:
 
   void analyze_renderstate(ShaderKey &key, const RenderState *rs);
 
+  static const string combine_mode_as_string(const ShaderKey::TextureInfo &info,
+                      TextureStage::CombineMode c_mode, bool alpha, short texindex);
+  static const string combine_source_as_string(const ShaderKey::TextureInfo &info,
+                         short num, bool alpha, bool single_value, short texindex);
+  static const string texture_type_as_string(Texture::TextureType ttype);
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;