Browse Source

Implement alpha test injection

Fixes #852
Fixes #473
rdb 1 year ago
parent
commit
d531d5d42c

+ 30 - 12
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -2161,6 +2161,8 @@ reset() {
        get_extension_func("glShaderSource");
     _glUseProgram = (PFNGLUSEPROGRAMPROC)
        get_extension_func("glUseProgram");
+    _glUniform1f = (PFNGLUNIFORM1FPROC)
+       get_extension_func("glUniform1f");
     _glUniform4f = (PFNGLUNIFORM4FPROC)
        get_extension_func("glUniform4f");
     _glUniform1i = (PFNGLUNIFORM1IPROC)
@@ -2254,6 +2256,8 @@ reset() {
     }
 
     if (is_at_least_gl_version(4, 0)) {
+      _glUniform1d = (PFNGLUNIFORM1DPROC)
+         get_extension_func("glUniform1d");
       _glUniform4d = (PFNGLUNIFORM4DPROC)
          get_extension_func("glUniform4d");
       _glUniform1dv = (PFNGLUNIFORM1DVPROC)
@@ -2314,6 +2318,7 @@ reset() {
   _glLinkProgram = glLinkProgram;
   _glShaderSource = (PFNGLSHADERSOURCEPROC_P) glShaderSource;
   _glUseProgram = glUseProgram;
+  _glUniform1f = glUniform1f;
   _glUniform4f = glUniform4f;
   _glUniform1i = glUniform1i;
   _glUniform1fv = glUniform1fv;
@@ -6940,7 +6945,7 @@ prepare_shader(Shader *se) {
 
 #ifndef OPENGLES_1
   push_group_marker(std::string("Prepare Shader ") + se->get_debug_name());
-  ShaderContext *result = new CLP(ShaderContext)(this, se);
+  CLP(ShaderContext) *result = new CLP(ShaderContext)(this, se);
   pop_group_marker();
 
   if (result->valid()) {
@@ -8615,18 +8620,26 @@ void CLP(GraphicsStateGuardian)::
 do_issue_shader() {
   PStatTimer timer(_draw_set_state_shader_pcollector);
 
-  ShaderContext *context = 0;
+  CLP(ShaderContext) *context = 0;
   Shader *shader = (Shader *)_target_shader->get_shader();
 
+  RenderAttrib::PandaCompareFunc alpha_test_mode = RenderAttrib::M_none;
+
   // If we don't have a shader, apply the default shader.
-  if (!has_fixed_function_pipeline() && !shader) {
-    shader = _default_shader;
-    nassertv(shader != nullptr);
+  if (!has_fixed_function_pipeline()) {
+    if (!shader) {
+      shader = _default_shader;
+      nassertv(shader != nullptr);
+    }
+
+    if (!_target_shader->get_flag(ShaderAttrib::F_subsume_alpha_test)) {
+      alpha_test_mode = _target_rs->get_alpha_test_mode();
+    }
   }
 
   if (shader) {
     if (_current_shader != shader) {
-      context = shader->prepare_now(get_prepared_objects(), this);
+      context = (CLP(ShaderContext) *)shader->prepare_now(get_prepared_objects(), this);
     } else {
       context = _current_shader_context;
     }
@@ -8638,7 +8651,7 @@ do_issue_shader() {
     shader = _default_shader;
     nassertv(shader != nullptr);
     if (_current_shader != shader) {
-      context = shader->prepare_now(get_prepared_objects(), this);
+      context = (CLP(ShaderContext) *)shader->prepare_now(get_prepared_objects(), this);
     } else {
       context = _current_shader_context;
     }
@@ -8659,7 +8672,7 @@ do_issue_shader() {
         // If it's a different type of shader, make sure to unbind the old.
         _current_shader_context->unbind();
       }
-      context->bind();
+      context->bind(alpha_test_mode);
       _current_shader = shader;
     }
 
@@ -12584,10 +12597,15 @@ set_state_and_transform(const RenderState *target,
     _state_mask.clear_bit(TextureAttrib::get_class_slot());
     _state_mask.set_bit(ShaderAttrib::get_class_slot());
   }
-  else if (!has_fixed_function_pipeline() && _current_shader == nullptr) { // In the case of OpenGL ES 2.x, we need to glUseShader before we draw anything.
-    do_issue_shader();
-    _state_mask.clear_bit(TextureAttrib::get_class_slot());
-    _state_mask.set_bit(ShaderAttrib::get_class_slot());
+  else if (!has_fixed_function_pipeline()) {
+    // If we don't have a fixed-function pipeline (eg. OpenGL ES 2.x) we need
+    // to bind a shader before drawing anything.  Also, the shader must
+    // implement the desired alpha test mode.
+    if (_current_shader == nullptr) {
+      do_issue_shader();
+      _state_mask.clear_bit(TextureAttrib::get_class_slot());
+      _state_mask.set_bit(ShaderAttrib::get_class_slot());
+    }
   }
 
   // Update all of the state that is bound to the shader program.

+ 7 - 3
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -170,7 +170,9 @@ typedef void (APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
 typedef void (APIENTRYP PFNGLSHADERSOURCEPROC_P) (GLuint shader, GLsizei count, const GLchar* const *string, const GLint *length);
 typedef void (APIENTRYP PFNGLSPECIALIZESHADERARBPROC) (GLuint shader, const GLchar *, GLuint, const GLuint *, const GLuint *);
 typedef void (APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program);
+typedef void (APIENTRYP PFNGLUNIFORM1FPROC) (GLint location, GLfloat v0);
 typedef void (APIENTRYP PFNGLUNIFORM4FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+typedef void (APIENTRYP PFNGLUNIFORM1DPROC) (GLint location, GLdouble v0);
 typedef void (APIENTRYP PFNGLUNIFORM4DPROC) (GLint location, GLdouble v0, GLdouble v1, GLdouble v2, GLdouble v3);
 typedef void (APIENTRYP PFNGLUNIFORM1IPROC) (GLint location, GLint v0);
 typedef void (APIENTRYP PFNGLUNIFORM1FVPROC) (GLint location, GLsizei count, const GLfloat *value);
@@ -717,11 +719,11 @@ protected:
   GLuint _vertex_attrib_divisors[32];
 
   PT(Shader) _current_shader;
-  ShaderContext *_current_shader_context;
+  CLP(ShaderContext) *_current_shader_context;
   PT(Shader) _vertex_array_shader;
-  ShaderContext *_vertex_array_shader_context;
+  CLP(ShaderContext) *_vertex_array_shader_context;
   PT(Shader) _texture_binding_shader;
-  ShaderContext *_texture_binding_shader_context;
+  CLP(ShaderContext) *_texture_binding_shader_context;
 
   PT(Shader) _default_shader;
 
@@ -1056,6 +1058,7 @@ public:
   PFNGLSHADERBINARYPROC _glShaderBinary;
   PFNGLSHADERSOURCEPROC_P _glShaderSource;
   PFNGLUSEPROGRAMPROC  _glUseProgram;
+  PFNGLUNIFORM1FPROC _glUniform1f;
   PFNGLUNIFORM4FPROC _glUniform4f;
   PFNGLUNIFORM1IPROC _glUniform1i;
   PFNGLUNIFORM1FVPROC _glUniform1fv;
@@ -1109,6 +1112,7 @@ public:
   PFNGLBINDIMAGETEXTUREPROC _glBindImageTexture;
 #endif  // !OPENGLES_1
 #ifndef OPENGLES
+  PFNGLUNIFORM1DPROC _glUniform1d;
   PFNGLUNIFORM4DPROC _glUniform4d;
   PFNGLUNIFORM1DVPROC _glUniform1dv;
   PFNGLUNIFORM2DVPROC _glUniform2dv;

File diff suppressed because it is too large
+ 389 - 214
panda/src/glstuff/glShaderContext_src.cxx


+ 45 - 32
panda/src/glstuff/glShaderContext_src.h

@@ -33,6 +33,18 @@ private:
   struct UniformBlock;
   typedef pmap<const InternalName *, GLint> LocationMap;
 
+  // Describes a single uniform upload to be issued through a type-erased
+  // function pointer.  Lists of these are kept per uniform block (see
+  // UniformCalls) for the case where UBOs are not used or supported.
+  struct UniformCall {
+    // NOTE(review): presumably the uniform location passed to the GL call.
+    GLint _location;
+    // NOTE(review): presumably the element count passed to the GL call.
+    GLuint _count;
+    // Type-erased function pointer; presumably one of the GSG's _glUniform*
+    // entry points, cast back to its real type before use -- confirm in
+    // glShaderContext_src.cxx.
+    void *_func;
+    // NOTE(review): presumably the offset of the source data within the
+    // block's buffer -- confirm in glShaderContext_src.cxx.
+    size_t _offset;
+  };
+
+  // A set of UniformCall records, kept in two separate lists: one for
+  // matrices and one for vectors.
+  // NOTE(review): presumably one UniformCalls exists per compiled program
+  // variant -- confirm against the users of UniformBlock::_calls.
+  struct UniformCalls {
+    pvector<UniformCall> _matrices;
+    pvector<UniformCall> _vectors;
+  };
+
 public:
   friend class CLP(GraphicsStateGuardian);
 
@@ -40,29 +52,33 @@ public:
   ~CLP(ShaderContext)();
   ALLOC_DELETED_CHAIN(CLP(ShaderContext));
 
+  bool valid(void) override;
+  void bind(RenderAttrib::PandaCompareFunc alpha_test_mode);
+  void unbind() override;
+
+  bool compile_for(RenderAttrib::PandaCompareFunc alpha_test_mode);
+
+private:
   static void r_count_locations_bindings(const ShaderType *type,
                                          GLint &num_locations,
                                          GLint &num_ssbo_bindings,
                                          GLint &num_image_bindings);
 
-  void r_collect_uniforms(const Shader::Parameter &param, UniformBlock &block,
+  void r_collect_uniforms(GLuint program,
+                          const Shader::Parameter &param, UniformCalls &calls,
                           const ShaderType *type, const char *name,
                           const char *sym, int &location,
                           const SparseArray &active_locations,
                           int &resource_index, int &binding,
                           size_t offset = 0);
 
-  void reflect_program(SparseArray &active_locations, LocationMap &locations, LocationMap &ssbo_bindings);
-  void reflect_attribute(int i, char *name_buf, GLsizei name_buflen);
-  void reflect_uniform_block(int i, const char *block_name,
+  void reflect_program(GLuint program, SparseArray &active_locations);
+  void reflect_attribute(GLuint program, int i, char *name_buf, GLsizei name_buflen);
+  void reflect_uniform_block(GLuint program, int i, const char *block_name,
                              char *name_buffer, GLsizei name_buflen);
   bool get_sampler_texture_type(int &out, GLenum param_type);
   const ShaderType *get_param_type(GLenum type);
 
-  bool valid(void) override;
-  void bind() override;
-  void unbind() override;
-
   INLINE void set_display_region(const DisplayRegion *display_region);
   void set_state_and_transform(const RenderState *state,
                                const TransformState *modelview_transform,
@@ -84,15 +100,18 @@ public:
   }
 
 private:
-  bool _validated;
-  GLuint _glsl_program;
+  bool _validated = false;
+  bool _inject_alpha_test = false;
+  GLuint _programs[RenderAttrib::M_always] {0u};
+  RenderAttrib::PandaCompareFunc _alpha_test_mode = RenderAttrib::M_none;
+  GLint _alpha_test_ref_locations[RenderAttrib::M_always];
+
+  // May exclude the fragment shader if _inject_alpha_test is set.
   struct Module {
-    const ShaderModule *_module;
+    Shader::Stage _stage;
     GLuint _handle;
-    bool _needs_compile;
   };
-  typedef small_vector<Module, 2> Modules;
-  Modules _modules;
+  small_vector<Module, 2> _modules;
   bool _is_legacy = false;
   bool _emulate_float_attribs = false;
 
@@ -119,15 +138,7 @@ private:
 
     // When UBOs are not used or supported, we use an array of glUniform
     // calls instead.
-    struct Call {
-      GLint _location;
-      GLuint _count;
-      void *_func;
-      size_t _offset;
-    };
-
-    pvector<Call> _matrices;
-    pvector<Call> _vectors;
+    small_vector<UniformCalls, 1> _calls;
   };
   pvector<UniformBlock> _uniform_blocks;
   int _uniform_data_deps = 0;
@@ -145,9 +156,6 @@ private:
     PT(ShaderInputBinding) _binding;
     ShaderInputBinding::ResourceId _resource_id;
     CLP(TextureContext) *_gtc = nullptr;
-#ifdef OPENGLES
-    GLint _binding_index;
-#endif
     ShaderType::Access _access;
     bool _written = false;
   };
@@ -156,6 +164,7 @@ private:
 
   BitMask32 _enabled_attribs;
   GLint _color_attrib_index;
+  uint32_t _bind_attrib_locations = 0;
 
   struct StorageBlock {
     PT(ShaderInputBinding) _binding;
@@ -164,18 +173,22 @@ private:
   };
   typedef pvector<StorageBlock> StorageBlocks;
   StorageBlocks _storage_blocks;
+  uint32_t _storage_block_bindings = 0;
 
   CLP(GraphicsStateGuardian) *_glgsg;
 
+  bool _remap_locations = false;
+  LocationMap _locations;
+  LocationMap _bindings;
+
   bool _uses_standard_vertex_arrays;
 
-  void report_shader_errors(const Module &module, bool fatal);
+  void report_shader_errors(GLuint handle, Shader::Stage stage, bool fatal);
   void report_program_errors(GLuint program, bool fatal);
-  bool attach_shader(const ShaderModule *module, Shader::ModuleSpecConstants &spec_consts,
-                     const LocationMap &locations, bool &remap_locations,
-                     const LocationMap &ssbo_bindings);
-  bool compile_and_link(const LocationMap &locations, bool &remap_locations,
-                        const LocationMap &bindings);
+  GLuint create_shader(GLuint program, const ShaderModule *module,
+                       const Shader::ModuleSpecConstants &spec_consts,
+                       RenderAttrib::PandaCompareFunc alpha_test_mode);
+  GLuint compile_and_link(RenderAttrib::PandaCompareFunc alpha_test_mode);
   void release_resources();
 
 public:

+ 4 - 0
panda/src/glstuff/glmisc_src.cxx

@@ -249,7 +249,11 @@ ConfigVariableBool gl_dump_compiled_shaders
             "into the current directory."));
 
 ConfigVariableBool gl_validate_shaders
+#ifdef NDEBUG
+  ("gl-validate-shaders", false,
+#else
   ("gl-validate-shaders", true,
+#endif
    PRC_DESC("Set this to true to enable glValidateShader the first time "
             "a shader is bound.  This may cause helpful information about "
             "shaders to be printed."));

+ 2 - 0
panda/src/gobj/shader.h

@@ -294,6 +294,8 @@ public:
   uint32_t _module_mask = 0;
   uint64_t _used_caps = 0;
 
+  bool _subsumes_alpha_test = false;
+
 protected:
   ShaderFile _filename;
   Filename _fullpath;

+ 2 - 1
panda/src/gobj/shaderEnums.h

@@ -156,7 +156,8 @@ PUBLISHED:
     D_texture        = 0x08000,
     D_tex_gen        = 0x10000,
     D_render_mode    = 0x20000,
-    D_state = D_color | D_colorscale | D_material | D_shader_inputs | D_fog | D_light | D_clip_planes | D_tex_matrix | D_texture | D_tex_gen | D_render_mode,
+    D_alpha_test     = 0x40000,
+    D_state = D_color | D_colorscale | D_material | D_shader_inputs | D_fog | D_light | D_clip_planes | D_tex_matrix | D_texture | D_tex_gen | D_render_mode | D_alpha_test,
   };
 
   enum StateMatrix {

+ 16 - 8
panda/src/pgraph/renderState.I

@@ -316,16 +316,26 @@ get_invert_composition_cache_result(size_t n) const {
   return _invert_composition_cache.get_data(n)._result;
 }
 
+/**
+ * Returns the effective alpha testing mode, as cached by update_cached(), or
+ * M_none if the state has no AlphaTestAttrib.
+ *
+ * This call will never return M_always: update_cached() masks the mode with
+ * 7, which normalizes M_always to M_none (both mean "always draw").  M_never
+ * is returned unchanged.
+ */
+INLINE RenderAttrib::PandaCompareFunc RenderState::
+get_alpha_test_mode() const {
+  if ((_flags & F_computed_cache) == 0) {
+    // We pretend this function is const, even though it transparently fills
+    // in the internal cache on first use.
+    ((RenderState *)this)->update_cached();
+  }
+  return _alpha_test_mode;
+}
+
 /**
  * Returns the draw order indicated by the CullBinAttrib, if any, associated
  * by this state (or 0 if there is no CullBinAttrib).  See get_bin_index().
  */
 INLINE int RenderState::
 get_draw_order() const {
-  if ((_flags & F_checked_bin_index) == 0) {
-    // We pretend this function is const, even though it transparently
-    // modifies the internal draw_order cache.
-    ((RenderState *)this)->determine_bin_index();
+  if ((_flags & F_computed_cache) == 0) {
+    ((RenderState *)this)->update_cached();
   }
   return _draw_order;
 }
@@ -338,10 +348,8 @@ get_draw_order() const {
  */
 INLINE int RenderState::
 get_bin_index() const {
-  if ((_flags & F_checked_bin_index) == 0) {
-    // We pretend this function is const, even though it transparently
-    // modifies the internal bin_index cache.
-    ((RenderState *)this)->determine_bin_index();
+  if ((_flags & F_computed_cache) == 0) {
+    ((RenderState *)this)->update_cached();
   }
   return _bin_index;
 }

+ 11 - 3
panda/src/pgraph/renderState.cxx

@@ -12,6 +12,7 @@
  */
 
 #include "renderState.h"
+#include "alphaTestAttrib.h"
 #include "transparencyAttrib.h"
 #include "cullBinAttrib.h"
 #include "cullBinManager.h"
@@ -1719,13 +1720,20 @@ remove_cache_pointers() {
  * This is the private implementation of get_alpha_test_mode(),
  * get_bin_index() and get_draw_order().
  */
 void RenderState::
-determine_bin_index() {
+update_cached() {
   LightMutexHolder holder(_lock);
-  if ((_flags & F_checked_bin_index) != 0) {
+  if ((_flags & F_computed_cache) != 0) {
     // Someone else checked it first.
     return;
   }
 
+  const AlphaTestAttrib *alpha_test;
+  if (get_attrib(alpha_test)) {
+    _alpha_test_mode = (RenderAttrib::PandaCompareFunc)((int)alpha_test->get_mode() & 7);
+  } else {
+    _alpha_test_mode = AlphaTestAttrib::M_none;
+  }
+
   std::string bin_name;
   _draw_order = 0;
 
@@ -1763,7 +1771,7 @@ determine_bin_index() {
       << "No bin named " << bin_name << "; creating default bin.\n";
     _bin_index = bin_manager->add_bin(bin_name, CullBinManager::BT_unsorted, 0);
   }
-  _flags |= F_checked_bin_index;
+  _flags |= F_computed_cache;
 }
 
 /**

+ 5 - 4
panda/src/pgraph/renderState.h

@@ -154,6 +154,7 @@ PUBLISHED:
 PUBLISHED:
   // These methods are intended for use by low-level code, but they're also
   // handy enough to expose to high-level users.
+  INLINE RenderAttrib::PandaCompareFunc get_alpha_test_mode() const;
   INLINE int get_draw_order() const;
   INLINE int get_bin_index() const;
   int get_geom_rendering(int geom_rendering) const;
@@ -216,7 +217,7 @@ private:
   void release_new();
   void remove_cache_pointers();
 
-  void determine_bin_index();
+  void update_cached();
   void determine_cull_callback();
   void fill_default();
 
@@ -323,14 +324,14 @@ private:
   // This is redundant, but it is a useful cache.
   SlotMask _filled_slots;
 
-  // We cache the index to the associated CullBin, if there happens to be a
-  // CullBinAttrib in the state.
+  // We cache some attribute properties directly here for fast lookup.
+  RenderAttrib::PandaCompareFunc _alpha_test_mode;
   int _bin_index;
   int _draw_order;
   size_t _hash;
 
   enum Flags {
-    F_checked_bin_index       = 0x000001,
+    F_computed_cache          = 0x000001,
     F_checked_cull_callback   = 0x000002,
     F_has_cull_callback       = 0x000004,
     F_is_destructing          = 0x000008,

+ 3 - 0
panda/src/pgraph/shaderAttrib.cxx

@@ -87,6 +87,9 @@ set_shader(const Shader *s, int priority) const {
   result->_shader_priority = priority;
   result->_auto_shader = false;
   result->_has_shader = true;
+  if (s->_subsumes_alpha_test) {
+    result->_flags |= ShaderAttrib::F_subsume_alpha_test;
+  }
   return return_new(result);
 }
 

+ 2 - 0
panda/src/pgraphnodes/shaderGenerator.cxx

@@ -2070,6 +2070,8 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
   PT(Shader) shader = Shader::make(text.str(), Shader::SL_Cg);
   nassertr(shader != nullptr, nullptr);
 
+  shader->_subsumes_alpha_test = true;
+
   reset_register_allocator();
 
   CPT(ShaderAttrib) attr = make_attrib(key, shader);

+ 2 - 0
panda/src/shaderpipeline/CMakeLists.txt

@@ -6,6 +6,7 @@ set(P3SHADERPIPELINE_HEADERS
   shaderModuleSpirV.h shaderModuleSpirV.I
   spirVFlattenStructPass.h
   spirVHoistStructResourcesPass.h
+  spirVInjectAlphaTestPass.h
   spirVMakeBlockPass.h
   spirVRemoveUnusedVariablesPass.h
   spirVReplaceVariableTypePass.h
@@ -22,6 +23,7 @@ set(P3SHADERPIPELINE_SOURCES
   shaderModuleSpirV.cxx
   spirVFlattenStructPass.cxx
   spirVHoistStructResourcesPass.cxx
+  spirVInjectAlphaTestPass.cxx
   spirVMakeBlockPass.cxx
   spirVRemoveUnusedVariablesPass.cxx
   spirVReplaceVariableTypePass.cxx

+ 1 - 0
panda/src/shaderpipeline/p3shaderpipeline_composite2.cxx

@@ -1,6 +1,7 @@
 #ifndef CPPPARSER
 #include "spirVFlattenStructPass.cxx"
 #include "spirVHoistStructResourcesPass.cxx"
+#include "spirVInjectAlphaTestPass.cxx"
 #include "spirVMakeBlockPass.cxx"
 #include "spirVRemoveUnusedVariablesPass.cxx"
 #include "spirVReplaceVariableTypePass.cxx"

+ 120 - 0
panda/src/shaderpipeline/spirVInjectAlphaTestPass.cxx

@@ -0,0 +1,120 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file spirVInjectAlphaTestPass.cxx
+ * @author rdb
+ * @date 2024-11-10
+ */
+
+#include "spirVInjectAlphaTestPass.h"
+
+/**
+ * Hook invoked for every OpEntryPoint.  For fragment entry points, remembers
+ * the first interface variable whose recorded location is 0, keyed by the
+ * entry point id, so that begin_function() can look it up later.
+ * Always returns true, which keeps the OpEntryPoint instruction.
+ *
+ * NOTE(review): the match is made on _location alone; presumably an input
+ * variable at location 0 that is listed ahead of the color output would be
+ * picked instead -- confirm whether the storage class needs checking too.
+ */
+bool SpirVInjectAlphaTestPass::
+transform_entry_point(spv::ExecutionModel model, uint32_t id, const char *name, const uint32_t *interface, uint16_t size) {
+  if (model != spv::ExecutionModelFragment) {
+    // Only fragment entry points are of interest to this pass.
+    return true;
+  }
+
+  const uint32_t *const interface_end = interface + size;
+  for (const uint32_t *var = interface; var != interface_end; ++var) {
+    if (_db.get_definition(*var)._location == 0) {
+      _entry_points[id] = *var;
+      break;
+    }
+  }
+  return true;
+}
+
+/**
+ * Called at the start of each function.  Selects the variable that
+ * transform_function_op() will test: the one recorded for this function by
+ * transform_entry_point(), or 0 (meaning "inject nothing") if the function
+ * was not recorded as a fragment entry point.  Always returns true.
+ */
+bool SpirVInjectAlphaTestPass::
+begin_function(Instruction op) {
+  // op.args[1] is matched against the entry point ids stored earlier.
+  const auto found = _entry_points.find(op.args[1]);
+  _var_id = (found == _entry_points.end()) ? 0u : found->second;
+  return true;
+}
+
+/**
+ * Called for each instruction inside a function body.  While a fragment
+ * entry point with a recorded output variable is being processed
+ * (_var_id != 0), every OpReturn / OpReturnValue is preceded by an injected
+ * alpha test: the fourth component of the variable is compared against a
+ * reference value, and OpKill is issued if the comparison for _mode fails.
+ *
+ * For M_never the return is replaced by an unconditional OpKill (this is the
+ * only path that returns false).  For M_none and M_always nothing is
+ * injected.  All other instructions, and the return itself after injection,
+ * are handed to the base class implementation.
+ */
+bool SpirVInjectAlphaTestPass::
+transform_function_op(Instruction op) {
+  if (_var_id != 0 &&
+      (op.opcode == spv::OpReturn || op.opcode == spv::OpReturnValue)) {
+
+    // The opcode chosen below is the *negation* of the alpha test, since it
+    // is the condition under which the fragment gets killed.
+    spv::Op opcode;
+    switch (_mode) {
+    case M_always:
+    case M_none:
+      // You probably needn't have bothered running this pass...
+      return true;
+
+    case M_never:
+      // Every fragment fails: emit OpKill in place of the return.
+      op_kill();
+      return false;
+
+    case M_less:
+      opcode = spv::OpFOrdGreaterThanEqual;
+      break;
+
+    case M_equal:
+      opcode = spv::OpFOrdNotEqual;
+      break;
+
+    case M_less_equal:
+      opcode = spv::OpFOrdGreaterThan;
+      break;
+
+    case M_greater:
+      opcode = spv::OpFOrdLessThanEqual;
+      break;
+
+    case M_not_equal:
+      opcode = spv::OpFOrdEqual;
+      break;
+
+    case M_greater_equal:
+      opcode = spv::OpFOrdLessThan;
+      break;
+
+    default:
+      nassertr(false, true);
+      return true;
+    }
+
+    // The reference value variable is created on the first return that needs
+    // it and reused for every later one.  It is only given an explicit
+    // location if one was passed to the constructor.
+    if (_alpha_ref_var_id == 0) {
+      _alpha_ref_var_id = define_variable(ShaderType::float_type, spv::StorageClassUniformConstant);
+      if (_location >= 0) {
+        decorate(_alpha_ref_var_id, spv::DecorationLocation, (uint32_t)_location);
+      }
+    }
+    // Index 3 selects the fourth component.
+    // NOTE(review): assumes the variable is a 4-component vector -- confirm.
+    uint32_t alpha = op_load(op_access_chain(_var_id, {define_int_constant(3)}));
+    uint32_t ref = op_load(_alpha_ref_var_id);
+
+    // The extra indentation below mirrors the body of the emitted branch.
+    uint32_t branch = branch_if(op_compare(opcode, alpha, ref));
+      op_kill();
+    branch_endif(branch);
+  }
+
+  return SpirVTransformPass::transform_function_op(op);
+}
+
+/**
+ * Called when an OpFunctionEnd instruction is encountered, belonging to an
+ * OpFunction with the given identifier.
+ *
+ * Resets _var_id to 0, the value for which transform_function_op() injects
+ * nothing, until begin_function() selects a variable again.
+ */
+void SpirVInjectAlphaTestPass::
+end_function(uint32_t function_id) {
+  _var_id = 0;
+}

+ 68 - 0
panda/src/shaderpipeline/spirVInjectAlphaTestPass.h

@@ -0,0 +1,68 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file spirVInjectAlphaTestPass.h
+ * @author rdb
+ * @date 2024-11-10
+ */
+
+#ifndef SPIRVINJECTALPHATESTPASS_H
+#define SPIRVINJECTALPHATESTPASS_H
+
+#include "spirVTransformPass.h"
+
+/**
+ * Injects an alpha test before all return statements of fragment entry points.
+ *
+ * The test compares the fourth component of the entry point's location-0
+ * interface variable against a reference value, which is loaded from a float
+ * variable that the pass defines the first time it is needed.  Fragments
+ * that fail the test are discarded with OpKill.
+ */
+class EXPCL_PANDA_SHADERPIPELINE SpirVInjectAlphaTestPass final : public SpirVTransformPass {
+public:
+  // The comparison that a fragment must pass in order to be drawn.
+  // NOTE(review): presumably kept in sync with RenderAttrib::PandaCompareFunc
+  // so that values can be cast between the two -- confirm.
+  enum Mode {
+    M_none=0,           // alpha-test disabled (always-draw)
+    M_never,            // Never draw.
+    M_less,             // incoming < reference_alpha
+    M_equal,            // incoming == reference_alpha
+    M_less_equal,       // incoming <= reference_alpha
+    M_greater,          // incoming > reference_alpha
+    M_not_equal,        // incoming != reference_alpha
+    M_greater_equal,    // incoming >= reference_alpha
+    M_always            // Always draw.
+  };
+
+  // If location is non-negative, the reference value variable is decorated
+  // with that location; otherwise it is left without a location decoration.
+  SpirVInjectAlphaTestPass(Mode mode, int location = -1) : _mode(mode), _location(location) {}
+
+  virtual bool transform_entry_point(spv::ExecutionModel model, uint32_t id, const char *name, const uint32_t *interface, uint16_t size);
+  virtual bool begin_function(Instruction op);
+  virtual bool transform_function_op(Instruction op);
+  virtual void end_function(uint32_t function_id);
+
+public:
+  const Mode _mode;
+  const int _location;
+
+  // Id of the reference value variable, or 0 if the pass has not (yet) had to
+  // define one.
+  uint32_t _alpha_ref_var_id = 0;
+
+private:
+  // The variable to test in the function currently being processed, or 0 if
+  // that function is not a recorded fragment entry point.
+  uint32_t _var_id = 0;
+
+  // For each entry point we access, the output variable to test.
+  pmap<uint32_t, uint32_t> _entry_points;
+
+public:
+  // NOTE(review): this looks like a leftover from the struct-hoisting pass;
+  // nothing in spirVInjectAlphaTestPass.cxx reads or writes it.  It is kept
+  // only because it is public -- confirm that there are no outside users and
+  // remove it.
+  pmap<AccessChain, uint32_t> _hoisted_vars;
+};
+
+#endif

+ 65 - 1
panda/src/shaderpipeline/spirVTransformPass.cxx

@@ -50,13 +50,17 @@ process_preamble(std::vector<uint32_t> &stream) {
       ++i;
 
       // Remove the deleted IDs from the entry point interface.
+      uint32_t interface_begin = i;
       while (i < op.nargs) {
         if (!is_deleted(op.args[i])) {
           new_args.push_back(op.args[i]);
         }
         ++i;
       }
-      add_debug(op.opcode, new_args.data(), new_args.size());
+
+      if (transform_entry_point((spv::ExecutionModel)op.args[0], op.args[1], (const char *)&new_args[2], &new_args[interface_begin], new_args.size() - interface_begin)) {
+        add_debug(op.opcode, new_args.data(), new_args.size());
+      }
     }
     else if (transform_debug_op(op)) {
       _new_preamble.insert(_new_preamble.end(), it._words, it.next()._words);
@@ -192,6 +196,15 @@ void SpirVTransformPass::
 preprocess() {
 }
 
+/**
+ * Transforms an OpEntryPoint.
+ * Return true to keep the instruction, false to omit it.
+ *
+ * Called from process_preamble() with the execution model and id of the
+ * entry point, a pointer to its name, and the list of interface ids from
+ * which any deleted ids have already been removed (size is the number of ids
+ * left in that list).  The default implementation keeps every entry point.
+ */
+bool SpirVTransformPass::
+transform_entry_point(spv::ExecutionModel model, uint32_t id, const char *name, const uint32_t *interface, uint16_t size) {
+  return true;
+}
+
 /**
  * Transforms a debug instruction (OpName or OpMemberName).
  * Return true to preserve the instruction, false to omit it (in which case you
@@ -1209,3 +1222,54 @@ op_composite_extract(uint32_t obj_id, std::initializer_list<uint32_t> chain) {
   mark_defined(id);
   return id;
 }
+
+/**
+ * Appends a two-operand comparison instruction with the given opcode to the
+ * function being written, and returns the id of its boolean result.
+ * At the moment, only works on scalars.
+ */
+uint32_t SpirVTransformPass::
+op_compare(spv::Op opcode, uint32_t obj1, uint32_t obj2) {
+  const uint32_t bool_type_id = define_type(ShaderType::bool_type);
+  const uint32_t result_id = allocate_id();
+
+  // Five words: the opcode word, result type, result id and both operands.
+  const uint32_t words[5] = {
+    (5u << spv::WordCountShift) | opcode,
+    bool_type_id,
+    result_id,
+    obj1,
+    obj2,
+  };
+  _new_functions.insert(_new_functions.end(), words, words + 5);
+
+  // Record the type of the new result in the database.
+  Definition &result_def = _db.modify_definition(result_id);
+  result_def._type = ShaderType::bool_type;
+  result_def._type_id = bool_type_id;
+
+  mark_defined(result_id);
+  return result_id;
+}
+
+/**
+ * Appends an OpKill (a single-word instruction) to the function being
+ * written.
+ */
+void SpirVTransformPass::
+op_kill() {
+  const uint32_t kill_word = (1u << spv::WordCountShift) | spv::OpKill;
+  _new_functions.insert(_new_functions.end(), kill_word);
+}
+
+/**
+ * Begins an "if" branch on the given boolean condition: emits the selection
+ * merge, the conditional branch, and the label that opens the "true" block.
+ * Instructions emitted after this call land in the "true" block.
+ *
+ * The return value is the label that both serves as the merge point and as
+ * the target when the condition is false; pass it to branch_endif().
+ */
+uint32_t SpirVTransformPass::
+branch_if(uint32_t cond) {
+  const uint32_t then_label = allocate_id();
+  const uint32_t merge_label = allocate_id();
+
+  const uint32_t words[] = {
+    // OpSelectionMerge <merge block> <selection control>
+    (3u << spv::WordCountShift) | spv::OpSelectionMerge,
+    merge_label,
+    (uint32_t)spv::SelectionControlMaskNone,
+    // OpBranchConditional <condition> <true label> <false label>
+    (4u << spv::WordCountShift) | spv::OpBranchConditional,
+    cond,
+    then_label,
+    merge_label,
+    // OpLabel <true label>
+    (2u << spv::WordCountShift) | spv::OpLabel,
+    then_label,
+  };
+  _new_functions.insert(_new_functions.end(), words,
+                        words + sizeof(words) / sizeof(words[0]));
+  return merge_label;
+}
+
+/**
+ * Ends an "if" branch by emitting the label returned from branch_if().
+ *
+ * Only the label is emitted, not a branch to it, so the instructions placed
+ * between branch_if() and this call are expected to end with an instruction
+ * that terminates the block by themselves (such as the one from op_kill()).
+ */
+void SpirVTransformPass::
+branch_endif(uint32_t false_label) {
+  const uint32_t label_word = (2u << spv::WordCountShift) | spv::OpLabel;
+  _new_functions.insert(_new_functions.end(), {label_word, false_label});
+}

+ 6 - 0
panda/src/shaderpipeline/spirVTransformPass.h

@@ -39,6 +39,7 @@ public:
   void process_functions(std::vector<uint32_t> &instructions);
 
   virtual void preprocess();
+  virtual bool transform_entry_point(spv::ExecutionModel model, uint32_t id, const char *name, const uint32_t *interface, uint16_t size);
   virtual bool transform_debug_op(Instruction op);
   virtual bool transform_annotation_op(Instruction op);
   virtual bool transform_definition_op(Instruction op);
@@ -128,6 +129,11 @@ protected:
   uint32_t op_vector_shuffle(uint32_t vec1, uint32_t vec2, const pvector<uint32_t> &components);
   uint32_t op_composite_construct(const ShaderType *type, const pvector<uint32_t> &constituents);
   uint32_t op_composite_extract(uint32_t obj_id, std::initializer_list<uint32_t>);
+  uint32_t op_compare(spv::Op opcode, uint32_t obj1, uint32_t obj2);
+  void op_kill();
+
+  uint32_t branch_if(uint32_t cond);
+  void branch_endif(uint32_t label);
 
   // The module is split into sections to make it easier to add instructions
   // to other sections while we are iterating.

Some files were not shown because too many files changed in this diff