Browse Source

shaderpipeline: Add textureSize emulation in DX9, remove unused param

rdb 1 year ago
parent
commit
3f6fb9f672

+ 5 - 5
panda/src/display/shaderInputBinding_impls.cxx

@@ -1658,7 +1658,7 @@ fetch_from_input(const ShaderAttrib *target_shader, void *into) const {
  * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderLightStructBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   if (_input != nullptr) {
     nassertr(index == 0, 0);
   }
@@ -1714,7 +1714,7 @@ get_state_dep() const {
  * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderTextureStagesBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return index;
 }
 
@@ -1767,7 +1767,7 @@ get_state_dep() const {
  * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderTextureBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return (ResourceId)_input.p();
 }
 
@@ -1827,7 +1827,7 @@ get_state_dep() const {
  * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderBufferBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return (ResourceId)_input.p();
 }
 
@@ -2192,7 +2192,7 @@ fetch_data(const State &state, void *into, bool packed) const {
  * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderAggregateBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   nassertr(index >= 0 && (size_t)index < _resources.size(), 0);
   return (ResourceId)_resources[index].p();
 }

+ 5 - 5
panda/src/display/shaderInputBinding_impls.h

@@ -165,7 +165,7 @@ public:
 
   virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId index,
                                     SamplerState &sampler, int &view) const;
@@ -211,7 +211,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;
@@ -233,7 +233,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;
@@ -257,7 +257,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(ShaderBuffer) fetch_shader_buffer(const State &state,
                                                ResourceId resource_id) const;
 
@@ -338,7 +338,7 @@ public:
 
   virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId index,
                                     SamplerState &sampler, int &view) const;

+ 124 - 15
panda/src/dxgsg9/dxShaderContext9.cxx

@@ -16,6 +16,7 @@
 #include "dxVertexBufferContext9.h"
 #include "shaderModuleSpirV.h"
 #include "spirVTransformer.h"
+#include "spirVEmulateTextureQueriesPass.h"
 #include "spirVHoistStructResourcesPass.h"
 #include "spirVRemoveUnusedVariablesPass.h"
 
@@ -118,26 +119,68 @@ compile_module(const ShaderModule *module, DWORD *&data) {
   // need to modify the SPIR-V to hoist those out.
   // We tell it not to remove the empty structs, since that changes the member
   // numbering, which we need to match between the original and the HLSL.
-  if (hoist_necessary) {
+  static const uint64_t emulate_caps = (Shader::C_texture_query_size | Shader::C_image_query_size | Shader::C_texture_query_levels);
+  if (hoist_necessary || (spv->_emulatable_caps & emulate_caps) != 0) {
     SpirVTransformer transformer(stream);
-    SpirVHoistStructResourcesPass hoist_pass(false);
-    transformer.run(hoist_pass);
-    transformer.run(SpirVRemoveUnusedVariablesPass());
-    stream = transformer.get_result();
 
-    for (const auto &item : hoist_pass._hoisted_vars) {
-      const auto &access_chain = item.first;
+    if ((spv->_emulatable_caps & emulate_caps) != 0) {
+      SpirVEmulateTextureQueriesPass pass(emulate_caps);
+      transformer.run(pass);
+
+      for (const auto &item : pass._size_var_ids) {
+        const auto &access_chain = item.first;
+        const auto &param_name = param_names[access_chain._var_id];
 
-      std::ostringstream str;
-      str << param_names[access_chain._var_id];
+        // Determine the resource index within the parameter.
+        const Shader::Parameter &param = _shader->_parameters[atoi(param_name.c_str() + 1)];
+        const ShaderType *type = param._type;
+        int resource_index = 0;
 
-      for (size_t i = 0; i < access_chain.size(); ++i) {
-        str << '_' << access_chain[i];
+        for (size_t i = 0; i < access_chain.size(); ++i) {
+          uint32_t index = access_chain[i];
+          if (const ShaderType::Array *array_type = type->as_array()) {
+            type = array_type->get_element_type();
+            resource_index += index * type->get_num_resources();
+          }
+          else if (const ShaderType::Struct *struct_type = type->as_struct()) {
+            for (size_t mi = 0; mi < index; ++mi) {
+              resource_index += struct_type->get_member(mi).type->get_num_resources();
+            }
+            type = struct_type->get_member(index).type;
+          }
+          else {
+            nassert_raise("invalid access chain");
+          }
+        }
+
+
+        char buf[256];
+        size_t size = sprintf(buf, "%ss_r%d", param_name.c_str(), resource_index);
+        param_names[item.second] = std::string(buf, size);
       }
+    }
+
+    if (hoist_necessary) {
+      SpirVHoistStructResourcesPass hoist_pass(false);
+      transformer.run(hoist_pass);
+
+      for (const auto &item : hoist_pass._hoisted_vars) {
+        // Note that this access chain contains only struct members.
+        const auto &access_chain = item.first;
 
-      param_names[item.second] = str.str();
+        std::ostringstream str;
+        str << param_names[access_chain._var_id];
+
+        for (size_t i = 0; i < access_chain.size(); ++i) {
+          str << '_' << access_chain[i];
+        }
+
+        param_names[item.second] = str.str();
+      }
+      transformer.run(SpirVRemoveUnusedVariablesPass());
     }
 
+    stream = transformer.get_result();
 #ifndef NDEBUG
     if (!stream.validate()) {
       return false;
@@ -295,6 +338,13 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
   }
 
+  struct SizeInput {
+    const Shader::Parameter &param;
+    int resource_index;
+    UINT reg;
+  };
+  pvector<SizeInput> size_inputs;
+
   Shader::Stage stage = module->get_stage();
 
   for (DWORD ci = 0; ci < table->Constants; ++ci) {
@@ -342,6 +392,13 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
 #endif
 
+    if (suffix[0] == 's') {
+      // Texture size input, named like p0s_r2
+      int resource_index = atoi(suffix + 3);
+      size_inputs.push_back({param, resource_index, constant.RegisterIndex});
+      continue;
+    }
+
     int reg_set = constant.RegisterSet;
     int reg_idx = constant.RegisterIndex;
     int reg_end = reg_idx + constant.RegisterCount;
@@ -383,6 +440,38 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
   }
 
+  for (const SizeInput &input : size_inputs) {
+    uint64_t resource_id = input.param._binding->get_resource_id(input.resource_index);
+
+    bool found_treg = false;
+    for (TextureRegister &treg : _textures) {
+      if (treg.binding == input.param._binding && treg.resource_id == resource_id) {
+        if (stage == Shader::Stage::VERTEX) {
+          if (treg.size_vreg >= 0) {
+            continue;
+          }
+          treg.size_vreg = input.reg;
+        }
+        if (stage == Shader::Stage::FRAGMENT) {
+          if (treg.size_freg >= 0) {
+            continue;
+          }
+          treg.size_freg = input.reg;
+        }
+        found_treg = true;
+        break;
+      }
+    }
+
+    if (!found_treg) {
+      // We have a size input for a texture that got optimized out.
+      // Generate a dummy texture register for this.
+      int vreg = (stage == Shader::Stage::VERTEX) ? input.reg : -1;
+      int freg = (stage == Shader::Stage::FRAGMENT) ? input.reg : -1;
+      _textures.push_back({(UINT)-1, input.param._binding, resource_id, vreg, freg});
+    }
+  }
+
   return true;
 }
 
@@ -516,7 +605,7 @@ r_query_resources(Shader::Stage stage, const Shader::Parameter &param,
       TextureRegister reg;
       reg.unit = reg_idx;
       reg.binding = param._binding;
-      reg.resource_id = param._binding->get_resource_id(resource_index, resource_type);
+      reg.resource_id = param._binding->get_resource_id(resource_index);
       _textures.push_back(std::move(reg));
       ++reg_idx;
     }
@@ -698,6 +787,9 @@ update_tables(GSG *gsg, const GeomVertexDataPipelineReader *data_reader) {
 void DXShaderContext9::
 disable_shader_texture_bindings(GSG *gsg) {
   for (const TextureRegister &reg : _textures) {
+    if (reg.unit == (UINT)-1) {
+      continue;
+    }
     HRESULT hr = gsg->_d3d_device->SetTexture(reg.unit, nullptr);
     if (FAILED(hr)) {
       dxgsg9_cat.error()
@@ -736,12 +828,29 @@ update_shader_texture_bindings(DXShaderContext9 *prev, GSG *gsg) {
       continue;
     }
 
-    TextureContext *tc = tex->prepare_now(gsg->_prepared_objects, gsg);
+    DXTextureContext9 *tc = (DXTextureContext9 *)tex->prepare_now(gsg->_prepared_objects, gsg);
     if (tc == nullptr) {
       continue;
     }
 
-    gsg->apply_texture(reg.unit, tc, view, sampler);
+    if (reg.unit != (UINT)-1) {
+      gsg->apply_texture(reg.unit, tc, view, sampler);
+    }
+    else if (!gsg->update_texture(tc, false)) {
+      continue;
+    }
+
+    if (reg.size_vreg >= 0 || reg.size_freg >= 0) {
+      DWORD levels = tc->get_d3d_texture(view)->GetLevelCount();
+      const float data[4] = {(float)tc->_width, (float)tc->_height, (float)tc->_depth, (float)levels};
+
+      if (reg.size_vreg >= 0) {
+        gsg->_d3d_device->SetVertexShaderConstantF(reg.size_vreg, data, 1);
+      }
+      if (reg.size_freg >= 0) {
+        gsg->_d3d_device->SetPixelShaderConstantF(reg.size_freg, data, 1);
+      }
+    }
   }
 }
 

+ 2 - 0
panda/src/dxgsg9/dxShaderContext9.h

@@ -88,6 +88,8 @@ private:
     UINT unit;
     PT(ShaderInputBinding) binding;
     uint64_t resource_id;
+    int size_vreg = -1;
+    int size_freg = -1;
   };
   pvector<TextureRegister> _textures;
 

+ 8 - 0
panda/src/dxgsg9/dxTextureContext9.cxx

@@ -1017,6 +1017,10 @@ create_texture(DXScreenData &scrn) {
       << "\n";
   }
 
+  _width = target_width;
+  _height = target_height;
+  _depth = target_depth;
+
   for (int view = 0; view < num_views; ++view) {
     IDirect3DBaseTexture9 *d3d_texture = nullptr;
     IDirect3DTexture9 *d3d_2d_texture;
@@ -1139,6 +1143,10 @@ create_simple_texture(DXScreenData &scrn) {
   DWORD usage = 0;
   D3DPOOL pool = D3DPOOL_MANAGED;
 
+  _width = target_width;
+  _height = target_height;
+  _depth = 1;
+
   int data_size = target_width * target_height * 4;
 
   IDirect3DTexture9 *d3d_2d_texture = nullptr;

+ 3 - 0
panda/src/dxgsg9/dxTextureContext9.h

@@ -53,6 +53,9 @@ private:
   unsigned int get_bits_per_pixel(Texture::Format format, int *alphbits);
   PN_stdfloat d3d_format_to_bytes_per_pixel (D3DFORMAT format);
 
+public:
+  UINT _width, _height, _depth;
+
 private:
   D3DFORMAT _d3d_format;    // the 'D3DFORMAT' the Panda TextureBuffer fmt corresponds to
   small_vector<IDirect3DBaseTexture9 *> _d3d_textures;

+ 4 - 3
panda/src/glstuff/glShaderContext_src.cxx

@@ -673,7 +673,7 @@ r_collect_uniforms(RenderAttrib::PandaCompareFunc alpha_test_mode,
 
       StorageBlock block;
       block._binding = param._binding;
-      block._resource_id = param._binding->get_resource_id(resource_index++, type);
+      block._resource_id = param._binding->get_resource_id(resource_index++);
       block._binding_index = binding;
       _storage_blocks.push_back(std::move(block));
       _storage_block_bindings |= (1 << binding);
@@ -722,7 +722,7 @@ r_collect_uniforms(RenderAttrib::PandaCompareFunc alpha_test_mode,
   if (const ShaderType::SampledImage *sampler = type->as_sampled_image()) {
     TextureUnit unit;
     unit._binding = param._binding;
-    unit._resource_id = param._binding->get_resource_id(resource_index++, type);
+    unit._resource_id = param._binding->get_resource_id(resource_index++);
     unit._target = _glgsg->get_texture_target(sampler->get_texture_type());
 
     for (int i = 0; i < RenderAttrib::M_always; ++i) {
@@ -782,7 +782,7 @@ r_collect_uniforms(RenderAttrib::PandaCompareFunc alpha_test_mode,
     ImageUnit unit;
 #endif
     unit._binding = param._binding;
-    unit._resource_id = param._binding->get_resource_id(resource_index++, type);
+    unit._resource_id = param._binding->get_resource_id(resource_index++);
     unit._access = image->get_access();
     unit._written = false;
 
@@ -2916,6 +2916,7 @@ create_shader(GLuint program, const ShaderModule *module, size_t mi,
         }
       }
 
+      // Assign names to emulated texture/image size variables.
       for (auto &item : size_var_ids) {
         const SpirVTransformPass::AccessChain &chain = item.first;
         auto it = id_to_location.find(chain._var_id);

+ 2 - 2
panda/src/gobj/shaderInputBinding.cxx

@@ -63,10 +63,10 @@ fetch_data(const State &state, void *into, bool packed) const {
 
 /**
  * Returns an opaque resource identifier that can later be used to fetch the
- * nth resource, which is of the given type.
+ * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderInputBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return 0;
 }
 

+ 1 - 1
panda/src/gobj/shaderInputBinding.h

@@ -57,7 +57,7 @@ public:
                           bool packed = false) const;
 
   typedef uintptr_t ResourceId;
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;

+ 26 - 0
panda/src/shaderpipeline/spirVHoistStructResourcesPass.cxx

@@ -310,6 +310,7 @@ transform_function_op(Instruction op) {
         // the base id to our variable.
         new_args[2] = new_var_id;
         add_instruction(op.opcode, new_args.data(), new_args.size());
+        _db.set_origin(new_args[1], new_args[2]);
         return false;
       }
 
@@ -457,6 +458,8 @@ transform_function_op(Instruction op) {
     // people actually do this, we can add support.
     nassertr(!_affected_types.count(op.args[0]), false);
     nassertr(!is_deleted(op.args[2]), false);
+
+    _db.set_origin(op.args[1], op.args[2]);
     mark_used(op.args[2]);
     break;
 
@@ -474,6 +477,29 @@ transform_function_op(Instruction op) {
     }
     break;
 
+  case spv::OpImage:
+    {
+      Definition &def = _db.modify_definition(op.args[1]);
+      def._origin_id = _db.get_definition(op.args[2])._origin_id;
+      def._flags |= SpirVResultDatabase::DF_sampled_image;
+    }
+    break;
+
+  case spv::OpSampledImage:
+    _db.set_origin(op.args[1], op.args[2]);
+    break;
+
+  case spv::OpImageQuerySizeLod:
+  case spv::OpImageQuerySize:
+  case spv::OpImageQueryLevels:
+    {
+      uint32_t var_id = _db.get_definition(op.args[2])._origin_id;
+      if (var_id != 0) {
+        _db.modify_definition(var_id)._flags |= SpirVResultDatabase::DF_queried_image_size_levels;
+      }
+    }
+    break;
+
   case spv::OpReturnValue:
     // Cannot return a struct with an opaque type from a function.
     if (op.nargs >= 1) {

+ 8 - 0
panda/src/shaderpipeline/spirVResultDatabase.cxx

@@ -1289,6 +1289,14 @@ record_spec_constant(uint32_t id, uint32_t type_id) {
   def._flags |= DF_constant_expression;
 }
 
+/**
+ * Changes the origin of an existing id to match the origin already recorded
+ * for the other given id.
+ */
+void SpirVResultDatabase::
+set_origin(uint32_t id, uint32_t other) {
+  modify_definition(id)._origin_id = get_definition(other)._origin_id;
+}
+
 /**
  * Called for a variable, or any id whose value (indirectly) originates from a
  * variable, to mark the variable and any types used thereby as "used".

+ 1 - 0
panda/src/shaderpipeline/spirVResultDatabase.h

@@ -146,6 +146,7 @@ public:
   void record_temporary(uint32_t id, uint32_t type_id, uint32_t from_id, uint32_t function_id, bool propagate_constexpr=false);
   void record_spec_constant(uint32_t id, uint32_t type_id);
 
+  void set_origin(uint32_t id, uint32_t other);
   void mark_used(uint32_t id);
 
   void collect_nested_structs(pmap<uint32_t, const ShaderType::Struct *> &result, uint32_t id) const;

+ 3 - 0
tests/display/test_glsl_shader.py

@@ -75,6 +75,9 @@ def test_glsl_texture_size(env):
     assert(textureSize(tex2, 0) == ivec2(64, 32));
     assert(textureSize(tex3, 0) == ivec2(16, 16));
     assert(textureSize(tex4, 0) == ivec3(8, 4, 2));
+
+    // dummy sample this texture so it doesn't get optimized out
+    assert(texture(tex1[0], 0) != vec4(2, 2, 2, 2));
     """
     env.run_glsl(code, preamble, {'tex1[0]': tex1_0, 'tex1[1]': tex1_1, 'tex2': tex2, 'tex3': tex3, 'tex4': tex4})