Browse Source

A tiny optimisation to use fewer registers

rdb 12 years ago
parent
commit
b781547956
1 changed files with 64 additions and 78 deletions
  1. 64 78
      panda/src/pgraphnodes/shaderGenerator.cxx

+ 64 - 78
panda/src/pgraphnodes/shaderGenerator.cxx

@@ -95,6 +95,14 @@ alloc_vreg() {
   case  5: _vtregs_used += 1; return (char*)"TEXCOORD5";
   case  6: _vtregs_used += 1; return (char*)"TEXCOORD6";
   case  7: _vtregs_used += 1; return (char*)"TEXCOORD7";
+  }
+  switch (_vcregs_used) {
+  case  0: _vcregs_used += 1; return (char*)"COLOR0";
+  case  1: _vcregs_used += 1; return (char*)"COLOR1";
+  }
+  // These don't exist in arbvp1, though they're reportedly
+  // supported by other profiles.
+  switch (_vtregs_used) {
   case  8: _vtregs_used += 1; return (char*)"TEXCOORD8";
   case  9: _vtregs_used += 1; return (char*)"TEXCOORD9";
   case 10: _vtregs_used += 1; return (char*)"TEXCOORD10";
@@ -104,24 +112,6 @@ alloc_vreg() {
   case 14: _vtregs_used += 1; return (char*)"TEXCOORD14";
   case 15: _vtregs_used += 1; return (char*)"TEXCOORD15";
   }
-  switch (_vcregs_used) {
-  case  0: _vcregs_used += 1; return (char*)"COLOR0";
-  case  1: _vcregs_used += 1; return (char*)"COLOR1";
-  case  2: _vcregs_used += 1; return (char*)"COLOR2";
-  case  3: _vcregs_used += 1; return (char*)"COLOR3";
-  case  4: _vcregs_used += 1; return (char*)"COLOR4";
-  case  5: _vcregs_used += 1; return (char*)"COLOR5";
-  case  6: _vcregs_used += 1; return (char*)"COLOR6";
-  case  7: _vcregs_used += 1; return (char*)"COLOR7";
-  case  8: _vcregs_used += 1; return (char*)"COLOR8";
-  case  9: _vcregs_used += 1; return (char*)"COLOR9";
-  case 10: _vcregs_used += 1; return (char*)"COLOR10";
-  case 11: _vcregs_used += 1; return (char*)"COLOR11";
-  case 12: _vcregs_used += 1; return (char*)"COLOR12";
-  case 13: _vcregs_used += 1; return (char*)"COLOR13";
-  case 14: _vcregs_used += 1; return (char*)"COLOR14";
-  case 15: _vcregs_used += 1; return (char*)"COLOR15";
-  }
   return (char*)"UNKNOWN";
 }
 
@@ -141,6 +131,14 @@ alloc_freg() {
   case  5: _ftregs_used += 1; return (char*)"TEXCOORD5";
   case  6: _ftregs_used += 1; return (char*)"TEXCOORD6";
   case  7: _ftregs_used += 1; return (char*)"TEXCOORD7";
+  }
+  switch (_fcregs_used) {
+  case  0: _fcregs_used += 1; return (char*)"COLOR0";
+  case  1: _fcregs_used += 1; return (char*)"COLOR1";
+  }
+  // These don't exist in arbvp1/arbfp1, though they're
+  // reportedly supported by other profiles.
+  switch (_ftregs_used) {
   case  8: _ftregs_used += 1; return (char*)"TEXCOORD8";
   case  9: _ftregs_used += 1; return (char*)"TEXCOORD9";
   case 10: _ftregs_used += 1; return (char*)"TEXCOORD10";
@@ -150,24 +148,6 @@ alloc_freg() {
   case 14: _ftregs_used += 1; return (char*)"TEXCOORD14";
   case 15: _ftregs_used += 1; return (char*)"TEXCOORD15";
   }
-  switch (_fcregs_used) {
-  case  0: _fcregs_used += 1; return (char*)"COLOR0";
-  case  1: _fcregs_used += 1; return (char*)"COLOR1";
-  case  2: _fcregs_used += 1; return (char*)"COLOR2";
-  case  3: _fcregs_used += 1; return (char*)"COLOR3";
-  case  4: _fcregs_used += 1; return (char*)"COLOR4";
-  case  5: _fcregs_used += 1; return (char*)"COLOR5";
-  case  6: _fcregs_used += 1; return (char*)"COLOR6";
-  case  7: _fcregs_used += 1; return (char*)"COLOR7";
-  case  8: _fcregs_used += 1; return (char*)"COLOR8";
-  case  9: _fcregs_used += 1; return (char*)"COLOR9";
-  case 10: _fcregs_used += 1; return (char*)"COLOR10";
-  case 11: _fcregs_used += 1; return (char*)"COLOR11";
-  case 12: _fcregs_used += 1; return (char*)"COLOR12";
-  case 13: _fcregs_used += 1; return (char*)"COLOR13";
-  case 14: _fcregs_used += 1; return (char*)"COLOR14";
-  case 15: _fcregs_used += 1; return (char*)"COLOR15";
-  }
   return (char*)"UNKNOWN";
 }
 
@@ -230,7 +210,7 @@ analyze_renderstate(const RenderState *rs) {
   _out_aux_normal = (outputs & AuxBitplaneAttrib::ABO_aux_normal) ? true:false;
   _out_aux_glow = (outputs & AuxBitplaneAttrib::ABO_aux_glow) ? true:false;
   _out_aux_any = (_out_aux_normal || _out_aux_glow);
-  
+
   if (_out_aux_normal) {
     _need_eye_normal = true;
   }
@@ -285,7 +265,7 @@ analyze_renderstate(const RenderState *rs) {
 
   // See if there is a normal map, height map, gloss map, or glow map.
   // Also check if anything has TexGen.
-  
+
   const TexGenAttrib *tex_gen = DCAST(TexGenAttrib, rs->get_attrib_def(TexGenAttrib::get_class_slot()));
   for (int i=0; i<_num_textures; i++) {
     TextureStage *stage = texture->get_on_stage(i);
@@ -336,7 +316,6 @@ analyze_renderstate(const RenderState *rs) {
 
   if (la->get_num_on_lights() > 0) {
     _lighting = true;
-    _need_eye_position = true;
     _need_eye_normal = true;
   }
 
@@ -390,6 +369,13 @@ analyze_renderstate(const RenderState *rs) {
     } else if (_map_index_gloss >= 0) {
       _have_specular = true;
     }
+
+    if (_plights.size() + _slights.size() > 0) {
+      _need_eye_position = true;
+
+    } else if (_have_specular && _material->get_local()) {
+      _need_eye_position = true;
+    }
   }
 
   // Decide whether to separate ambient and diffuse calculations.
@@ -521,34 +507,34 @@ CPT(RenderAttrib) ShaderGenerator::
 create_shader_attrib(const string &txt) {
   PT(Shader) shader = Shader::make(txt);
   CPT(RenderAttrib) shattr = ShaderAttrib::make();
-  shattr=DCAST(ShaderAttrib, shattr)->set_shader(shader);
+  shattr = DCAST(ShaderAttrib, shattr)->set_shader(shader);
   if (_lighting) {
-    for (int i=0; i<(int)_alights.size(); i++) {
-      shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("alight", i), _alights_np[i]);
+    for (int i=0; i < (int)_alights.size(); i++) {
+      shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("alight", i), _alights_np[i]);
     }
-    for (int i=0; i<(int)_dlights.size(); i++) {
-      shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlight", i), _dlights_np[i]);
+    for (int i=0; i < (int)_dlights.size(); i++) {
+      shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlight", i), _dlights_np[i]);
       if (_shadows && _dlights[i]->_shadow_caster) {
         PT(Texture) tex = update_shadow_buffer(_dlights_np[i]);
         if (tex == NULL) {
           pgraph_cat.error() << "Failed to create shadow buffer for DirectionalLight '" << _dlights[i]->get_name() << "'!\n";
         }
-        shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlighttex", i), tex);
+        shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlighttex", i), tex);
       } else {
         _dlights[i]->clear_shadow_buffers();
       }
     }
-    for (int i=0; i<(int)_plights.size(); i++) {
-      shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("plight", i), _plights_np[i]);
+    for (int i=0; i < (int)_plights.size(); i++) {
+      shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("plight", i), _plights_np[i]);
     }
-    for (int i=0; i<(int)_slights.size(); i++) {
-      shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slight", i), _slights_np[i]);
+    for (int i=0; i < (int)_slights.size(); i++) {
+      shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slight", i), _slights_np[i]);
       if (_shadows && _slights[i]->_shadow_caster) {
         PT(Texture) tex = update_shadow_buffer(_slights_np[i]);
         if (tex == NULL) {
           pgraph_cat.error() << "Failed to create shadow buffer for Spotlight '" << _slights[i]->get_name() << "'!\n";
         }
-        shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slighttex", i), tex);
+        shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slighttex", i), tex);
       } else {
         _slights[i]->clear_shadow_buffers();
       }
@@ -574,7 +560,7 @@ update_shadow_buffer(NodePath light_np) {
   if (light == NULL || !light->_shadow_caster) {
     return NULL;
   }
-  
+
   // See if we already have a buffer. If not, create one.
   PT(Texture) tex;
   if (light->_sbuffers.count(_gsg) == 0) {
@@ -634,7 +620,6 @@ synthesize_shader(const RenderState *rs) {
 
   // These variables will hold the results of register allocation.
 
-  char *normal_vreg = 0;
   char *ntangent_vreg = 0;
   char *ntangent_freg = 0;
   char *nbinormal_vreg = 0;
@@ -652,6 +637,7 @@ synthesize_shader(const RenderState *rs) {
   char *hpos_freg = 0;
 
   if (_vertex_colors) {
+    // Reserve COLOR0
     _vcregs_used = 1;
     _fcregs_used = 1;
   }
@@ -676,8 +662,8 @@ synthesize_shader(const RenderState *rs) {
     text << "\t out float4 l_texcoord" << i << " : " << texcoord_freg[i] << ",\n";
   }
   if (_vertex_colors) {
-    text << "\t in float4 vtx_color : COLOR,\n";
-    text << "\t out float4 l_color : COLOR,\n";
+    text << "\t in float4 vtx_color : COLOR0,\n";
+    text << "\t out float4 l_color : COLOR0,\n";
   }
   if (_need_world_position || _need_world_normal) {
     text << "\t uniform float4x4 trans_model_to_world,\n";
@@ -701,8 +687,7 @@ synthesize_shader(const RenderState *rs) {
     text << "\t out float4 l_eye_normal : " << eye_normal_freg << ",\n";
   }
   if (_map_index_height >= 0 || _need_world_normal || _need_eye_normal) {
-    normal_vreg = alloc_vreg();
-    text << "\t in float4 vtx_normal : " << normal_vreg << ",\n";
+    text << "\t in float4 vtx_normal : NORMAL,\n";
   }
   if (_map_index_height >= 0) {
     htangent_vreg = alloc_vreg();
@@ -722,6 +707,7 @@ synthesize_shader(const RenderState *rs) {
       if (_map_index_normal != _map_index_height) {
         ntangent_vreg = alloc_vreg();
         nbinormal_vreg = alloc_vreg();
+        // NB. Generic registers are allocated here because, if we used the TANGENT and BINORMAL semantics, Cg would have them overlap with TEXCOORD6-7.
         text << "\t in float4 vtx_tangent" << _map_index_normal << " : " << ntangent_vreg << ",\n";
         text << "\t in float4 vtx_binormal" << _map_index_normal << " : " << nbinormal_vreg << ",\n";
       }
@@ -731,7 +717,7 @@ synthesize_shader(const RenderState *rs) {
       text << "\t out float4 l_binormal : " << nbinormal_freg << ",\n";
     }
     if (_shadows && _auto_shadow_on) {
-      for (int i=0; i<(int)_dlights.size(); i++) {
+      for (int i=0; i < (int)_dlights.size(); i++) {
         if (_dlights[i]->_shadow_caster) {
           dlightcoord_freg.push_back(alloc_freg());
           text << "\t uniform float4x4 trans_model_to_clip_of_dlight" << i << ",\n";
@@ -740,7 +726,7 @@ synthesize_shader(const RenderState *rs) {
           dlightcoord_freg.push_back(NULL);
         }
       }
-      for (int i=0; i<(int)_slights.size(); i++) {
+      for (int i=0; i < (int)_slights.size(); i++) {
         if (_slights[i]->_shadow_caster) {
           slightcoord_freg.push_back(alloc_freg());
           text << "\t uniform float4x4 trans_model_to_clip_of_slight" << i << ",\n";
@@ -793,12 +779,12 @@ synthesize_shader(const RenderState *rs) {
   }
   if (_shadows && _auto_shadow_on) {
     text << "\t float4x4 biasmat = {0.5f, 0.0f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.0f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, 1.0f};\n";
-    for (int i=0; i<(int)_dlights.size(); i++) {
+    for (int i=0; i < (int)_dlights.size(); i++) {
       if (_dlights[i]->_shadow_caster) {
         text << "\t l_dlightcoord" << i << " = mul(biasmat, mul(trans_model_to_clip_of_dlight" << i << ", vtx_position));\n";
       }
     }
-    for (int i=0; i<(int)_slights.size(); i++) {
+    for (int i=0; i < (int)_slights.size(); i++) {
       if (_slights[i]->_shadow_caster) {
         text << "\t l_slightcoord" << i << " = mul(biasmat, mul(trans_model_to_clip_of_slight" << i << ", vtx_position));\n";
       }
@@ -827,7 +813,7 @@ synthesize_shader(const RenderState *rs) {
   if (_need_world_normal) {
     text << "\t in float4 l_world_normal : " << world_normal_freg << ",\n";
   }
-  if (_need_eye_position) { 
+  if (_need_eye_position) {
     text << "\t in float4 l_eye_position : " << eye_position_freg << ",\n";
   }
   if (_need_eye_normal) {
@@ -851,10 +837,10 @@ synthesize_shader(const RenderState *rs) {
     text << "\t in float3 l_binormal : " << nbinormal_freg << ",\n";
   }
   if (_lighting) {
-    for (int i=0; i<(int)_alights.size(); i++) {
+    for (int i=0; i < (int)_alights.size(); i++) {
       text << "\t uniform float4 alight_alight" << i << ",\n";
     }
-    for (int i=0; i<(int)_dlights.size(); i++) {
+    for (int i=0; i < (int)_dlights.size(); i++) {
       text << "\t uniform float4x4 dlight_dlight" << i << "_rel_view,\n";
       if (_shadows && _dlights[i]->_shadow_caster && _auto_shadow_on) {
         if (_use_shadow_filter) {
@@ -865,10 +851,10 @@ synthesize_shader(const RenderState *rs) {
         text << "\t in float4 l_dlightcoord" << i << " : " << dlightcoord_freg[i] << ",\n";
       }
     }
-    for (int i=0; i<(int)_plights.size(); i++) {
+    for (int i=0; i < (int)_plights.size(); i++) {
       text << "\t uniform float4x4 plight_plight" << i << "_rel_view,\n";
     }
-    for (int i=0; i<(int)_slights.size(); i++) {
+    for (int i=0; i < (int)_slights.size(); i++) {
       text << "\t uniform float4x4 slight_slight" << i << "_rel_view,\n";
       text << "\t uniform float4   satten_slight" << i << ",\n";
       if (_shadows && _slights[i]->_shadow_caster && _auto_shadow_on) {
@@ -899,7 +885,7 @@ synthesize_shader(const RenderState *rs) {
   }
   text << "\t out float4 o_color : COLOR0,\n";
   if (_vertex_colors) {
-    text << "\t in float4 l_color : COLOR,\n";
+    text << "\t in float4 l_color : COLOR0,\n";
   } else {
     text << "\t uniform float4 attr_color,\n";
   }
@@ -917,7 +903,7 @@ synthesize_shader(const RenderState *rs) {
   }
   text << "\t float4 result;\n";
   if (_out_aux_any) {
-    text << "\t o_aux = float4(0,0,0,0);\n";
+    text << "\t o_aux = float4(0, 0, 0, 0);\n";
   }
   // Now generate any texture coordinates according to TexGenAttrib. If it has a TexMatrixAttrib, also transform them.
   for (int i=0; i<_num_textures; i++) {
@@ -959,11 +945,11 @@ synthesize_shader(const RenderState *rs) {
     case Texture::TT_2d_texture_array:
       text << "xyz";
       break;
-    case Texture::TT_2d_texture: 
-      text << "xy";  
+    case Texture::TT_2d_texture:
+      text << "xy";
       break;
     case Texture::TT_1d_texture:
-      text << "x";   
+      text << "x";
       break;
     default:
       break;
@@ -1058,7 +1044,7 @@ synthesize_shader(const RenderState *rs) {
         text << "\t float shininess = 50; // no shininess specified, using default\n";
       }
     }
-    for (int i=0; i<(int)_alights.size(); i++) {
+    for (int i=0; i < (int)_alights.size(); i++) {
       text << "\t // Ambient Light " << i << "\n";
       text << "\t lcolor = alight_alight" << i << ";\n";
       if (_separate_ambient_diffuse && _have_ambient) {
@@ -1067,7 +1053,7 @@ synthesize_shader(const RenderState *rs) {
         text << "\t tot_diffuse += lcolor;\n";
       }
     }
-    for (int i=0; i<(int)_dlights.size(); i++) {
+    for (int i=0; i < (int)_dlights.size(); i++) {
       text << "\t // Directional Light " << i << "\n";
       text << "\t lcolor = dlight_dlight" << i << "_rel_view[0];\n";
       text << "\t lspec  = dlight_dlight" << i << "_rel_view[1];\n";
@@ -1095,7 +1081,7 @@ synthesize_shader(const RenderState *rs) {
         text << "\t tot_specular += lspec;\n";
       }
     }
-    for (int i=0; i<(int)_plights.size(); i++) {
+    for (int i=0; i < (int)_plights.size(); i++) {
       text << "\t // Point Light " << i << "\n";
       text << "\t lcolor = plight_plight" << i << "_rel_view[0];\n";
       text << "\t lspec  = plight_plight" << i << "_rel_view[1];\n";
@@ -1113,14 +1099,14 @@ synthesize_shader(const RenderState *rs) {
         if (_material->get_local()) {
           text << "\t lhalf  = normalize(lvec - normalize(l_eye_position));\n";
         } else {
-          text << "\t lhalf = normalize(lvec - float4(0,1,0,0));\n";
+          text << "\t lhalf = normalize(lvec - float4(0, 1, 0, 0));\n";
         }
         text << "\t lspec *= lattenv;\n";
         text << "\t lspec *= pow(saturate(dot(l_eye_normal.xyz, lhalf.xyz)), shininess);\n";
         text << "\t tot_specular += lspec;\n";
       }
     }
-    for (int i=0; i<(int)_slights.size(); i++) {
+    for (int i=0; i < (int)_slights.size(); i++) {
       text << "\t // Spot Light " << i << "\n";
       text << "\t lcolor = slight_slight" << i << "_rel_view[0];\n";
       text << "\t lspec  = slight_slight" << i << "_rel_view[1];\n";
@@ -1431,7 +1417,7 @@ synthesize_shader(const RenderState *rs) {
     case Fog::M_linear:
       text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate((attr_fog.z - l_hpos.z) * attr_fog.w));\n";
       break;
-    case Fog::M_exponential:
+    case Fog::M_exponential: // 1.442695f = 1 / log(2)
       text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate(exp2(attr_fog.x * l_hpos.z * -1.442695f)));\n";
       break;
     case Fog::M_exponential_squared:
@@ -1454,10 +1440,10 @@ synthesize_shader(const RenderState *rs) {
   // Insert the shader into the shader attrib.
   CPT(RenderAttrib) shattr = create_shader_attrib(text.str());
   if (_subsume_alpha_test) {
-    shattr=DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_subsume_alpha_test, true);
+    shattr = DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_subsume_alpha_test, true);
   }
   if (_disable_alpha_write) {
-    shattr=DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_disable_alpha_write, true);
+    shattr = DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_disable_alpha_write, true);
   }
   clear_analysis();
   reset_register_allocator();