Browse Source

a few tinydisplay optimizations

David Rose 17 years ago
parent
commit
979896da75

+ 3 - 0
panda/src/putil/Sources.pp

@@ -51,6 +51,7 @@
     modifierButtons.I modifierButtons.h mouseButton.h \
     modifierButtons.I modifierButtons.h mouseButton.h \
     mouseData.I mouseData.h nameUniquifier.I nameUniquifier.h \
     mouseData.I mouseData.h nameUniquifier.I nameUniquifier.h \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
+    pbitops.I pbitops.h \
     portalMask.h \
     portalMask.h \
     pta_double.h \
     pta_double.h \
     pta_float.h pta_int.h \
     pta_float.h pta_int.h \
@@ -98,6 +99,7 @@
     modifierButtons.cxx mouseButton.cxx mouseData.cxx \
     modifierButtons.cxx mouseButton.cxx mouseData.cxx \
     nameUniquifier.cxx \
     nameUniquifier.cxx \
     nodeCachedReferenceCount.cxx \
     nodeCachedReferenceCount.cxx \
+    pbitops.cxx \
     pta_double.cxx pta_float.cxx \
     pta_double.cxx pta_float.cxx \
     pta_int.cxx pta_ushort.cxx \
     pta_int.cxx pta_ushort.cxx \
     simpleHashMap.cxx \
     simpleHashMap.cxx \
@@ -156,6 +158,7 @@
     nameUniquifier.I nameUniquifier.h \
     nameUniquifier.I nameUniquifier.h \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
     portalMask.h \
     portalMask.h \
+    pbitops.I pbitops.h \
     pta_double.h \
     pta_double.h \
     pta_float.h pta_int.h pta_ushort.h \
     pta_float.h pta_int.h pta_ushort.h \
     simpleHashMap.I simpleHashMap.h \
     simpleHashMap.I simpleHashMap.h \

+ 1 - 88
panda/src/putil/bitMask.I

@@ -462,12 +462,7 @@ get_lowest_off_bit() const {
 template<class WType, int nbits>
 template<class WType, int nbits>
 INLINE int BitMask<WType, nbits>::
 INLINE int BitMask<WType, nbits>::
 get_highest_on_bit() const {
 get_highest_on_bit() const {
-  if (_word == 0) {
-    return -1;
-  }
-
-  WordType w = ::flood_bits_down(_word);
-  return count_bits_in_word(w) - 1;
+  return ::get_highest_on_bit(_word);
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -871,88 +866,6 @@ init_type() {
   register_type(_type_handle, str.str());
   register_type(_type_handle, str.str());
 }
 }
 
 
-////////////////////////////////////////////////////////////////////
-//     Function: count_bits_in_word
-//  Description: Returns the number of 1 bits in the indicated word.
-////////////////////////////////////////////////////////////////////
-INLINE int
-count_bits_in_word(PN_uint32 x) {
-  return (int)num_bits_on[x & 0xffff] + (int)num_bits_on[(x >> 16) & 0xffff];
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: count_bits_in_word
-//  Description: Returns the number of 1 bits in the indicated word.
-////////////////////////////////////////////////////////////////////
-INLINE int
-count_bits_in_word(PN_uint64 x) {
-  return count_bits_in_word((PN_uint32)x) + count_bits_in_word((PN_uint32)(x >> 32));
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_down
-//  Description: Returns a value such that every bit at or below the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint32
-flood_bits_down(PN_uint32 x) {
-  x |= (x >> 1);
-  x |= (x >> 2);
-  x |= (x >> 4);
-  x |= (x >> 8);
-  x |= (x >> 16);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_down
-//  Description: Returns a value such that every bit at or below the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint64
-flood_bits_down(PN_uint64 x) {
-  x |= (x >> 1);
-  x |= (x >> 2);
-  x |= (x >> 4);
-  x |= (x >> 8);
-  x |= (x >> 16);
-  x |= (x >> 32);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_up
-//  Description: Returns a value such that every bit at or above the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint32
-flood_bits_up(PN_uint32 x) {
-  x |= (x << 1);
-  x |= (x << 2);
-  x |= (x << 4);
-  x |= (x << 8);
-  x |= (x << 16);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_up
-//  Description: Returns a value such that every bit at or above the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint64
-flood_bits_up(PN_uint64 x) {
-  x |= (x << 1);
-  x |= (x << 2);
-  x |= (x << 4);
-  x |= (x << 8);
-  x |= (x << 16);
-  x |= (x << 32);
-  return x;
-}
-
-
-
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: BitMask::flood_up_in_place
 //     Function: BitMask::flood_up_in_place
 //       Access: Published
 //       Access: Published

+ 0 - 2
panda/src/putil/bitMask.cxx

@@ -22,5 +22,3 @@
 #ifdef __GNUC__
 #ifdef __GNUC__
 #pragma implementation
 #pragma implementation
 #endif
 #endif
-
-unsigned char num_bits_on[65536];

+ 1 - 11
panda/src/putil/bitMask.h

@@ -20,7 +20,7 @@
 #define BITMASK_H
 #define BITMASK_H
 
 
 #include "pandabase.h"
 #include "pandabase.h"
-
+#include "pbitops.h"
 #include "numeric_types.h"
 #include "numeric_types.h"
 #include "typedObject.h"
 #include "typedObject.h"
 #include "indent.h"
 #include "indent.h"
@@ -151,16 +151,6 @@ private:
   static TypeHandle _type_handle;
   static TypeHandle _type_handle;
 };
 };
 
 
-INLINE int count_bits_in_word(PN_uint32 x);
-INLINE int count_bits_in_word(PN_uint64 x);
-INLINE PN_uint32 flood_bits_down(PN_uint32 x);
-INLINE PN_uint64 flood_bits_down(PN_uint64 x);
-INLINE PN_uint32 flood_bits_up(PN_uint32 x);
-INLINE PN_uint64 flood_bits_up(PN_uint64 x);
-
-// This table precomputes the number of on bits in each 16-bit word.
-extern EXPCL_PANDA_PUTIL unsigned char num_bits_on[65536];
-
 #include "bitMask.I"
 #include "bitMask.I"
 
 
 template<class WType, int nbits>
 template<class WType, int nbits>

+ 146 - 0
panda/src/putil/pbitops.I

@@ -0,0 +1,146 @@
+// Filename: pbitops.I
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: count_bits_in_word
+//  Description: Returns the number of 1 bits in the indicated word.
+////////////////////////////////////////////////////////////////////
+INLINE int
+count_bits_in_word(PN_uint32 x) {
+  return (int)num_bits_on[x & 0xffff] + (int)num_bits_on[(x >> 16) & 0xffff];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: count_bits_in_word
+//  Description: Returns the number of 1 bits in the indicated word.
+////////////////////////////////////////////////////////////////////
+INLINE int
+count_bits_in_word(PN_uint64 x) {
+  return count_bits_in_word((PN_uint32)x) + count_bits_in_word((PN_uint32)(x >> 32));
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_down
+//  Description: Returns a value such that every bit at or below the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint32
+flood_bits_down(PN_uint32 x) {
+  x |= (x >> 1);
+  x |= (x >> 2);
+  x |= (x >> 4);
+  x |= (x >> 8);
+  x |= (x >> 16);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_down
+//  Description: Returns a value such that every bit at or below the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint64
+flood_bits_down(PN_uint64 x) {
+  x |= (x >> 1);
+  x |= (x >> 2);
+  x |= (x >> 4);
+  x |= (x >> 8);
+  x |= (x >> 16);
+  x |= (x >> 32);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_up
+//  Description: Returns a value such that every bit at or above the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint32
+flood_bits_up(PN_uint32 x) {
+  x |= (x << 1);
+  x |= (x << 2);
+  x |= (x << 4);
+  x |= (x << 8);
+  x |= (x << 16);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_up
+//  Description: Returns a value such that every bit at or above the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint64
+flood_bits_up(PN_uint64 x) {
+  x |= (x << 1);
+  x |= (x << 2);
+  x |= (x << 4);
+  x |= (x << 8);
+  x |= (x << 16);
+  x |= (x << 32);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_highest_on_bit
+//  Description: Returns the index of the highest 1 bit in the word.
+//               Returns -1 if there are no 1 bits.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_highest_on_bit(PN_uint32 x) {
+  PN_uint32 w = flood_bits_down(x);
+  return count_bits_in_word(w) - 1;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_highest_on_bit
+//  Description: Returns the index of the highest 1 bit in the word.
+//               Returns -1 if there are no 1 bits.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_highest_on_bit(PN_uint64 x) {
+  PN_uint64 w = flood_bits_down(x);
+  return count_bits_in_word(w) - 1;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_next_higher_bit
+//  Description: Returns the exponent of the smallest power of 2
+//               greater than x.
+//
+//               That is, returns the smallest number n such that
+//               (1 << n) is larger than x.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_next_higher_bit(PN_uint32 x) {
+  PN_uint32 w = flood_bits_down(x);
+  return count_bits_in_word(w);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_next_higher_bit
+//  Description: Returns the exponent of the smallest power of 2
+//               greater than x.
+//
+//               That is, returns the smallest number n such that
+//               (1 << n) is larger than x.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_next_higher_bit(PN_uint64 x) {
+  PN_uint64 w = flood_bits_down(x);
+  return count_bits_in_word(w);
+}

+ 21 - 0
panda/src/putil/pbitops.cxx

@@ -0,0 +1,21 @@
+// Filename: pbitops.cxx
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pbitops.h"
+
+unsigned char num_bits_on[65536];

+ 49 - 0
panda/src/putil/pbitops.h

@@ -0,0 +1,49 @@
+// Filename: pbitops.h
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef PBITOPS_H
+#define PBITOPS_H
+
+#include "pandabase.h"
+#include "numeric_types.h"
+
+////////////////////////////////////////////////////////////////////
+// This file defines a few low-level bit-operation routines, optimized
+// all to heck.
+////////////////////////////////////////////////////////////////////
+
+INLINE int count_bits_in_word(PN_uint32 x);
+INLINE int count_bits_in_word(PN_uint64 x);
+
+INLINE PN_uint32 flood_bits_down(PN_uint32 x);
+INLINE PN_uint64 flood_bits_down(PN_uint64 x);
+INLINE PN_uint32 flood_bits_up(PN_uint32 x);
+INLINE PN_uint64 flood_bits_up(PN_uint64 x);
+
+INLINE int get_highest_on_bit(PN_uint32 x);
+INLINE int get_highest_on_bit(PN_uint64 x);
+
+INLINE int get_next_higher_bit(PN_uint32 x);
+INLINE int get_next_higher_bit(PN_uint64 x);
+
+// This table precomputes the number of on bits in each 16-bit word.
+extern EXPCL_PANDA_PUTIL unsigned char num_bits_on[65536];
+
+#include "pbitops.I"
+
+#endif

+ 1 - 0
panda/src/putil/putil_composite2.cxx

@@ -8,6 +8,7 @@
 #include "mouseData.cxx"
 #include "mouseData.cxx"
 #include "nameUniquifier.cxx"
 #include "nameUniquifier.cxx"
 #include "nodeCachedReferenceCount.cxx"
 #include "nodeCachedReferenceCount.cxx"
+#include "pbitops.cxx"
 #include "pta_double.cxx"
 #include "pta_double.cxx"
 #include "pta_float.cxx"
 #include "pta_float.cxx"
 #include "pta_int.cxx"
 #include "pta_int.cxx"

+ 11 - 0
panda/src/tinydisplay/config_tinydisplay.cxx

@@ -70,6 +70,17 @@ ConfigVariableInt td_texture_ram
           "frame, even if this means this limit remains exceeded.)  "
           "frame, even if this means this limit remains exceeded.)  "
           "Set it to -1 for no limit."));
           "Set it to -1 for no limit."));
 
 
+ConfigVariableBool td_ignore_mipmaps
+  ("td-ignore-mipmaps", false,
+   PRC_DESC("Configure this true to disable use of mipmaps on the "
+            "tinydisplay software renderer."));
+
+ConfigVariableBool td_perspective_textures
+  ("td-perspective-textures", true,
+   PRC_DESC("Configure this false to disable use of perspective-correct "
+            "textures on the tinydisplay software renderer, for a small "
+            "performance gain."));
+
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 //     Function: init_libtinydisplay
 //     Function: init_libtinydisplay
 //  Description: Initializes the library.  This must be called at
 //  Description: Initializes the library.  This must be called at

+ 2 - 0
panda/src/tinydisplay/config_tinydisplay.h

@@ -35,5 +35,7 @@ extern ConfigVariableInt x_wheel_up_button;
 extern ConfigVariableInt x_wheel_down_button;
 extern ConfigVariableInt x_wheel_down_button;
 
 
 extern ConfigVariableInt td_texture_ram;
 extern ConfigVariableInt td_texture_ram;
+extern ConfigVariableBool td_ignore_mipmaps;
+extern ConfigVariableBool td_perspective_textures;
 
 
 #endif
 #endif

+ 52 - 30
panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx

@@ -598,6 +598,15 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
     }
     }
   }
   }
 
 
+  if (_c->texture_2d_enabled && _texture_replace) {
+    // We don't need the vertex color or lighting calculation after
+    // all, since the current texture will just hide all of that.
+    needs_color = false;
+    needs_normal = false;
+  }
+
+  bool lighting_enabled = (needs_normal && _c->lighting_enabled);
+
   for (i = 0; i < num_used_vertices; ++i) {
   for (i = 0; i < num_used_vertices; ++i) {
     GLVertex *v = &_vertices[i];
     GLVertex *v = &_vertices[i];
     const LVecBase4f &d = rvertex.get_data4f();
     const LVecBase4f &d = rvertex.get_data4f();
@@ -642,18 +651,18 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
 
 
     v->color = _c->current_color;
     v->color = _c->current_color;
 
 
-    if (needs_normal) {
+    if (lighting_enabled) {
       const LVecBase3f &d = rnormal.get_data3f();
       const LVecBase3f &d = rnormal.get_data3f();
       _c->current_normal.X = d[0];
       _c->current_normal.X = d[0];
       _c->current_normal.Y = d[1];
       _c->current_normal.Y = d[1];
       _c->current_normal.Z = d[2];
       _c->current_normal.Z = d[2];
       _c->current_normal.W = 0.0f;
       _c->current_normal.W = 0.0f;
-    }
 
 
-    gl_vertex_transform(_c, v);
-
-    if (_c->lighting_enabled) {
+      gl_vertex_transform(_c, v);
       gl_shade_vertex(_c, v);
       gl_shade_vertex(_c, v);
+
+    } else {
+      gl_vertex_transform(_c, v);
     }
     }
 
 
     if (v->clip_code == 0) {
     if (v->clip_code == 0) {
@@ -666,16 +675,16 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   // Set up the appropriate function callback for filling triangles,
   // Set up the appropriate function callback for filling triangles,
   // according to the current state.
   // according to the current state.
 
 
-  int depth_write_state = 0;
+  int depth_write_state = 0;  // zon
   if (_target._depth_write->get_mode() != DepthWriteAttrib::M_on) {
   if (_target._depth_write->get_mode() != DepthWriteAttrib::M_on) {
-    depth_write_state = 1;
+    depth_write_state = 1;  // zoff
   }
   }
 
 
-  int color_write_state = 0;
+  int color_write_state = 0;  // noblend
   switch (_target._transparency->get_mode()) {
   switch (_target._transparency->get_mode()) {
   case TransparencyAttrib::M_alpha:
   case TransparencyAttrib::M_alpha:
   case TransparencyAttrib::M_dual:
   case TransparencyAttrib::M_dual:
-    color_write_state = 1;
+    color_write_state = 1;    // blend
     break;
     break;
 
 
   default:
   default:
@@ -685,36 +694,36 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   unsigned int color_channels =
   unsigned int color_channels =
     _target._color_write->get_channels() & _color_write_mask;
     _target._color_write->get_channels() & _color_write_mask;
   if (color_channels == ColorWriteAttrib::C_off) {
   if (color_channels == ColorWriteAttrib::C_off) {
-    color_write_state = 2;
+    color_write_state = 2;    // nocolor
   }
   }
 
 
-  int alpha_test_state = 0;
+  int alpha_test_state = 0;   // anone
   switch (_target._alpha_test->get_mode()) {
   switch (_target._alpha_test->get_mode()) {
   case AlphaTestAttrib::M_none:
   case AlphaTestAttrib::M_none:
   case AlphaTestAttrib::M_never:
   case AlphaTestAttrib::M_never:
   case AlphaTestAttrib::M_always:
   case AlphaTestAttrib::M_always:
   case AlphaTestAttrib::M_equal:
   case AlphaTestAttrib::M_equal:
   case AlphaTestAttrib::M_not_equal:
   case AlphaTestAttrib::M_not_equal:
-    alpha_test_state = 0;
+    alpha_test_state = 0;    // anone
     break;
     break;
 
 
   case AlphaTestAttrib::M_less:
   case AlphaTestAttrib::M_less:
   case AlphaTestAttrib::M_less_equal:
   case AlphaTestAttrib::M_less_equal:
-    alpha_test_state = 1;
+    alpha_test_state = 1;    // aless
     _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
     _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
     break;
     break;
 
 
   case AlphaTestAttrib::M_greater:
   case AlphaTestAttrib::M_greater:
   case AlphaTestAttrib::M_greater_equal:
   case AlphaTestAttrib::M_greater_equal:
-    alpha_test_state = 2;
+    alpha_test_state = 2;    // amore
     _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
     _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
     break;
     break;
   }
   }
 
 
-  int depth_test_state = 1;
+  int depth_test_state = 1;    // zless
   _c->depth_test = 1;  // set this for ZB_line
   _c->depth_test = 1;  // set this for ZB_line
   if (_target._depth_test->get_mode() == DepthTestAttrib::M_none) {
   if (_target._depth_test->get_mode() == DepthTestAttrib::M_none) {
-    depth_test_state = 0;
+    depth_test_state = 0;      // znone
     _c->depth_test = 0;
     _c->depth_test = 0;
   }
   }
   
   
@@ -724,17 +733,17 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
     // as well use the flat shading model.
     // as well use the flat shading model.
     shade_model = ShadeModelAttrib::M_flat;
     shade_model = ShadeModelAttrib::M_flat;
   }
   }
-  int shading_state = 2;  // smooth
+  int shade_model_state = 2;  // smooth
   _c->smooth_shade_model = true;
   _c->smooth_shade_model = true;
 
 
   if (shade_model == ShadeModelAttrib::M_flat) {
   if (shade_model == ShadeModelAttrib::M_flat) {
     _c->smooth_shade_model = false;
     _c->smooth_shade_model = false;
-    shading_state = 1;  // flat
+    shade_model_state = 1;  // flat
     if (_c->current_color.X == 1.0f &&
     if (_c->current_color.X == 1.0f &&
         _c->current_color.Y == 1.0f &&
         _c->current_color.Y == 1.0f &&
         _c->current_color.Z == 1.0f &&
         _c->current_color.Z == 1.0f &&
         _c->current_color.W == 1.0f) {
         _c->current_color.W == 1.0f) {
-      shading_state = 0;  // white
+      shade_model_state = 0;  // white
     }
     }
   }
   }
 
 
@@ -743,12 +752,18 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   if (_c->texture_2d_enabled) {
   if (_c->texture_2d_enabled) {
     texfilter_state = _texfilter_state;
     texfilter_state = _texfilter_state;
     texturing_state = 2;  // perspective-correct textures
     texturing_state = 2;  // perspective-correct textures
-    if (_c->matrix_model_projection_no_w_transform) {
+    if (_c->matrix_model_projection_no_w_transform || !td_perspective_textures) {
       texturing_state = 1;  // non-perspective-correct textures
       texturing_state = 1;  // non-perspective-correct textures
     }
     }
+
+    if (_texture_replace) {
+      // If we're completely replacing the underlying color, then it
+      // doesn't matter what the color is.
+      shade_model_state = 0;
+    }
   }
   }
 
 
-  _c->zb_fill_tri = fill_tri_funcs[depth_write_state][color_write_state][alpha_test_state][depth_test_state][texfilter_state][shading_state][texturing_state];
+  _c->zb_fill_tri = fill_tri_funcs[depth_write_state][color_write_state][alpha_test_state][depth_test_state][texfilter_state][shade_model_state][texturing_state];
   
   
   return true;
   return true;
 }
 }
@@ -1592,6 +1607,16 @@ do_issue_texture() {
     
     
   // Then, turn on the current texture mode.
   // Then, turn on the current texture mode.
   apply_texture(tc);
   apply_texture(tc);
+
+  // Set a few state cache values.
+  _texfilter_state = 0;    // nearest
+  if (texture->uses_mipmaps() && !td_ignore_mipmaps) {
+    _texfilter_state = 1;  // mipmap
+  }
+
+  // M_replace means M_replace; anything else is treated the same as
+  // M_modulate.
+  _texture_replace = (stage->get_mode() == TextureStage::M_replace);
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
@@ -1609,11 +1634,6 @@ apply_texture(TextureContext *tc) {
   _c->current_texture = gtc->_gltex;
   _c->current_texture = gtc->_gltex;
   _c->texture_2d_enabled = true;
   _c->texture_2d_enabled = true;
 
 
-  _texfilter_state = 0;
-  if (gtc->get_texture()->uses_mipmaps()) {
-    _texfilter_state = 1;
-  }
-
   GLTexture *gltex = gtc->_gltex;
   GLTexture *gltex = gtc->_gltex;
 
 
   if (gtc->was_image_modified() || gltex->num_levels == 0) {
   if (gtc->was_image_modified() || gltex->num_levels == 0) {
@@ -1808,13 +1828,15 @@ setup_gltex(GLTexture *gltex, int x_size, int y_size, int num_levels) {
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////
 int TinyGraphicsStateGuardian::
 int TinyGraphicsStateGuardian::
 get_tex_shift(int orig_size) {
 get_tex_shift(int orig_size) {
-  unsigned int filled = flood_bits_down((unsigned int)(orig_size - 1));
-  int size = filled + 1;
-  if (size != orig_size || size > _max_texture_dimension) {
+  if ((orig_size & (orig_size - 1)) != 0) {
+    // Not a power of 2.
+    return -1;
+  }
+  if (orig_size > _max_texture_dimension) {
     return -1;
     return -1;
   }
   }
 
 
-  return count_bits_in_word((unsigned int)size - 1);
+  return count_bits_in_word((unsigned int)orig_size - 1);
 }
 }
 
 
 ////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////

+ 1 - 0
panda/src/tinydisplay/tinyGraphicsStateGuardian.h

@@ -136,6 +136,7 @@ private:
   };
   };
   int _color_material_flags;
   int _color_material_flags;
   int _texfilter_state;
   int _texfilter_state;
+  bool _texture_replace;
 
 
   SimpleLru _textures_lru;
   SimpleLru _textures_lru;
 
 

+ 10 - 3
panda/src/tinydisplay/zbuffer.h

@@ -6,7 +6,7 @@
  */
  */
 
 
 #include "zfeatures.h"
 #include "zfeatures.h"
-#include "bitMask.h"
+#include "pbitops.h"
 
 
 typedef unsigned short ZPOINT;
 typedef unsigned short ZPOINT;
 #define ZB_Z_BITS 16
 #define ZB_Z_BITS 16
@@ -23,7 +23,7 @@ typedef unsigned short ZPOINT;
 /* This is the theoretical max number of bits we have available to
 /* This is the theoretical max number of bits we have available to
    shift down to achieve each next mipmap level, based on the size of
    shift down to achieve each next mipmap level, based on the size of
    a 32-bit int.  We need to preallocate mipmap arrays of this size. */
    a 32-bit int.  We need to preallocate mipmap arrays of this size. */
-#define MAX_MIPMAP_LEVELS (32 - ZB_POINT_ST_FRAC_BITS)
+#define MAX_MIPMAP_LEVELS (32 - ZB_POINT_ST_FRAC_BITS + 1)
 
 
 /* Returns the index within a texture level for the given (s, t) texel. */
 /* Returns the index within a texture level for the given (s, t) texel. */
 #define ZB_TEXEL(level, s, t)                                         \
 #define ZB_TEXEL(level, s, t)                                         \
@@ -36,8 +36,15 @@ typedef unsigned short ZPOINT;
 #define ZB_LOOKUP_TEXTURE_NEAREST_MIPMAP(texture_levels, s, t, level) \
 #define ZB_LOOKUP_TEXTURE_NEAREST_MIPMAP(texture_levels, s, t, level) \
   ZB_LOOKUP_TEXTURE_NEAREST((texture_levels) + (level), (s) >> (level), (t) >> (level))
   ZB_LOOKUP_TEXTURE_NEAREST((texture_levels) + (level), (s) >> (level), (t) >> (level))
 
 
+/* A special abs() function which doesn't require any branching
+   instructions.  Might not work on some exotic hardware. */
+
+/* Also doesn't appear to be any faster in practice.  Guess gcc is
+   already doing the right thing.  Is msvc? */
+//#define FAST_ABS(v) (((v) ^ ((v) >> (sizeof(v) * 8 - 1))) - ((v) >> (sizeof(v) * 8 - 1)))
+
 #define DO_CALC_MIPMAP_LEVEL \
 #define DO_CALC_MIPMAP_LEVEL \
-    mipmap_level = count_bits_in_word(flood_bits_down((unsigned int)max(abs(dsdx), abs(dtdx)) >> ZB_POINT_ST_FRAC_BITS))
+  mipmap_level = get_next_higher_bit(((unsigned int)abs(dsdx) + (unsigned int)abs(dtdx)) >> ZB_POINT_ST_FRAC_BITS)
 
 
 #if 0
 #if 0
 /* Experiment with bilinear filtering.  Looks great, but seems to run
 /* Experiment with bilinear filtering.  Looks great, but seems to run

+ 3 - 0
panda/src/tinydisplay/ztriangle.h

@@ -69,6 +69,8 @@
     return;
     return;
   fz = 1.0f / fz;
   fz = 1.0f / fz;
 
 
+  EARLY_OUT_FZ();
+
   fdx1 *= fz;
   fdx1 *= fz;
   fdy1 *= fz;
   fdy1 *= fz;
   fdx2 *= fz;
   fdx2 *= fz;
@@ -378,6 +380,7 @@
 #undef INTERP_STZ
 #undef INTERP_STZ
 
 
 #undef EARLY_OUT
 #undef EARLY_OUT
+#undef EARLY_OUT_FZ
 #undef DRAW_INIT
 #undef DRAW_INIT
 #undef DRAW_LINE  
 #undef DRAW_LINE  
 #undef PUT_PIXEL
 #undef PUT_PIXEL

+ 1 - 1
panda/src/tinydisplay/ztriangle.py

@@ -30,7 +30,7 @@ Options = [
 # The various combinations of these options are explicit within
 # The various combinations of these options are explicit within
 # ztriangle_two.h.
 # ztriangle_two.h.
 ExtraOptions = [
 ExtraOptions = [
-    # shading
+    # shade model
     [ 'white', 'flat', 'smooth' ],
     [ 'white', 'flat', 'smooth' ],
 
 
     # texturing
     # texturing

+ 218 - 164
panda/src/tinydisplay/ztriangle_two.h

@@ -7,18 +7,22 @@ static void FNAME(white_untextured) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()                             \
 #define DRAW_INIT()                             \
   {                                             \
   {                                             \
   }
   }
  
  
-#define PUT_PIXEL(_a)                                   \
-  {                                                     \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                       \
-    if (ZCMP(pz[_a], zz)) {                             \
-      STORE_PIX(pp[_a], 0xffffffffUL, 0xffffUL, 0xffffUL, 0xffffUL, 0xffffUL);     \
-      STORE_Z(pz[_a], zz);                              \
-    }                                                   \
-    z+=dzdx;                                            \
+#define PUT_PIXEL(_a)                                                   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) {                                             \
+      STORE_PIX(pp[_a], 0xffffffffUL, 0xffffUL, 0xffffUL, 0xffffUL, 0xffffUL); \
+      STORE_Z(pz[_a], zz);                                              \
+    }                                                                   \
+    z+=dzdx;                                                            \
   }
   }
 
 
 #include "ztriangle.h"
 #include "ztriangle.h"
@@ -36,6 +40,10 @@ static void FNAME(flat_untextured) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()                             \
 #define DRAW_INIT()                             \
   {                                             \
   {                                             \
     if (!ACMP(zb, p2->a)) {                     \
     if (!ACMP(zb, p2->a)) {                     \
@@ -80,10 +88,14 @@ static void FNAME(smooth_untextured) (ZBuffer *zb,
     c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
     c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
     if (c0 == c1 && c0 == c2) {                         \
     if (c0 == c1 && c0 == c2) {                         \
       /* It's really a flat-shaded triangle. */         \
       /* It's really a flat-shaded triangle. */         \
-      FNAME(flat_untextured)(zb, p0, p1, p2);       \
+      FNAME(flat_untextured)(zb, p0, p1, p2);           \
       return;                                           \
       return;                                           \
     }                                                   \
     }                                                   \
   }
   }
+
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
   
   
 #define DRAW_INIT() 				\
 #define DRAW_INIT() 				\
   {						\
   {						\
@@ -120,16 +132,20 @@ static void FNAME(white_textured) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() 				\
+  { /* Empty body: expanding EARLY_OUT_FZ() here does nothing. */ \
+  }
+
 #define DRAW_INIT()				\
 #define DRAW_INIT()				\
   {						\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture; /* PUT_PIXEL passes this to ZB_LOOKUP_TEXTURE */ \
   }
   }
 
 
 #define PUT_PIXEL(_a)                                                   \
 #define PUT_PIXEL(_a)                                                   \
   {                                                                     \
   {                                                                     \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     if (ZCMP(pz[_a], zz)) {                                             \
     if (ZCMP(pz[_a], zz)) {                                             \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                           \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_Z(pz[_a], zz);                                            \
         STORE_Z(pz[_a], zz);                                            \
@@ -156,37 +172,41 @@ static void FNAME(flat_textured) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() 				\
+  { /* Empty body: expanding EARLY_OUT_FZ() here does nothing. */ \
+  }
+
 #define DRAW_INIT()				\
 #define DRAW_INIT()				\
   {						\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture; /* PUT_PIXEL passes this to ZB_LOOKUP_TEXTURE */ \
     or0 = p2->r;                                \
     or0 = p2->r;                                \
     og0 = p2->g;                                \
     og0 = p2->g;                                \
     ob0 = p2->b;                                \
     ob0 = p2->b;                                \
     oa0 = p2->a;                                \
     oa0 = p2->a;                                \
   }
   }
 
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa0 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,       \
-                                og0 * PIXEL_G(tmp) >> 16,       \
-                                ob0 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or0 * PIXEL_R(tmp) >> 16,                     \
-                  og0 * PIXEL_G(tmp) >> 16,                     \
-                  ob0 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a) /* write one texel scaled by or0/og0/ob0/oa0 */   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) { /* the texture is sampled only if ZCMP holds */ \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa0 * PIXEL_A(tmp) >> 16; /* parses as (oa0 * alpha) >> 16 */ \
+      if (ACMP(zb, a)) { /* ACMP is given the scaled alpha */           \
+        STORE_PIX(pp[_a],                                               \
+                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,               \
+                                og0 * PIXEL_G(tmp) >> 16,               \
+                                ob0 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or0 * PIXEL_R(tmp) >> 16,                             \
+                  og0 * PIXEL_G(tmp) >> 16,                             \
+                  ob0 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* z, s and t step even when nothing was stored */         \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
   }
 
 
 #include "ztriangle.h"
 #include "ztriangle.h"
@@ -201,55 +221,59 @@ static void FNAME(smooth_textured) (ZBuffer *zb,
 #define INTERP_ST
 #define INTERP_ST
 #define INTERP_RGB
 #define INTERP_RGB
 
 
-#define EARLY_OUT()                                             \
-  {                                                             \
-    int c0, c1, c2;                                             \
-    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);             \
-    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);             \
-    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);             \
-    if (c0 == c1 && c0 == c2) {                                 \
-      /* It's really a flat-shaded triangle. */                 \
-      if (c0 == 0xffffffff) {                                   \
-        /* Actually, it's a white triangle. */                  \
+#define EARLY_OUT() /* reroute when p0/p1/p2 pack to one color */ \
+  {                                                     \
+    int c0, c1, c2;                                     \
+    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);     \
+    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);     \
+    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
+    if (c0 == c1 && c0 == c2) {                         \
+      /* It's really a flat-shaded triangle. */         \
+      if (c0 == 0xffffffff) { /* packed value has all 32 bits set */ \
+        /* Actually, it's a white triangle. */          \
         FNAME(white_textured)(zb, p0, p1, p2);          \
         FNAME(white_textured)(zb, p0, p1, p2);          \
-        return;                                                 \
-      }                                                         \
-      FNAME(flat_textured)(zb, p0, p1, p2);        \
-      return;                                                   \
-    }                                                           \
+        return; /* nothing after the hand-off runs */   \
+      }                                                 \
+      FNAME(flat_textured)(zb, p0, p1, p2);             \
+      return; /* nothing after the hand-off runs */     \
+    }                                                   \
+  }
+
+#define EARLY_OUT_FZ() 				\
+  { /* Empty body: expanding EARLY_OUT_FZ() here does nothing. */ \
   }
   }
 
 
 #define DRAW_INIT()                             \
 #define DRAW_INIT()                             \
   {                                             \
   {                                             \
-    texture_levels = zb->current_texture;             \
-  }
-
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa1 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,       \
-                                og1 * PIXEL_G(tmp) >> 16,       \
-                                ob1 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or1 * PIXEL_R(tmp) >> 16,                     \
-                  og1 * PIXEL_G(tmp) >> 16,                     \
-                  ob1 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    og1+=dgdx;                                                  \
-    or1+=drdx;                                                  \
-    ob1+=dbdx;                                                  \
-    oa1+=dadx;                                                  \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+    texture_levels = zb->current_texture; /* PUT_PIXEL passes this to ZB_LOOKUP_TEXTURE */ \
+  }
+
+#define PUT_PIXEL(_a) /* write one texel scaled by or1/og1/ob1/oa1 */   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) { /* the texture is sampled only if ZCMP holds */ \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa1 * PIXEL_A(tmp) >> 16; /* parses as (oa1 * alpha) >> 16 */ \
+      if (ACMP(zb, a)) { /* ACMP is given the scaled alpha */           \
+        STORE_PIX(pp[_a],                                               \
+                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,               \
+                                og1 * PIXEL_G(tmp) >> 16,               \
+                                ob1 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or1 * PIXEL_R(tmp) >> 16,                             \
+                  og1 * PIXEL_G(tmp) >> 16,                             \
+                  ob1 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* every interpolant below steps even when nothing was stored */ \
+    og1+=dgdx;                                                          \
+    or1+=drdx;                                                          \
+    ob1+=dbdx;                                                          \
+    oa1+=dadx;                                                          \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
   }
 
 
 #include "ztriangle.h"
 #include "ztriangle.h"
@@ -275,9 +299,19 @@ static void FNAME(white_perspective) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() /* hand off to white_textured when |fz| > .001 */ \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) { /* fz is supplied by the expansion site */ \
+      /* This triangle is small enough not to worry about perspective   \
+         correction.  NOTE(review): implies large |fz| == small triangle; confirm. */ \
+      FNAME(white_textured)(zb, p0, p1, p2);                            \
+      return;                                                           \
+    }                                                                   \
+  }
+
 #define DRAW_INIT()				\
 #define DRAW_INIT()				\
   {						\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     fdzdx=(float)dzdx;                          \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
@@ -289,7 +323,7 @@ static void FNAME(white_perspective) (ZBuffer *zb,
   {                                                                     \
   {                                                                     \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     if (ZCMP(pz[_a], zz)) {                                             \
     if (ZCMP(pz[_a], zz)) {                                             \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                           \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_Z(pz[_a], zz);                                            \
         STORE_Z(pz[_a], zz);                                            \
@@ -302,7 +336,7 @@ static void FNAME(white_perspective) (ZBuffer *zb,
 
 
 #define DRAW_LINE()                                     \
 #define DRAW_LINE()                                     \
   {                                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
     register int n,dsdx,dtdx;                           \
@@ -320,11 +354,11 @@ static void FNAME(white_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
         zinv=1.0f / fz;                                 \
       }                                                 \
       }                                                 \
@@ -346,11 +380,11 @@ static void FNAME(white_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     }                                                   \
     while (n>=0) {                                      \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \
       PUT_PIXEL(0);                                     \
@@ -383,9 +417,19 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
   {						\
   {						\
   }
   }
 
 
+#define EARLY_OUT_FZ() /* hand off to flat_textured when |fz| > .001 */ \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) { /* fz is supplied by the expansion site */ \
+      /* This triangle is small enough not to worry about perspective   \
+         correction.  NOTE(review): implies large |fz| == small triangle; confirm. */ \
+      FNAME(flat_textured)(zb, p0, p1, p2);                             \
+      return;                                                           \
+    }                                                                   \
+  }
+
 #define DRAW_INIT() 				\
 #define DRAW_INIT() 				\
   {						\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     fdzdx=(float)dzdx;                          \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
@@ -396,33 +440,33 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
     oa0 = p2->a;                                \
     oa0 = p2->a;                                \
   }
   }
 
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa0 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,       \
-                                og0 * PIXEL_G(tmp) >> 16,       \
-                                ob0 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or0 * PIXEL_R(tmp) >> 16,                     \
-                  og0 * PIXEL_G(tmp) >> 16,                     \
-                  ob0 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a) /* write one texel scaled by or0/og0/ob0/oa0 */   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) { /* the texture is sampled only if ZCMP holds */ \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa0 * PIXEL_A(tmp) >> 16; /* parses as (oa0 * alpha) >> 16 */ \
+      if (ACMP(zb, a)) { /* ACMP is given the scaled alpha */           \
+        STORE_PIX(pp[_a],                                               \
+                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,               \
+                                og0 * PIXEL_G(tmp) >> 16,               \
+                                ob0 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or0 * PIXEL_R(tmp) >> 16,                             \
+                  og0 * PIXEL_G(tmp) >> 16,                             \
+                  ob0 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* z, s and t step even when nothing was stored */         \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
   }
 
 
 #define DRAW_LINE()                                     \
 #define DRAW_LINE()                                     \
   {                                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
     register int n,dsdx,dtdx;                           \
@@ -445,11 +489,11 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
         zinv=1.0f / fz;                                 \
       }                                                 \
       }                                                 \
@@ -471,11 +515,11 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     }                                                   \
     while (n>=0) {                                      \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \
       PUT_PIXEL(0);                                     \
@@ -502,64 +546,74 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
 #define INTERP_STZ
 #define INTERP_STZ
 #define INTERP_RGB
 #define INTERP_RGB
 
 
-#define EARLY_OUT()                                                     \
-  {                                                                     \
-    int c0, c1, c2;                                                     \
-    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);                     \
-    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);                     \
-    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);                     \
-    if (c0 == c1 && c0 == c2) {                                         \
-      /* It's really a flat-shaded triangle. */                         \
-      if (c0 == 0xffffffff) {                                           \
-        /* Actually, it's a white triangle. */                          \
+#define EARLY_OUT() /* reroute when p0/p1/p2 pack to one color */ \
+  {                                                     \
+    int c0, c1, c2;                                     \
+    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);     \
+    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);     \
+    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
+    if (c0 == c1 && c0 == c2) {                         \
+      /* It's really a flat-shaded triangle. */         \
+      if (c0 == 0xffffffff) { /* packed value has all 32 bits set */ \
+        /* Actually, it's a white triangle. */          \
         FNAME(white_perspective)(zb, p0, p1, p2);       \
         FNAME(white_perspective)(zb, p0, p1, p2);       \
-        return;                                                         \
-      }                                                                 \
-      FNAME(flat_perspective)(zb, p0, p1, p2);     \
+        return; /* nothing after the hand-off runs */   \
+      }                                                 \
+      FNAME(flat_perspective)(zb, p0, p1, p2);          \
+      return; /* nothing after the hand-off runs */     \
+    }                                                   \
+  }
+
+#define EARLY_OUT_FZ() /* hand off to smooth_textured when |fz| > .001 */ \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) { /* fz is supplied by the expansion site */ \
+      /* This triangle is small enough not to worry about perspective   \
+         correction.  NOTE(review): implies large |fz| == small triangle; confirm. */ \
+      FNAME(smooth_textured)(zb, p0, p1, p2);                           \
       return;                                                           \
       return;                                                           \
     }                                                                   \
     }                                                                   \
   }
   }
 
 
 #define DRAW_INIT() 				\
 #define DRAW_INIT() 				\
   {						\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture; /* PUT_PIXEL passes this to ZB_LOOKUP_TEXTURE */ \
     fdzdx=(float)dzdx;                          \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
     ndtzdx=NB_INTERP * dtzdx;                   \
     ndtzdx=NB_INTERP * dtzdx;                   \
   }
   }
 
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa1 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,       \
-                                og1 * PIXEL_G(tmp) >> 16,       \
-                                ob1 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or1 * PIXEL_R(tmp) >> 16,                     \
-                  og1 * PIXEL_G(tmp) >> 16,                     \
-                  ob1 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    og1+=dgdx;                                                  \
-    or1+=drdx;                                                  \
-    ob1+=dbdx;                                                  \
-    oa1+=dadx;                                                  \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a) /* write one texel scaled by or1/og1/ob1/oa1 */   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) { /* the texture is sampled only if ZCMP holds */ \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa1 * PIXEL_A(tmp) >> 16; /* parses as (oa1 * alpha) >> 16 */ \
+      if (ACMP(zb, a)) { /* ACMP is given the scaled alpha */           \
+        STORE_PIX(pp[_a],                                               \
+                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,               \
+                                og1 * PIXEL_G(tmp) >> 16,               \
+                                ob1 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or1 * PIXEL_R(tmp) >> 16,                             \
+                  og1 * PIXEL_G(tmp) >> 16,                             \
+                  ob1 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* every interpolant below steps even when nothing was stored */ \
+    og1+=dgdx;                                                          \
+    or1+=drdx;                                                          \
+    ob1+=dbdx;                                                          \
+    oa1+=dadx;                                                          \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
   }
 
 
 #define DRAW_LINE()                                     \
 #define DRAW_LINE()                                     \
   {                                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
     register int n,dsdx,dtdx;                           \
@@ -582,11 +636,11 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
         zinv=1.0f / fz;                                 \
       }                                                 \
       }                                                 \
@@ -608,11 +662,11 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     }                                                   \
     while (n>=0) {                                      \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \
       PUT_PIXEL(0);                                     \