Browse Source

Updated to stb_truetype 1.06 (#133)

ocornut 10 years ago
parent
commit
d7a2a23457
1 changed files with 520 additions and 57 deletions
  1. 520 57
      stb_truetype.h

+ 520 - 57
stb_truetype.h

@@ -1,7 +1,7 @@
-// [ImGui] this is a slightly modified version of stb_truetype.h 1.05
+// [ImGui] this is a slightly modified version of stb_truetype.h 1.06
 // [ImGui] we added stbtt_PackFontRangesGatherRects() and stbtt_PackFontRangesRenderIntoRects() and modified stbtt_PackBegin()
 
-// stb_truetype.h - v1.05 - public domain
+// stb_truetype.h - v1.06 - public domain
 // authored from 2009-2014 by Sean Barrett / RAD Game Tools
 //
 //   This library processes TrueType files:
@@ -48,6 +48,9 @@
 //
 // VERSION HISTORY
 //
+//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
+//                     also more precise AA rasterizer, except if shapes overlap
+//                     remove need for STBTT_sort
 //   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
 //   1.04 (2015-04-15) typo in example
 //   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
@@ -123,6 +126,15 @@
 //           stbtt_GetFontVMetrics()
 //           stbtt_GetCodepointKernAdvance()
 //
+//   Starting with version 1.06, the rasterizer was replaced with a new,
+//   faster and generally-more-precise rasterizer. The new rasterizer more
+//   accurately measures pixel coverage for anti-aliasing, except in the case
+//   where multiple shapes overlap, in which case it overestimates the AA pixel
+//   coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If
+//   this turns out to be a problem, you can re-enable the old rasterizer with
+//        #define STBTT_RASTERIZER_VERSION 1
+//   which will incur about a 15% speed hit.
+//
 // ADDITIONAL DOCUMENTATION
 //
 //   Immediately after this block comment are a series of sample programs.
@@ -222,7 +234,15 @@
 //   Baked bitmap interface              70 LOC  /
 //   Font name matching & access        150 LOC  ---- 150 
 //   C runtime library abstraction       60 LOC  ----  60
-
+//
+//
+// PERFORMANCE MEASUREMENTS FOR 1.06:
+//
+//                      32-bit     64-bit
+//   Previous release:  8.83 s     7.68 s
+//   Pool allocations:  7.72 s     6.34 s
+//   Inline sort     :  6.54 s     5.65 s
+//   New rasterizer  :  5.63 s     5.00 s
 
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
@@ -333,7 +353,7 @@ int main(int arg, char **argv)
    stbtt_fontinfo font;
    int i,j,ascent,baseline,ch=0;
    float scale, xpos=2; // leave a little padding in case the character extends left
-   char *text = "Heljo World!";
+   char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness
 
    fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb"));
    stbtt_InitFont(&font, buffer, 0);
@@ -391,12 +411,6 @@ int main(int arg, char **argv)
    typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1];
    typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1];
 
-   // #define your own STBTT_sort() to override this to avoid qsort
-   #ifndef STBTT_sort
-   #include <stdlib.h>
-   #define STBTT_sort(data,num_items,item_size,compare_func)   qsort(data,num_items,item_size,compare_func)
-   #endif
-
    // #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h
    #ifndef STBTT_ifloor
    #include <math.h>
@@ -915,6 +929,10 @@ enum { // languageID for STBTT_PLATFORM_ID_MAC
 
 typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 1 : -1];
 
+#ifndef STBTT_RASTERIZER_VERSION
+#define STBTT_RASTERIZER_VERSION 2
+#endif
+
 //////////////////////////////////////////////////////////////////////////
 //
 // accessors to parse data from file
@@ -1563,42 +1581,129 @@ STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codep
    stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1);
 }
 
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Rasterizer
+
+typedef struct stbtt__hheap_chunk
+{
+   struct stbtt__hheap_chunk *next;
+} stbtt__hheap_chunk;
+
+typedef struct stbtt__hheap
+{
+   struct stbtt__hheap_chunk *head;
+   void   *first_free;
+   int    num_remaining_in_head_chunk;
+} stbtt__hheap;
+
+static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata)
+{
+   if (hh->first_free) {
+      void *p = hh->first_free;
+      hh->first_free = * (void **) p;
+      return p;
+   } else {
+      if (hh->num_remaining_in_head_chunk == 0) {
+         int count = (size < 32 ? 2000 : size < 128 ? 800 : 100);
+         stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata);
+         if (c == NULL)
+            return NULL;
+         c->next = hh->head;
+         hh->head = c;
+         hh->num_remaining_in_head_chunk = count;
+      }
+      --hh->num_remaining_in_head_chunk;
+      return (char *) (hh->head) + size * hh->num_remaining_in_head_chunk;
+   }
+}
+
+static void stbtt__hheap_free(stbtt__hheap *hh, void *p)
+{
+   *(void **) p = hh->first_free;
+   hh->first_free = p;
+}
+
+static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata)
+{
+   stbtt__hheap_chunk *c = hh->head;
+   while (c) {
+      stbtt__hheap_chunk *n = c->next;
+      STBTT_free(c, userdata);
+      c = n;
+   }
+}
+
 typedef struct stbtt__edge {
    float x0,y0, x1,y1;
    int invert;
 } stbtt__edge;
 
+
 typedef struct stbtt__active_edge
 {
+   struct stbtt__active_edge *next;
+   #if STBTT_RASTERIZER_VERSION==1
    int x,dx;
    float ey;
-   struct stbtt__active_edge *next;
-   int valid;
+   int direction;
+   #elif STBTT_RASTERIZER_VERSION==2
+   float fx,fdx,fdy;
+   float direction;
+   float sy;
+   float ey;
+   #else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+   #endif
 } stbtt__active_edge;
 
-#define FIXSHIFT   10
-#define FIX        (1 << FIXSHIFT)
-#define FIXMASK    (FIX-1)
+#if STBTT_RASTERIZER_VERSION == 1
+#define STBTT_FIXSHIFT   10
+#define STBTT_FIX        (1 << STBTT_FIXSHIFT)
+#define STBTT_FIXMASK    (STBTT_FIX-1)
 
-static stbtt__active_edge *new_active(stbtt__edge *e, int off_x, float start_point, void *userdata)
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata)
 {
-   stbtt__active_edge *z = (stbtt__active_edge *) STBTT_malloc(sizeof(*z), userdata); // @TODO: make a pool of these!!!
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
    float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
-   STBTT_assert(e->y0 <= start_point);
    if (!z) return z;
-   // round dx down to avoid going too far
+   
+   // round dx down to avoid overshooting
    if (dxdy < 0)
-      z->dx = -STBTT_ifloor(FIX * -dxdy);
+      z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy);
    else
-      z->dx = STBTT_ifloor(FIX * dxdy);
-   z->x = STBTT_ifloor(FIX * (e->x0 + dxdy * (start_point - e->y0)));
-   z->x -= off_x * FIX;
+      z->dx = STBTT_ifloor(STBTT_FIX * dxdy);
+
+   z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount
+   z->x -= off_x * STBTT_FIX;
+
+   z->ey = e->y1;
+   z->next = 0;
+   z->direction = e->invert ? 1 : -1;
+   return z;
+}
+#elif STBTT_RASTERIZER_VERSION == 2
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata)
+{
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
+   float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
+   //STBTT_assert(e->y0 <= start_point);
+   if (!z) return z;
+   z->fdx = dxdy;
+   z->fdy = (1/dxdy);
+   z->fx = e->x0 + dxdy * (start_point - e->y0);
+   z->fx -= off_x;
+   z->direction = e->invert ? 1.0f : -1.0f;
+   z->sy = e->y0;
    z->ey = e->y1;
    z->next = 0;
-   z->valid = e->invert ? 1 : -1;
    return z;
 }
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
 
+#if STBTT_RASTERIZER_VERSION == 1
 // note: this routine clips fills that extend off the edges... ideally this
 // wouldn't happen, but it could happen if the truetype glyph bounding boxes
 // are wrong, or if the user supplies a too-small bitmap
@@ -1610,26 +1715,26 @@ static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__ac
    while (e) {
       if (w == 0) {
          // if we're currently at zero, we need to record the edge start point
-         x0 = e->x; w += e->valid;
+         x0 = e->x; w += e->direction;
       } else {
-         int x1 = e->x; w += e->valid;
+         int x1 = e->x; w += e->direction;
          // if we went to zero, we need to draw
          if (w == 0) {
-            int i = x0 >> FIXSHIFT;
-            int j = x1 >> FIXSHIFT;
+            int i = x0 >> STBTT_FIXSHIFT;
+            int j = x1 >> STBTT_FIXSHIFT;
 
             if (i < len && j >= 0) {
                if (i == j) {
                   // x0,x1 are the same pixel, so compute combined coverage
-                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> FIXSHIFT);
+                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT);
                } else {
                   if (i >= 0) // add antialiasing for x0
-                     scanline[i] = scanline[i] + (stbtt_uint8) (((FIX - (x0 & FIXMASK)) * max_weight) >> FIXSHIFT);
+                     scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT);
                   else
                      i = -1; // clip
 
                   if (j < len) // add antialiasing for x1
-                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & FIXMASK) * max_weight) >> FIXSHIFT);
+                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT);
                   else
                      j = len; // clip
 
@@ -1646,6 +1751,7 @@ static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__ac
 
 static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
 {
+   stbtt__hheap hh = { 0 };
    stbtt__active_edge *active = NULL;
    int y,j=0;
    int max_weight = (255 / vsubsample);  // weight per vertical scanline
@@ -1673,9 +1779,9 @@ static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e,
             stbtt__active_edge * z = *step;
             if (z->ey <= scan_y) {
                *step = z->next; // delete from list
-               STBTT_assert(z->valid);
-               z->valid = 0;
-               STBTT_free(z, userdata);
+               STBTT_assert(z->direction);
+               z->direction = 0;
+               stbtt__hheap_free(&hh, z);
             } else {
                z->x += z->dx; // advance to position for current scanline
                step = &((*step)->next); // advance through list
@@ -1704,7 +1810,7 @@ static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e,
          // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline
          while (e->y0 <= scan_y) {
             if (e->y1 > scan_y) {
-               stbtt__active_edge *z = new_active(e, off_x, scan_y, userdata);
+               stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata);
                // find insertion point
                if (active == NULL)
                   active = z;
@@ -1735,24 +1841,378 @@ static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e,
       ++j;
    }
 
-   while (active) {
-      stbtt__active_edge *z = active;
-      active = active->next;
-      STBTT_free(z, userdata);
+   stbtt__hheap_cleanup(&hh, userdata);
+
+   if (scanline != scanline_data)
+      STBTT_free(scanline, userdata);
+}
+
+#elif STBTT_RASTERIZER_VERSION == 2
+
+// the edge passed in here does not cross the vertical line at x or the vertical line at x+1
+// (i.e. it has already been clipped to those)
+static void stbtt__handle_clipped_edge(float *scanline, int x, stbtt__active_edge *e, float x0, float y0, float x1, float y1)
+{
+   if (y0 == y1) return;
+   assert(y0 < y1);
+   assert(e->sy <= e->ey);
+   if (y0 > e->ey) return;
+   if (y1 < e->sy) return;
+   if (y0 < e->sy) {
+      x0 += (x1-x0) * (e->sy - y0) / (y1-y0);
+      y0 = e->sy;
+   }
+   if (y1 > e->ey) {
+      x1 += (x1-x0) * (e->ey - y1) / (y1-y0);
+      y1 = e->ey;
+   }
+
+   if (x0 == x)
+      assert(x1 <= x+1);
+   else if (x0 == x+1)
+      assert(x1 >= x);
+   else if (x0 <= x)
+      assert(x1 <= x);
+   else if (x0 >= x+1)
+      assert(x1 >= x+1);
+   else
+      assert(x1 >= x && x1 <= x+1);
+
+   if (x0 <= x && x1 <= x)
+      scanline[x] += e->direction * (y1-y0);
+   else if (x0 >= x+1 && x1 >= x+1)
+      ;
+   else {
+      assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1);
+      scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position
+   }
+}
+
+static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top)
+{
+   float y_bottom = y_top+1;
+
+   while (e) {
+      // brute force every pixel
+
+      // compute intersection points with top & bottom
+      assert(e->ey >= y_top);
+
+      if (e->fdx == 0) {
+         float x0 = e->fx;
+         if (x0 < len) {
+            if (x0 >= 0) {
+               stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom);
+               stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom);
+            } else {
+               stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom);
+            }
+         }
+      } else {
+         float x0 = e->fx;
+         float dx = e->fdx;
+         float xb = x0 + dx;
+         float x_top, x_bottom;
+         float y0,y1;
+         float dy = e->fdy;
+         assert(e->sy <= y_bottom && e->ey >= y_top);
+
+         // compute endpoints of line segment clipped to this scanline (if the
+         // line segment starts on this scanline. x0 is the intersection of the
+         // line with y_top, but that may be off the line segment.
+         if (e->sy > y_top) {
+            x_top = x0 + dx * (e->sy - y_top);
+            y0 = e->sy;
+         } else {
+            x_top = x0;
+            y0 = y_top;
+         }
+         if (e->ey < y_bottom) {
+            x_bottom = x0 + dx * (e->ey - y_top);
+            y1 = e->ey;
+         } else {
+            x_bottom = xb;
+            y1 = y_bottom;
+         }
+
+         if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) {
+            // from here on, we don't have to range check x values
+
+            if ((int) x_top == (int) x_bottom) {
+               float height;
+               // simple case, only spans one pixel
+               int x = (int) x_top;
+               height = y1 - y0;
+               assert(x >= 0 && x < len);
+               scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2)  * height;
+               scanline_fill[x] += e->direction * height; // everything right of this pixel is filled
+            } else {
+               int x,x1,x2;
+               float y_crossing, step, sign, area;
+               // covers 2+ pixels
+               if (x_top > x_bottom) {
+                  // flip scanline vertically; signed area is the same
+                  float t;
+                  y0 = y_bottom - (y0 - y_top);
+                  y1 = y_bottom - (y1 - y_top);
+                  t = y0, y0 = y1, y1 = t;
+                  t = x_bottom, x_bottom = x_top, x_top = t;
+                  dx = -dx;
+                  dy = -dy;
+                  t = x0, x0 = xb, xb = t;
+               }
+
+               x1 = (int) x_top;
+               x2 = (int) x_bottom;
+               // compute intersection with y axis at x1+1
+               y_crossing = (x1+1 - x0) * dy + y_top;
+
+               sign = e->direction;
+               // area of the rectangle covered from y0..y_crossing
+               area = sign * (y_crossing-y0);
+               // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing)
+               scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2);
+
+               step = sign * dy;
+               for (x = x1+1; x < x2; ++x) {
+                  scanline[x] += area + step/2;
+                  area += step;
+               }
+               y_crossing += dy * (x2 - (x1+1));
+
+               assert(fabs(area) <= 1.01f);
+
+               scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (y1-y_crossing);
+
+               scanline_fill[x2] += sign * (y1-y0);
+            }
+         } else {
+            // if edge goes outside of box we're drawing, we require
+            // clipping logic. since this does not match the intended use
+            // of this library, we use a different, very slow brute
+            // force implementation
+            int x;
+            for (x=0; x < len; ++x) {
+               // cases:
+               //
+               // there can be up to two intersections with the pixel. any intersection
+               // with left or right edges can be handled by splitting into two (or three)
+               // regions. intersections with top & bottom do not necessitate case-wise logic.
+               float y0,y1;
+               float y_cur = y_top, x_cur = x0;
+               // x = e->x + e->dx * (y-y_top)
+               // (y-y_top) = (x - e->x) / e->dx
+               // y = (x - e->x) / e->dx + y_top
+               y0 = (x - x0) / dx + y_top;
+               y1 = (x+1 - x0) / dx + y_top;
+
+               if (y0 < y1) {
+                  if (y0 > y_top && y0 < y_bottom) {
+                     stbtt__handle_clipped_edge(scanline,x,e, x_cur,y_cur, (float) x,y0);
+                     y_cur = y0;
+                     x_cur = (float) x;
+                  }
+                  if (y1 >= y_cur && y1 < y_bottom) {
+                     stbtt__handle_clipped_edge(scanline,x,e, x_cur,y_cur, (float) x+1,y1);
+                     y_cur = y1;
+                     x_cur = (float) x+1;
+                  }
+               } else {
+                  if (y1 >= y_cur && y1 < y_bottom) {
+                     stbtt__handle_clipped_edge(scanline,x,e, x_cur,y_cur, (float) x+1,y1);
+                     y_cur = y1;
+                     x_cur = (float) x+1;
+                  }
+                  if (y0 > y_top && y0 < y_bottom) {
+                     stbtt__handle_clipped_edge(scanline,x,e, x_cur,y_cur, (float) x,y0);
+                     y_cur = y0;
+                     x_cur = (float) x;
+                  }
+               }
+               stbtt__handle_clipped_edge(scanline,x,e, x_cur,y_cur, xb,y_bottom);
+            }
+         }
+      }
+      e = e->next;
+   }
+}
+
+// directly AA rasterize edges w/o supersampling
+static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
+{
+    (void)vsubsample;
+   stbtt__hheap hh = { 0 };
+   stbtt__active_edge *active = NULL;
+   int y,j=0, i;
+   float scanline_data[129], *scanline, *scanline2;
+
+   if (result->w > 64)
+      scanline = (float *) STBTT_malloc((result->w*2+1) * sizeof(float), userdata);
+   else
+      scanline = scanline_data;
+
+   scanline2 = scanline + result->w;
+
+   y = off_y;
+   e[n].y0 = (float) (off_y + result->h) + 1;
+
+   while (j < result->h) {
+      // find center of pixel for this scanline
+      float scan_y_top    = y + 0.0f;
+      float scan_y_bottom = y + 1.0f;
+      stbtt__active_edge **step = &active;
+
+      STBTT_memset(scanline , 0, result->w*sizeof(scanline[0]));
+      STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0]));
+
+      // update all active edges;
+      // remove all active edges that terminate before the top of this scanline
+      while (*step) {
+         stbtt__active_edge * z = *step;
+         if (z->ey <= scan_y_top) {
+            *step = z->next; // delete from list
+            STBTT_assert(z->direction);
+            z->direction = 0;
+            stbtt__hheap_free(&hh, z);
+         } else {
+            step = &((*step)->next); // advance through list
+         }
+      }
+
+      // insert all edges that start before the bottom of this scanline
+      while (e->y0 <= scan_y_bottom) {
+         stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata);
+         assert(z->ey >= scan_y_top);
+         // insert at front
+         z->next = active;
+         active = z;
+         ++e;
+      }
+
+      // now process all active edges
+      if (active)
+         stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top);
+
+      {
+         float sum = 0;
+         for (i=0; i < result->w; ++i) {
+            float k;
+            int m;
+            sum += scanline2[i];
+            k = scanline[i] + sum;
+            k = (float) fabs(k)*255 + 0.5f;
+            m = (int) k;
+            if (m > 255) m = 255;
+            result->pixels[j*result->stride + i] = (unsigned char) m;
+         }
+      }
+      // advance all the edges
+      step = &active;
+      while (*step) {
+         stbtt__active_edge *z = *step;
+         z->fx += z->fdx; // advance to position for current scanline
+         step = &((*step)->next); // advance through list
+      }
+
+      ++y;
+      ++j;
    }
 
+   stbtt__hheap_cleanup(&hh, userdata);
+
    if (scanline != scanline_data)
       STBTT_free(scanline, userdata);
 }
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
 
-static int stbtt__edge_compare(const void *p, const void *q)
+#define STBTT__COMPARE(a,b)  ((a)->y0 < (b)->y0)
+
+static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n)
 {
-   stbtt__edge *a = (stbtt__edge *) p;
-   stbtt__edge *b = (stbtt__edge *) q;
+   int i,j;
+   for (i=1; i < n; ++i) {
+      stbtt__edge t = p[i], *a = &t;
+      j = i;
+      while (j > 0) {
+         stbtt__edge *b = &p[j-1];
+         int c = STBTT__COMPARE(a,b);
+         if (!c) break;
+         p[j] = p[j-1];
+         --j;
+      }
+      if (i != j)
+         p[j] = t;
+   }
+}
 
-   if (a->y0 < b->y0) return -1;
-   if (a->y0 > b->y0) return  1;
-   return 0;
+static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n)
+{
+   /* threshhold for transitioning to insertion sort */
+   while (n > 12) {
+      stbtt__edge t;
+      int c01,c12,c,m,i,j;
+
+      /* compute median of three */
+      m = n >> 1;
+      c01 = STBTT__COMPARE(&p[0],&p[m]);
+      c12 = STBTT__COMPARE(&p[m],&p[n-1]);
+      /* if 0 >= mid >= end, or 0 < mid < end, then use mid */
+      if (c01 != c12) {
+         /* otherwise, we'll need to swap something else to middle */
+         int z;
+         c = STBTT__COMPARE(&p[0],&p[n-1]);
+         /* 0>mid && mid<n:  0>n => n; 0<n => 0 */
+         /* 0<mid && mid>n:  0>n => 0; 0<n => n */
+         z = (c == c12) ? 0 : n-1;
+         t = p[z];
+         p[z] = p[m];
+         p[m] = t;
+      }
+      /* now p[m] is the median-of-three */
+      /* swap it to the beginning so it won't move around */
+      t = p[0];
+      p[0] = p[m];
+      p[m] = t;
+
+      /* partition loop */
+      i=1;
+      j=n-1;
+      for(;;) {
+         /* handling of equality is crucial here */
+         /* for sentinels & efficiency with duplicates */
+         for (;;++i) {
+            if (!STBTT__COMPARE(&p[i], &p[0])) break;
+         }
+         for (;;--j) {
+            if (!STBTT__COMPARE(&p[0], &p[j])) break;
+         }
+         /* make sure we haven't crossed */
+         if (i >= j) break;
+         t = p[i];
+         p[i] = p[j];
+         p[j] = t;
+
+         ++i;
+         --j;
+      }
+      /* recurse on smaller side, iterate on larger */
+      if (j < (n-i)) {
+         stbtt__sort_edges_quicksort(p,j);
+         p = p+i;
+         n = n-i;
+      } else {
+         stbtt__sort_edges_quicksort(p+i, n-i);
+         n = j;
+      }
+   }
+}
+
+static void stbtt__sort_edges(stbtt__edge *p, int n)
+{
+   stbtt__sort_edges_quicksort(p, n);
+   stbtt__sort_edges_ins_sort(p, n);
 }
 
 typedef struct
@@ -1765,7 +2225,13 @@ static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcou
    float y_scale_inv = invert ? -scale_y : scale_y;
    stbtt__edge *e;
    int n,i,j,k,m;
+#if STBTT_RASTERIZER_VERSION == 1
    int vsubsample = result->h < 8 ? 15 : 5;
+#elif STBTT_RASTERIZER_VERSION == 2
+   int vsubsample = 1;
+#else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
    // vsubsample should divide 255 evenly; otherwise we won't reach full opacity
 
    // now we have to blow out the windings into explicit edge lists
@@ -1802,7 +2268,8 @@ static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcou
    }
 
    // now sort the edges by their highest point (should snap to integer, and then by x)
-   STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare);
+   //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare);
+   stbtt__sort_edges(e, n);
 
    // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule
    stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata);
@@ -2346,17 +2813,13 @@ STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, stbtt_fon
       for (j=0; j < ranges[i].num_chars_in_range; ++j) {
          int x0,y0,x1,y1;
 		 int glyph = stbtt_FindGlyphIndex(info,ranges[i].first_unicode_char_in_range + j);
-         if (glyph) {
-            stbtt_GetGlyphBitmapBoxSubpixel(info,glyph,
-                                            scale * spc->h_oversample,
-                                            scale * spc->v_oversample,
-                                            0,0,
-                                            &x0,&y0,&x1,&y1);
-            rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1);
-            rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1);
-         } else {
-            rects[k].w = rects[k].h = 1;
-         }
+         stbtt_GetGlyphBitmapBoxSubpixel(info,glyph,
+                                         scale * spc->h_oversample,
+                                         scale * spc->v_oversample,
+                                         0,0,
+                                         &x0,&y0,&x1,&y1);
+         rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1);
+         rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1);
          ++k;
       }
    }