8 years ago · 8ce85d2462
--- a/3rdparty/edtaa3/LICENSE.md
+++ b/3rdparty/edtaa3/LICENSE.md
@@ -1,34 +0,0 @@
 
				-https://github.com/OpenGLInsights/OpenGLInsightsCode/blob/master/Chapter%2012%202D%20Shape%20Rendering%20by%20Distance%20Fields/LICENSE.txt
			
 
				-
			
 
				-The C code and the GLSL code for the OpenGL demo is public
			
 
				-domain code. The distance transform code in the console
			
 
				-application to create distance field textures, located in
			
 
				-the file "edtaa3func.c", is MIT licensed, and free to use
			
 
				-under the following conditions.
			
 
				-
			
 
				-https://github.com/OpenGLInsights/OpenGLInsightsCode/issues/6#issuecomment-67829157
			
 
				-
			
 
				-----
			
 
				-
			
 
				-Copyright (C) 2011 by Stefan Gustavson
			
 
				-([email protected])
			
 
				-
			
 
				-Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				-of this software and associated documentation files (the "Software"), to deal
			
 
				-in the Software without restriction, including without limitation the rights
			
 
				-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				-copies of the Software, and to permit persons to whom the Software is
			
 
				-furnished to do so, subject to the following conditions:
			
 
				-
			
 
				-The above copyright notice and this permission notice shall be included in
			
 
				-all copies or substantial portions of the Software.
			
 
				-
			
 
				-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				-THE SOFTWARE.
			
 
				-
			
 
				-----
			
--- a/3rdparty/edtaa3/edtaa3func.cpp
+++ b/3rdparty/edtaa3/edtaa3func.cpp
@@ -1,580 +0,0 @@
 
				-/*
			
 
				- * edtaa3()
			
 
				- *
			
 
				- * Sweep-and-update Euclidean distance transform of an
			
 
				- * image. Positive pixels are treated as object pixels,
			
 
				- * zero or negative pixels are treated as background.
			
 
				- * An attempt is made to treat antialiased edges correctly.
			
 
				- * The input image must have pixels in the range [0,1],
			
 
				- * and the antialiased image should be a box-filter
			
 
				- * sampling of the ideal, crisp edge.
			
 
				- * If the antialias region is more than 1 pixel wide,
			
 
				- * the result from this transform will be inaccurate.
			
 
				- *
			
 
				- * By Stefan Gustavson ([email protected]).
			
 
				- *
			
 
				- * Originally written in 1994, based on a verbal
			
 
				- * description of Per-Erik Danielsson's SSED8 algorithm
			
 
				- * as presented in the PhD dissertation of Ingemar
			
 
				- * Ragnemalm. This is Per-Erik Danielsson's scanline
			
 
				- * scheme from 1979 - I only implemented it in C.
			
 
				- *
			
 
				- * Updated in 2004 to treat border pixels correctly,
			
 
				- * and cleaned up the code to improve readability.
			
 
				- *
			
 
				- * Updated in 2009 to handle anti-aliased edges,
			
 
				- * as published in the article "Anti-aliased Euclidean
			
 
				- * distance transform" by Stefan Gustavson and Robin Strand,
			
 
				- * Pattern Recognition Letters 32 (2011) 252–257.
			
 
				- *
			
 
				- * Updated in 2011 to avoid a corner case causing an
			
 
				- * infinite loop for some input data.
			
 
				- *
			
 
				-*/
			
 
				-
			
 
				-/*
			
 
				-
			
 
				-Copyright (C) 2011 by Stefan Gustavson
			
 
				-
			
 
				-([email protected])
			
 
				-
			
 
				-This code is distributed under the permissive "MIT license":
			
 
				-
			
 
				-Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				-of this software and associated documentation files (the "Software"), to deal
			
 
				-in the Software without restriction, including without limitation the rights
			
 
				-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				-copies of the Software, and to permit persons to whom the Software is
			
 
				-furnished to do so, subject to the following conditions:
			
 
				-The above copyright notice and this permission notice shall be included in
			
 
				-all copies or substantial portions of the Software.
			
 
				-
			
 
				-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				-THE SOFTWARE.
			
 
				-
			
 
				-*/
			
 
				-
			
 
				-#include <math.h>
			
 
				-
			
 
				-/*
			
 
				- * Compute the local gradient at edge pixels using convolution filters.
			
 
				- * The gradient is computed only at edge pixels. At other places in the
			
 
				- * image, it is never used, and it's mostly zero anyway.
			
 
				- */
			
 
				-void computegradient(double *img, int w, int h, double *gx, double *gy)
			
 
				-{
			
 
				-    int i,j,k;
			
 
				-    double glength;
			
 
				-#define SQRT2 1.4142136
			
 
				-    for(i = 1; i < h-1; i++) { // Avoid edges where the kernels would spill over
			
 
				-        for(j = 1; j < w-1; j++) {
			
 
				-            k = i*w + j;
			
 
				-            if((img[k]>0.0) && (img[k]<1.0)) { // Compute gradient for edge pixels only
			
 
				-                gx[k] = -img[k-w-1] - SQRT2*img[k-1] - img[k+w-1] + img[k-w+1] + SQRT2*img[k+1] + img[k+w+1];
			
 
				-                gy[k] = -img[k-w-1] - SQRT2*img[k-w] - img[k-w+1] + img[k+w-1] + SQRT2*img[k+w] + img[k+w+1];
			
 
				-                glength = gx[k]*gx[k] + gy[k]*gy[k];
			
 
				-                if(glength > 0.0) { // Avoid division by zero
			
 
				-                    glength = sqrt(glength);
			
 
				-                    gx[k]=gx[k]/glength;
			
 
				-                    gy[k]=gy[k]/glength;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-    // TODO: Compute reasonable values for gx, gy also around the image edges.
			
 
				-    // (These are zero now, which reduces the accuracy for a 1-pixel wide region
			
 
				-	// around the image edge.) 2x2 kernels would be suitable for this.
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * A somewhat tricky function to approximate the distance to an edge in a
			
 
				- * certain pixel, with consideration to either the local gradient (gx,gy)
			
 
				- * or the direction to the pixel (dx,dy) and the pixel greyscale value a.
			
 
				- * The latter alternative, using (dx,dy), is the metric used by edtaa2().
			
 
				- * Using a local estimate of the edge gradient (gx,gy) yields much better
			
 
				- * accuracy at and near edges, and reduces the error even at distant pixels
			
 
				- * provided that the gradient direction is accurately estimated.
			
 
				- */
			
 
				-double edgedf(double gx, double gy, double a)
			
 
				-{
			
 
				-    double df, glength, temp, a1;
			
 
				-
			
 
				-    if ((gx == 0) || (gy == 0)) { // Either A) gu or gv are zero, or B) both
			
 
				-        df = 0.5-a;  // Linear approximation is A) correct or B) a fair guess
			
 
				-    } else {
			
 
				-        glength = sqrt(gx*gx + gy*gy);
			
 
				-        if(glength>0) {
			
 
				-            gx = gx/glength;
			
 
				-            gy = gy/glength;
			
 
				-        }
			
 
				-        /* Everything is symmetric wrt sign and transposition,
			
 
				-         * so move to first octant (gx>=0, gy>=0, gx>=gy) to
			
 
				-         * avoid handling all possible edge directions.
			
 
				-         */
			
 
				-        gx = fabs(gx);
			
 
				-        gy = fabs(gy);
			
 
				-        if(gx<gy) {
			
 
				-            temp = gx;
			
 
				-            gx = gy;
			
 
				-            gy = temp;
			
 
				-        }
			
 
				-        a1 = 0.5*gy/gx;
			
 
				-        if (a < a1) { // 0 <= a < a1
			
 
				-            df = 0.5*(gx + gy) - sqrt(2.0*gx*gy*a);
			
 
				-        } else if (a < (1.0-a1)) { // a1 <= a <= 1-a1
			
 
				-            df = (0.5-a)*gx;
			
 
				-        } else { // 1-a1 < a <= 1
			
 
				-            df = -0.5*(gx + gy) + sqrt(2.0*gx*gy*(1.0-a));
			
 
				-        }
			
 
				-    }    
			
 
				-    return df;
			
 
				-}
			
 
				-
			
 
				-double distaa3(double *img, double *gximg, double *gyimg, int w, int c, int xc, int yc, int xi, int yi)
			
 
				-{
			
 
				-  double di, df, dx, dy, gx, gy, a;
			
 
				-  int closest;
			
 
				-  
			
 
				-  closest = c-xc-yc*w; // Index to the edge pixel pointed to from c
			
 
				-  a = img[closest];    // Grayscale value at the edge pixel
			
 
				-  gx = gximg[closest]; // X gradient component at the edge pixel
			
 
				-  gy = gyimg[closest]; // Y gradient component at the edge pixel
			
 
				-  
			
 
				-  if(a > 1.0) a = 1.0;
			
 
				-  if(a < 0.0) a = 0.0; // Clip grayscale values outside the range [0,1]
			
 
				-  if(a == 0.0) return 1000000.0; // Not an object pixel, return "very far" ("don't know yet")
			
 
				-
			
 
				-  dx = (double)xi;
			
 
				-  dy = (double)yi;
			
 
				-  di = sqrt(dx*dx + dy*dy); // Length of integer vector, like a traditional EDT
			
 
				-  if(di==0) { // Use local gradient only at edges
			
 
				-      // Estimate based on local gradient only
			
 
				-      df = edgedf(gx, gy, a);
			
 
				-  } else {
			
 
				-      // Estimate gradient based on direction to edge (accurate for large di)
			
 
				-      df = edgedf(dx, dy, a);
			
 
				-  }
			
 
				-  return di + df; // Same metric as edtaa2, except at edges (where di=0)
			
 
				-}
			
 
				-
			
 
				-// Shorthand macro: add ubiquitous parameters img, gx, gy and w and call distaa3()
			
 
				-#define DISTAA(c,xc,yc,xi,yi) (distaa3(img, gx, gy, w, c, xc, yc, xi, yi))
			
 
				-
			
 
				-void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist)
			
 
				-{
			
 
				-  int x, y, i, c;
			
 
				-  int offset_u, offset_ur, offset_r, offset_rd,
			
 
				-  offset_d, offset_dl, offset_l, offset_lu;
			
 
				-  double olddist, newdist;
			
 
				-  int cdistx, cdisty, newdistx, newdisty;
			
 
				-  int changed;
			
 
				-  double epsilon = 1e-3; // Safeguard against errors due to limited precision
			
 
				-
			
 
				-  /* Initialize index offsets for the current image width */
			
 
				-  offset_u = -w;
			
 
				-  offset_ur = -w+1;
			
 
				-  offset_r = 1;
			
 
				-  offset_rd = w+1;
			
 
				-  offset_d = w;
			
 
				-  offset_dl = w-1;
			
 
				-  offset_l = -1;
			
 
				-  offset_lu = -w-1;
			
 
				-
			
 
				-  /* Initialize the distance images */
			
 
				-  for(i=0; i<w*h; i++) {
			
 
				-    distx[i] = 0; // At first, all pixels point to
			
 
				-    disty[i] = 0; // themselves as the closest known.
			
 
				-    if(img[i] <= 0.0)
			
 
				-      {
			
 
				-	dist[i]= 1000000.0; // Big value, means "not set yet"
			
 
				-      }
			
 
				-    else if (img[i]<1.0) {
			
 
				-      dist[i] = edgedf(gx[i], gy[i], img[i]); // Gradient-assisted estimate
			
 
				-    }
			
 
				-    else {
			
 
				-      dist[i]= 0.0; // Inside the object
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  /* Perform the transformation */
			
 
				-  do
			
 
				-    {
			
 
				-      changed = 0;
			
 
				-
			
 
				-      /* Scan rows, except first row */
			
 
				-      for(y=1; y<h; y++)
			
 
				-        {
			
 
				-
			
 
				-          /* move index to leftmost pixel of current row */
			
 
				-          i = y*w;
			
 
				-
			
 
				-          /* scan right, propagate distances from above & left */
			
 
				-
			
 
				-          /* Leftmost pixel is special, has no left neighbors */
			
 
				-          olddist = dist[i];
			
 
				-          if(olddist > 0) // If non-zero distance or not set yet
			
 
				-            {
			
 
				-	      c = i + offset_u; // Index of candidate for testing
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_ur;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-          i++;
			
 
				-
			
 
				-          /* Middle pixels have all neighbors */
			
 
				-          for(x=1; x<w-1; x++, i++)
			
 
				-            {
			
 
				-              olddist = dist[i];
			
 
				-              if(olddist <= 0) continue; // No need to update further
			
 
				-
			
 
				-	      c = i+offset_l;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_lu;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_u;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_ur;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-          /* Rightmost pixel of row is special, has no right neighbors */
			
 
				-          olddist = dist[i];
			
 
				-          if(olddist > 0) // If not already zero distance
			
 
				-            {
			
 
				-	      c = i+offset_l;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_lu;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_u;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty+1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-          /* Move index to second rightmost pixel of current row. */
			
 
				-          /* Rightmost pixel is skipped, it has no right neighbor. */
			
 
				-          i = y*w + w-2;
			
 
				-
			
 
				-          /* scan left, propagate distance from right */
			
 
				-          for(x=w-2; x>=0; x--, i--)
			
 
				-            {
			
 
				-              olddist = dist[i];
			
 
				-              if(olddist <= 0) continue; // Already zero distance
			
 
				-
			
 
				-	      c = i+offset_r;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-      
			
 
				-      /* Scan rows in reverse order, except last row */
			
 
				-      for(y=h-2; y>=0; y--)
			
 
				-        {
			
 
				-          /* move index to rightmost pixel of current row */
			
 
				-          i = y*w + w-1;
			
 
				-
			
 
				-          /* Scan left, propagate distances from below & right */
			
 
				-
			
 
				-          /* Rightmost pixel is special, has no right neighbors */
			
 
				-          olddist = dist[i];
			
 
				-          if(olddist > 0) // If not already zero distance
			
 
				-            {
			
 
				-	      c = i+offset_d;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_dl;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-          i--;
			
 
				-
			
 
				-          /* Middle pixels have all neighbors */
			
 
				-          for(x=w-2; x>0; x--, i--)
			
 
				-            {
			
 
				-              olddist = dist[i];
			
 
				-              if(olddist <= 0) continue; // Already zero distance
			
 
				-
			
 
				-	      c = i+offset_r;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_rd;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_d;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_dl;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-          /* Leftmost pixel is special, has no left neighbors */
			
 
				-          olddist = dist[i];
			
 
				-          if(olddist > 0) // If not already zero distance
			
 
				-            {
			
 
				-	      c = i+offset_r;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_rd;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx-1;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  olddist=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-
			
 
				-	      c = i+offset_d;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx;
			
 
				-              newdisty = cdisty-1;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-          /* Move index to second leftmost pixel of current row. */
			
 
				-          /* Leftmost pixel is skipped, it has no left neighbor. */
			
 
				-          i = y*w + 1;
			
 
				-          for(x=1; x<w; x++, i++)
			
 
				-            {
			
 
				-              /* scan right, propagate distance from left */
			
 
				-              olddist = dist[i];
			
 
				-              if(olddist <= 0) continue; // Already zero distance
			
 
				-
			
 
				-	      c = i+offset_l;
			
 
				-	      cdistx = distx[c];
			
 
				-	      cdisty = disty[c];
			
 
				-              newdistx = cdistx+1;
			
 
				-              newdisty = cdisty;
			
 
				-              newdist = DISTAA(c, cdistx, cdisty, newdistx, newdisty);
			
 
				-              if(newdist < olddist-epsilon)
			
 
				-                {
			
 
				-                  distx[i]=newdistx;
			
 
				-                  disty[i]=newdisty;
			
 
				-                  dist[i]=newdist;
			
 
				-                  changed = 1;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-  while(changed); // Sweep until no more updates are made
			
 
				-
			
 
				-  /* The transformation is completed. */
			
 
				-
			
 
				-}
			
--- a/3rdparty/edtaa3/edtaa3func.h
+++ b/3rdparty/edtaa3/edtaa3func.h
@@ -1,7 +0,0 @@
 
				-#ifndef EDTAA3_H_HEADER_GUARD
			
 
				-#define EDTAA3_H_HEADER_GUARD
			
 
				-
			
 
				-extern void computegradient(double *img, int w, int h, double *gx, double *gy);
			
 
				-extern void edtaa3(double *img, double *gx, double *gy, int w, int h, short *distx, short *disty, double *dist);
			
 
				-
			
 
				-#endif // EDTAA3_H_HEADER_GUARD
			
--- a/3rdparty/etc1/LICENSE
+++ b/3rdparty/etc1/LICENSE
@@ -1,161 +0,0 @@
 
				-Apache License
			
 
				-
			
 
				-Version 2.0, January 2004
			
 
				-
			
 
				-http://www.apache.org/licenses/
			
 
				-
			
 
				-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
			
 
				-
			
 
				-1. Definitions.
			
 
				-
			
 
				-"License" shall mean the terms and conditions for use, reproduction, and
			
 
				-distribution as defined by Sections 1 through 9 of this document.
			
 
				-
			
 
				-"Licensor" shall mean the copyright owner or entity authorized by the
			
 
				-copyright owner that is granting the License.
			
 
				-
			
 
				-"Legal Entity" shall mean the union of the acting entity and all other
			
 
				-entities that control, are controlled by, or are under common control with
			
 
				-that entity. For the purposes of this definition, "control" means (i) the
			
 
				-power, direct or indirect, to cause the direction or management of such 
			
 
				-entity, whether by contract or otherwise, or (ii) ownership of fifty 
			
 
				-percent (50%) or more of the outstanding shares, or (iii) beneficial 
			
 
				-ownership of such entity.
			
 
				-
			
 
				-"You" (or "Your") shall mean an individual or Legal Entity exercising 
			
 
				-permissions granted by this License.
			
 
				-
			
 
				-"Source" form shall mean the preferred form for making modifications, 
			
 
				-including but not limited to software source code, documentation 
			
 
				-source, and configuration files.
			
 
				-
			
 
				-"Object" form shall mean any form resulting from mechanical transformation 
			
 
				-or translation of a Source form, including but not limited to compiled 
			
 
				-object code, generated documentation, and conversions to other media types.
			
 
				-
			
 
				-"Work" shall mean the work of authorship, whether in Source or Object 
			
 
				-form, made available under the License, as indicated by a copyright 
			
 
				-notice that is included in or attached to the work (an example is 
			
 
				-provided in the Appendix below).
			
 
				-
			
 
				-"Derivative Works" shall mean any work, whether in Source or Object 
			
 
				-form, that is based on (or derived from) the Work and for which the 
			
 
				-editorial revisions, annotations, elaborations, or other modifications 
			
 
				-represent, as a whole, an original work of authorship. For the purposes 
			
 
				-of this License, Derivative Works shall not include works that remain 
			
 
				-separable from, or merely link (or bind by name) to the interfaces of, 
			
 
				-the Work and Derivative Works thereof.
			
 
				-
			
 
				-"Contribution" shall mean any work of authorship, including the original 
			
 
				-version of the Work and any modifications or additions to that Work or 
			
 
				-Derivative Works thereof, that is intentionally submitted to Licensor 
			
 
				-for inclusion in the Work by the copyright owner or by an individual or 
			
 
				-Legal Entity authorized to submit on behalf of the copyright owner. For 
			
 
				-the purposes of this definition, "submitted" means any form of electronic, 
			
 
				-verbal, or written communication sent to the Licensor or its 
			
 
				-representatives, including but not limited to communication on electronic 
			
 
				-mailing lists, source code control systems, and issue tracking systems that 
			
 
				-are managed by, or on behalf of, the Licensor for the purpose of discussing 
			
 
				-and improving the Work, but excluding communication that is conspicuously 
			
 
				-marked or otherwise designated in writing by the copyright owner as "Not 
			
 
				-a Contribution."
			
 
				-
			
 
				-"Contributor" shall mean Licensor and any individual or Legal Entity on 
			
 
				-behalf of whom a Contribution has been received by Licensor and subsequently 
			
 
				-incorporated within the Work.
			
 
				-
			
 
				-2. Grant of Copyright License. Subject to the terms and conditions of this 
			
 
				-License, each Contributor hereby grants to You a perpetual, worldwide, 
			
 
				-non-exclusive, no-charge, royalty-free, irrevocable copyright license to 
			
 
				-reproduce, prepare Derivative Works of, publicly display, publicly perform, 
			
 
				-sublicense, and distribute the Work and such Derivative Works in Source or 
			
 
				-Object form.
			
 
				-
			
 
				-3. Grant of Patent License. Subject to the terms and conditions of this 
			
 
				-License, each Contributor hereby grants to You a perpetual, worldwide, 
			
 
				-non-exclusive, no-charge, royalty-free, irrevocable (except as stated in 
			
 
				-this section) patent license to make, have made, use, offer to sell, sell, 
			
 
				-import, and otherwise transfer the Work, where such license applies only to 
			
 
				-those patent claims licensable by such Contributor that are necessarily 
			
 
				-infringed by their Contribution(s) alone or by combination of their 
			
 
				-Contribution(s) with the Work to which such Contribution(s) was submitted. 
			
 
				-If You institute patent litigation against any entity (including a cross-claim
			
 
				-or counterclaim in a lawsuit) alleging that the Work or a Contribution 
			
 
				-incorporated within the Work constitutes direct or contributory patent 
			
 
				-infringement, then any patent licenses granted to You under this License 
			
 
				-for that Work shall terminate as of the date such litigation is filed.
			
 
				-
			
 
				-4. Redistribution. You may reproduce and distribute copies of the Work or 
			
 
				-Derivative Works thereof in any medium, with or without modifications, and 
			
 
				-in Source or Object form, provided that You meet the following conditions:
			
 
				-
			
 
				-You must give any other recipients of the Work or Derivative Works a copy of 
			
 
				-this License; and
			
 
				-You must cause any modified files to carry prominent notices stating that 
			
 
				-You changed the files; and
			
 
				-You must retain, in the Source form of any Derivative Works that You 
			
 
				-distribute, all copyright, patent, trademark, and attribution notices 
			
 
				-from the Source form of the Work, excluding those notices that do not 
			
 
				-pertain to any part of the Derivative Works; and
			
 
				-If the Work includes a "NOTICE" text file as part of its distribution, 
			
 
				-then any Derivative Works that You distribute must include a readable 
			
 
				-copy of the attribution notices contained within such NOTICE file, excluding
			
 
				-those notices that do not pertain to any part of the Derivative Works, in
			
 
				-at least one of the following places: within a NOTICE text file distributed 
			
 
				-as part of the Derivative Works; within the Source form or documentation, if 
			
 
				-provided along with the Derivative Works; or, within a display generated by 
			
 
				-the Derivative Works, if and wherever such third-party notices normally 
			
 
				-appear. The contents of the NOTICE file are for informational purposes 
			
 
				-only and do not modify the License. You may add Your own attribution 
			
 
				-notices within Derivative Works that You distribute, alongside or as 
			
 
				-an addendum to the NOTICE text from the Work, provided that such additional 
			
 
				-attribution notices cannot be construed as modifying the License. 
			
 
				-
			
 
				-You may add Your own copyright statement to Your modifications and may provide
			
 
				-additional or different license terms and conditions for use, reproduction, or
			
 
				-distribution of Your modifications, or for any such Derivative Works as a 
			
 
				-whole, provided Your use, reproduction, and distribution of the Work otherwise 
			
 
				-complies with the conditions stated in this License.
			
 
				-5. Submission of Contributions. Unless You explicitly state otherwise, any 
			
 
				-Contribution intentionally submitted for inclusion in the Work by You to the 
			
 
				-Licensor shall be under the terms and conditions of this License, without any 
			
 
				-additional terms or conditions. Notwithstanding the above, nothing herein 
			
 
				-shall supersede or modify the terms of any separate license agreement you 
			
 
				-may have executed with Licensor regarding such Contributions.
			
 
				-
			
 
				-6. Trademarks. This License does not grant permission to use the trade names, 
			
 
				-trademarks, service marks, or product names of the Licensor, except as 
			
 
				-required for reasonable and customary use in describing the origin of the 
			
 
				-Work and reproducing the content of the NOTICE file.
			
 
				-
			
 
				-7. Disclaimer of Warranty. Unless required by applicable law or agreed to 
			
 
				-in writing, Licensor provides the Work (and each Contributor provides its 
			
 
				-Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 
			
 
				-ANY KIND, either express or implied, including, without limitation, any 
			
 
				-warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or 
			
 
				-FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining 
			
 
				-the appropriateness of using or redistributing the Work and assume any risks 
			
 
				-associated with Your exercise of permissions under this License.
			
 
				-
			
 
				-8. Limitation of Liability. In no event and under no legal theory, whether in
			
 
				-tort (including negligence), contract, or otherwise, unless required by 
			
 
				-applicable law (such as deliberate and grossly negligent acts) or agreed to 
			
 
				-in writing, shall any Contributor be liable to You for damages, including 
			
 
				-any direct, indirect, special, incidental, or consequential damages of any 
			
 
				-character arising as a result of this License or out of the use or inability 
			
 
				-to use the Work (including but not limited to damages for loss of goodwill, 
			
 
				-work stoppage, computer failure or malfunction, or any and all other 
			
 
				-commercial damages or losses), even if such Contributor has been advised 
			
 
				-of the possibility of such damages.
			
 
				-
			
 
				-9. Accepting Warranty or Additional Liability. While redistributing the 
			
 
				-Work or Derivative Works thereof, You may choose to offer, and charge a 
			
 
				-fee for, acceptance of support, warranty, indemnity, or other liability 
			
 
				-obligations and/or rights consistent with this License. However, in accepting
			
 
				-such obligations, You may act only on Your own behalf and on Your sole 
			
 
				-responsibility, not on behalf of any other Contributor, and only if You
			
 
				-agree to indemnify, defend, and hold each Contributor harmless for any 
			
 
				-liability incurred by, or claims asserted against, such Contributor by 
			
 
				-reason of your accepting any such warranty or additional liability.
			
 
				-
			
 
				-END OF TERMS AND CONDITIONS
			
--- a/3rdparty/etc1/etc1.cpp
+++ b/3rdparty/etc1/etc1.cpp
@@ -1,686 +0,0 @@
 
				-// Copyright 2009 Google Inc.
			
 
				-//
			
 
				-// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-// you may not use this file except in compliance with the License.
			
 
				-// You may obtain a copy of the License at
			
 
				-//
			
 
				-//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-//
			
 
				-// Unless required by applicable law or agreed to in writing, software
			
 
				-// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-// See the License for the specific language governing permissions and
			
 
				-// limitations under the License.
			
 
				-
			
 
				-//////////////////////////////////////////////////////////////////////////////////////////
			
 
				-
			
 
				-// This is a fork of the AOSP project ETC1 codec. The original code can be found
			
 
				-// at the following web site:
			
 
				-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/include/ETC1/
			
 
				-
			
 
				-//////////////////////////////////////////////////////////////////////////////////////////
			
 
				-
			
 
				-#include "etc1.h"
			
 
				-
			
 
				-#include <cstring>
			
 
				-
			
 
				-/* From http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
			
 
				-
			
 
				- The number of bits that represent a 4x4 texel block is 64 bits if
			
 
				- <internalformat> is given by ETC1_RGB8_OES.
			
 
				-
			
 
				- The data for a block is a number of bytes,
			
 
				-
			
 
				- {q0, q1, q2, q3, q4, q5, q6, q7}
			
 
				-
			
 
				- where byte q0 is located at the lowest memory address and q7 at
			
 
				- the highest. The 64 bits specifying the block is then represented
			
 
				- by the following 64 bit integer:
			
 
				-
			
 
				- int64bit = 256*(256*(256*(256*(256*(256*(256*q0+q1)+q2)+q3)+q4)+q5)+q6)+q7;
			
 
				-
			
 
				- ETC1_RGB8_OES:
			
 
				-
			
 
				- a) bit layout in bits 63 through 32 if diffbit = 0
			
 
				-
			
 
				- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
			
 
				- -----------------------------------------------
			
 
				- | base col1 | base col2 | base col1 | base col2 |
			
 
				- | R1 (4bits)| R2 (4bits)| G1 (4bits)| G2 (4bits)|
			
 
				- -----------------------------------------------
			
 
				-
			
 
				- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
			
 
				- ---------------------------------------------------
			
 
				- | base col1 | base col2 | table  | table  |diff|flip|
			
 
				- | B1 (4bits)| B2 (4bits)| cw 1   | cw 2   |bit |bit |
			
 
				- ---------------------------------------------------
			
 
				-
			
 
				-
			
 
				- b) bit layout in bits 63 through 32 if diffbit = 1
			
 
				-
			
 
				- 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48
			
 
				- -----------------------------------------------
			
 
				- | base col1    | dcol 2 | base col1    | dcol 2 |
			
 
				- | R1' (5 bits) | dR2    | G1' (5 bits) | dG2    |
			
 
				- -----------------------------------------------
			
 
				-
			
 
				- 47 46 45 44 43 42 41 40 39 38 37 36 35 34  33  32
			
 
				- ---------------------------------------------------
			
 
				- | base col 1   | dcol 2 | table  | table  |diff|flip|
			
 
				- | B1' (5 bits) | dB2    | cw 1   | cw 2   |bit |bit |
			
 
				- ---------------------------------------------------
			
 
				-
			
 
				-
			
 
				- c) bit layout in bits 31 through 0 (in both cases)
			
 
				-
			
 
				- 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
			
 
				- -----------------------------------------------
			
 
				- |       most significant pixel index bits       |
			
 
				- | p| o| n| m| l| k| j| i| h| g| f| e| d| c| b| a|
			
 
				- -----------------------------------------------
			
 
				-
			
 
				- 15 14 13 12 11 10  9  8  7  6  5  4  3   2   1  0
			
 
				- --------------------------------------------------
			
 
				- |         least significant pixel index bits       |
			
 
				- | p| o| n| m| l| k| j| i| h| g| f| e| d| c | b | a |
			
 
				- --------------------------------------------------
			
 
				-
			
 
				-
			
 
				- Add table 3.17.2: Intensity modifier sets for ETC1 compressed textures:
			
 
				-
			
 
				- table codeword                modifier table
			
 
				- ------------------        ----------------------
			
 
				- 0                     -8  -2  2   8
			
 
				- 1                    -17  -5  5  17
			
 
				- 2                    -29  -9  9  29
			
 
				- 3                    -42 -13 13  42
			
 
				- 4                    -60 -18 18  60
			
 
				- 5                    -80 -24 24  80
			
 
				- 6                   -106 -33 33 106
			
 
				- 7                   -183 -47 47 183
			
 
				-
			
 
				-
			
 
				- Add table 3.17.3 Mapping from pixel index values to modifier values for
			
 
				- ETC1 compressed textures:
			
 
				-
			
 
				- pixel index value
			
 
				- ---------------
			
 
				- msb     lsb           resulting modifier value
			
 
				- -----   -----          -------------------------
			
 
				- 1       1            -b (large negative value)
			
 
				- 1       0            -a (small negative value)
			
 
				- 0       0             a (small positive value)
			
 
				- 0       1             b (large positive value)
			
 
				-
			
 
				-
			
 
				- */
			
 
				-
			
 
				-static const int kModifierTable[] = {
			
 
				-/* 0 */2, 8, -2, -8,
			
 
				-/* 1 */5, 17, -5, -17,
			
 
				-/* 2 */9, 29, -9, -29,
			
 
				-/* 3 */13, 42, -13, -42,
			
 
				-/* 4 */18, 60, -18, -60,
			
 
				-/* 5 */24, 80, -24, -80,
			
 
				-/* 6 */33, 106, -33, -106,
			
 
				-/* 7 */47, 183, -47, -183 };
			
 
				-
			
 
				-static const int kLookup[8] = { 0, 1, 2, 3, -4, -3, -2, -1 };
			
 
				-
			
 
				-static inline etc1_byte clamp(int x) {
			
 
				-    return (etc1_byte) (x >= 0 ? (x < 255 ? x : 255) : 0);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convert4To8(int b) {
			
 
				-    int c = b & 0xf;
			
 
				-    return (c << 4) | c;
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convert5To8(int b) {
			
 
				-    int c = b & 0x1f;
			
 
				-    return (c << 3) | (c >> 2);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convert6To8(int b) {
			
 
				-    int c = b & 0x3f;
			
 
				-    return (c << 2) | (c >> 4);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int divideBy255(int d) {
			
 
				-    return (d + 128 + (d >> 8)) >> 8;
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convert8To4(int b) {
			
 
				-    int c = b & 0xff;
			
 
				-    return divideBy255(c * 15);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convert8To5(int b) {
			
 
				-    int c = b & 0xff;
			
 
				-    return divideBy255(c * 31);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int convertDiff(int base, int diff) {
			
 
				-    return convert5To8((0x1f & base) + kLookup[0x7 & diff]);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-void decode_subblock(etc1_byte* pOut, int r, int g, int b, const int* table,
			
 
				-        etc1_uint32 low, bool second, bool flipped) {
			
 
				-    int baseX = 0;
			
 
				-    int baseY = 0;
			
 
				-    if (second) {
			
 
				-        if (flipped) {
			
 
				-            baseY = 2;
			
 
				-        } else {
			
 
				-            baseX = 2;
			
 
				-        }
			
 
				-    }
			
 
				-    for (int i = 0; i < 8; i++) {
			
 
				-        int x, y;
			
 
				-        if (flipped) {
			
 
				-            x = baseX + (i >> 1);
			
 
				-            y = baseY + (i & 1);
			
 
				-        } else {
			
 
				-            x = baseX + (i >> 2);
			
 
				-            y = baseY + (i & 3);
			
 
				-        }
			
 
				-        int k = y + (x * 4);
			
 
				-        int offset = ((low >> k) & 1) | ((low >> (k + 15)) & 2);
			
 
				-        int delta = table[offset];
			
 
				-        etc1_byte* q = pOut + 3 * (x + 4 * y);
			
 
				-        *q++ = clamp(r + delta);
			
 
				-        *q++ = clamp(g + delta);
			
 
				-        *q++ = clamp(b + delta);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// Input is an ETC1 compressed version of the data.
			
 
				-// Output is a 4 x 4 square of 3-byte pixels in form R, G, B
			
 
				-
			
 
				-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut) {
			
 
				-    etc1_uint32 high = (pIn[0] << 24) | (pIn[1] << 16) | (pIn[2] << 8) | pIn[3];
			
 
				-    etc1_uint32 low = (pIn[4] << 24) | (pIn[5] << 16) | (pIn[6] << 8) | pIn[7];
			
 
				-    int r1, r2, g1, g2, b1, b2;
			
 
				-    if (high & 2) {
			
 
				-        // differential
			
 
				-        int rBase = high >> 27;
			
 
				-        int gBase = high >> 19;
			
 
				-        int bBase = high >> 11;
			
 
				-        r1 = convert5To8(rBase);
			
 
				-        r2 = convertDiff(rBase, high >> 24);
			
 
				-        g1 = convert5To8(gBase);
			
 
				-        g2 = convertDiff(gBase, high >> 16);
			
 
				-        b1 = convert5To8(bBase);
			
 
				-        b2 = convertDiff(bBase, high >> 8);
			
 
				-    } else {
			
 
				-        // not differential
			
 
				-        r1 = convert4To8(high >> 28);
			
 
				-        r2 = convert4To8(high >> 24);
			
 
				-        g1 = convert4To8(high >> 20);
			
 
				-        g2 = convert4To8(high >> 16);
			
 
				-        b1 = convert4To8(high >> 12);
			
 
				-        b2 = convert4To8(high >> 8);
			
 
				-    }
			
 
				-    int tableIndexA = 7 & (high >> 5);
			
 
				-    int tableIndexB = 7 & (high >> 2);
			
 
				-    const int* tableA = kModifierTable + tableIndexA * 4;
			
 
				-    const int* tableB = kModifierTable + tableIndexB * 4;
			
 
				-    bool flipped = (high & 1) != 0;
			
 
				-    decode_subblock(pOut, r1, g1, b1, tableA, low, false, flipped);
			
 
				-    decode_subblock(pOut, r2, g2, b2, tableB, low, true, flipped);
			
 
				-}
			
 
				-
			
 
				-typedef struct {
			
 
				-    etc1_uint32 high;
			
 
				-    etc1_uint32 low;
			
 
				-    etc1_uint32 score; // Lower is more accurate
			
 
				-} etc_compressed;
			
 
				-
			
 
				-static
			
 
				-inline void take_best(etc_compressed* a, const etc_compressed* b) {
			
 
				-    if (a->score > b->score) {
			
 
				-        *a = *b;
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-void etc_average_colors_subblock(const etc1_byte* pIn, etc1_uint32 inMask,
			
 
				-        etc1_byte* pColors, bool flipped, bool second) {
			
 
				-    int r = 0;
			
 
				-    int g = 0;
			
 
				-    int b = 0;
			
 
				-
			
 
				-    if (flipped) {
			
 
				-        int by = 0;
			
 
				-        if (second) {
			
 
				-            by = 2;
			
 
				-        }
			
 
				-        for (int y = 0; y < 2; y++) {
			
 
				-            int yy = by + y;
			
 
				-            for (int x = 0; x < 4; x++) {
			
 
				-                int i = x + 4 * yy;
			
 
				-                if (inMask & (1 << i)) {
			
 
				-                    const etc1_byte* p = pIn + i * 3;
			
 
				-                    r += *(p++);
			
 
				-                    g += *(p++);
			
 
				-                    b += *(p++);
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    } else {
			
 
				-        int bx = 0;
			
 
				-        if (second) {
			
 
				-            bx = 2;
			
 
				-        }
			
 
				-        for (int y = 0; y < 4; y++) {
			
 
				-            for (int x = 0; x < 2; x++) {
			
 
				-                int xx = bx + x;
			
 
				-                int i = xx + 4 * y;
			
 
				-                if (inMask & (1 << i)) {
			
 
				-                    const etc1_byte* p = pIn + i * 3;
			
 
				-                    r += *(p++);
			
 
				-                    g += *(p++);
			
 
				-                    b += *(p++);
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-    pColors[0] = (etc1_byte)((r + 4) >> 3);
			
 
				-    pColors[1] = (etc1_byte)((g + 4) >> 3);
			
 
				-    pColors[2] = (etc1_byte)((b + 4) >> 3);
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-inline int square(int x) {
			
 
				-    return x * x;
			
 
				-}
			
 
				-
			
 
				-static etc1_uint32 chooseModifier(const etc1_byte* pBaseColors,
			
 
				-        const etc1_byte* pIn, etc1_uint32 *pLow, int bitIndex,
			
 
				-        const int* pModifierTable) {
			
 
				-    etc1_uint32 bestScore = ~0;
			
 
				-    int bestIndex = 0;
			
 
				-    int pixelR = pIn[0];
			
 
				-    int pixelG = pIn[1];
			
 
				-    int pixelB = pIn[2];
			
 
				-    int r = pBaseColors[0];
			
 
				-    int g = pBaseColors[1];
			
 
				-    int b = pBaseColors[2];
			
 
				-    for (int i = 0; i < 4; i++) {
			
 
				-        int modifier = pModifierTable[i];
			
 
				-        int decodedG = clamp(g + modifier);
			
 
				-        etc1_uint32 score = (etc1_uint32) (6 * square(decodedG - pixelG));
			
 
				-        if (score >= bestScore) {
			
 
				-            continue;
			
 
				-        }
			
 
				-        int decodedR = clamp(r + modifier);
			
 
				-        score += (etc1_uint32) (3 * square(decodedR - pixelR));
			
 
				-        if (score >= bestScore) {
			
 
				-            continue;
			
 
				-        }
			
 
				-        int decodedB = clamp(b + modifier);
			
 
				-        score += (etc1_uint32) square(decodedB - pixelB);
			
 
				-        if (score < bestScore) {
			
 
				-            bestScore = score;
			
 
				-            bestIndex = i;
			
 
				-        }
			
 
				-    }
			
 
				-    etc1_uint32 lowMask = (((bestIndex >> 1) << 16) | (bestIndex & 1))
			
 
				-            << bitIndex;
			
 
				-    *pLow |= lowMask;
			
 
				-    return bestScore;
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-void etc_encode_subblock_helper(const etc1_byte* pIn, etc1_uint32 inMask,
			
 
				-        etc_compressed* pCompressed, bool flipped, bool second,
			
 
				-        const etc1_byte* pBaseColors, const int* pModifierTable) {
			
 
				-    int score = pCompressed->score;
			
 
				-    if (flipped) {
			
 
				-        int by = 0;
			
 
				-        if (second) {
			
 
				-            by = 2;
			
 
				-        }
			
 
				-        for (int y = 0; y < 2; y++) {
			
 
				-            int yy = by + y;
			
 
				-            for (int x = 0; x < 4; x++) {
			
 
				-                int i = x + 4 * yy;
			
 
				-                if (inMask & (1 << i)) {
			
 
				-                    score += chooseModifier(pBaseColors, pIn + i * 3,
			
 
				-                            &pCompressed->low, yy + x * 4, pModifierTable);
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    } else {
			
 
				-        int bx = 0;
			
 
				-        if (second) {
			
 
				-            bx = 2;
			
 
				-        }
			
 
				-        for (int y = 0; y < 4; y++) {
			
 
				-            for (int x = 0; x < 2; x++) {
			
 
				-                int xx = bx + x;
			
 
				-                int i = xx + 4 * y;
			
 
				-                if (inMask & (1 << i)) {
			
 
				-                    score += chooseModifier(pBaseColors, pIn + i * 3,
			
 
				-                            &pCompressed->low, y + xx * 4, pModifierTable);
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-    pCompressed->score = score;
			
 
				-}
			
 
				-
			
 
				-static bool inRange4bitSigned(int color) {
			
 
				-    return color >= -4 && color <= 3;
			
 
				-}
			
 
				-
			
 
				-static void etc_encodeBaseColors(etc1_byte* pBaseColors,
			
 
				-        const etc1_byte* pColors, etc_compressed* pCompressed) {
			
 
				-    int r1, g1, b1, r2, g2, b2; // 8 bit base colors for sub-blocks
			
 
				-    bool differential;
			
 
				-    {
			
 
				-        int r51 = convert8To5(pColors[0]);
			
 
				-        int g51 = convert8To5(pColors[1]);
			
 
				-        int b51 = convert8To5(pColors[2]);
			
 
				-        int r52 = convert8To5(pColors[3]);
			
 
				-        int g52 = convert8To5(pColors[4]);
			
 
				-        int b52 = convert8To5(pColors[5]);
			
 
				-
			
 
				-        r1 = convert5To8(r51);
			
 
				-        g1 = convert5To8(g51);
			
 
				-        b1 = convert5To8(b51);
			
 
				-
			
 
				-        int dr = r52 - r51;
			
 
				-        int dg = g52 - g51;
			
 
				-        int db = b52 - b51;
			
 
				-
			
 
				-        differential = inRange4bitSigned(dr) && inRange4bitSigned(dg)
			
 
				-                && inRange4bitSigned(db);
			
 
				-        if (differential) {
			
 
				-            r2 = convert5To8(r51 + dr);
			
 
				-            g2 = convert5To8(g51 + dg);
			
 
				-            b2 = convert5To8(b51 + db);
			
 
				-            pCompressed->high |= (r51 << 27) | ((7 & dr) << 24) | (g51 << 19)
			
 
				-                    | ((7 & dg) << 16) | (b51 << 11) | ((7 & db) << 8) | 2;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    if (!differential) {
			
 
				-        int r41 = convert8To4(pColors[0]);
			
 
				-        int g41 = convert8To4(pColors[1]);
			
 
				-        int b41 = convert8To4(pColors[2]);
			
 
				-        int r42 = convert8To4(pColors[3]);
			
 
				-        int g42 = convert8To4(pColors[4]);
			
 
				-        int b42 = convert8To4(pColors[5]);
			
 
				-        r1 = convert4To8(r41);
			
 
				-        g1 = convert4To8(g41);
			
 
				-        b1 = convert4To8(b41);
			
 
				-        r2 = convert4To8(r42);
			
 
				-        g2 = convert4To8(g42);
			
 
				-        b2 = convert4To8(b42);
			
 
				-        pCompressed->high |= (r41 << 28) | (r42 << 24) | (g41 << 20) | (g42
			
 
				-                << 16) | (b41 << 12) | (b42 << 8);
			
 
				-    }
			
 
				-    pBaseColors[0] = r1;
			
 
				-    pBaseColors[1] = g1;
			
 
				-    pBaseColors[2] = b1;
			
 
				-    pBaseColors[3] = r2;
			
 
				-    pBaseColors[4] = g2;
			
 
				-    pBaseColors[5] = b2;
			
 
				-}
			
 
				-
			
 
				-static
			
 
				-void etc_encode_block_helper(const etc1_byte* pIn, etc1_uint32 inMask,
			
 
				-        const etc1_byte* pColors, etc_compressed* pCompressed, bool flipped) {
			
 
				-    pCompressed->score = ~0;
			
 
				-    pCompressed->high = (flipped ? 1 : 0);
			
 
				-    pCompressed->low = 0;
			
 
				-
			
 
				-    etc1_byte pBaseColors[6];
			
 
				-
			
 
				-    etc_encodeBaseColors(pBaseColors, pColors, pCompressed);
			
 
				-
			
 
				-    int originalHigh = pCompressed->high;
			
 
				-
			
 
				-    const int* pModifierTable = kModifierTable;
			
 
				-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
			
 
				-        etc_compressed temp;
			
 
				-        temp.score = 0;
			
 
				-        temp.high = originalHigh | (i << 5);
			
 
				-        temp.low = 0;
			
 
				-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, false,
			
 
				-                pBaseColors, pModifierTable);
			
 
				-        take_best(pCompressed, &temp);
			
 
				-    }
			
 
				-    pModifierTable = kModifierTable;
			
 
				-    etc_compressed firstHalf = *pCompressed;
			
 
				-    for (int i = 0; i < 8; i++, pModifierTable += 4) {
			
 
				-        etc_compressed temp;
			
 
				-        temp.score = firstHalf.score;
			
 
				-        temp.high = firstHalf.high | (i << 2);
			
 
				-        temp.low = firstHalf.low;
			
 
				-        etc_encode_subblock_helper(pIn, inMask, &temp, flipped, true,
			
 
				-                pBaseColors + 3, pModifierTable);
			
 
				-        if (i == 0) {
			
 
				-            *pCompressed = temp;
			
 
				-        } else {
			
 
				-            take_best(pCompressed, &temp);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void writeBigEndian(etc1_byte* pOut, etc1_uint32 d) {
			
 
				-    pOut[0] = (etc1_byte)(d >> 24);
			
 
				-    pOut[1] = (etc1_byte)(d >> 16);
			
 
				-    pOut[2] = (etc1_byte)(d >> 8);
			
 
				-    pOut[3] = (etc1_byte) d;
			
 
				-}
			
 
				-
			
 
				-// Input is a 4 x 4 square of 3-byte pixels in form R, G, B
			
 
				-// inmask is a 16-bit mask where bit (1 << (x + y * 4)) tells whether the corresponding (x,y)
			
 
				-// pixel is valid or not. Invalid pixel color values are ignored when compressing.
			
 
				-// Output is an ETC1 compressed version of the data.
			
 
				-
			
 
				-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 inMask,
			
 
				-        etc1_byte* pOut) {
			
 
				-    etc1_byte colors[6];
			
 
				-    etc1_byte flippedColors[6];
			
 
				-    etc_average_colors_subblock(pIn, inMask, colors, false, false);
			
 
				-    etc_average_colors_subblock(pIn, inMask, colors + 3, false, true);
			
 
				-    etc_average_colors_subblock(pIn, inMask, flippedColors, true, false);
			
 
				-    etc_average_colors_subblock(pIn, inMask, flippedColors + 3, true, true);
			
 
				-
			
 
				-    etc_compressed a, b;
			
 
				-    etc_encode_block_helper(pIn, inMask, colors, &a, false);
			
 
				-    etc_encode_block_helper(pIn, inMask, flippedColors, &b, true);
			
 
				-    take_best(&a, &b);
			
 
				-    writeBigEndian(pOut, a.high);
			
 
				-    writeBigEndian(pOut + 4, a.low);
			
 
				-}
			
 
				-
			
 
				-// Return the size of the encoded image data (does not include size of PKM header).
			
 
				-
			
 
				-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height) {
			
 
				-    return (((width + 3) & ~3) * ((height + 3) & ~3)) >> 1;
			
 
				-}
			
 
				-
			
 
				-// Encode an entire image.
			
 
				-// pIn - pointer to the image data. Formatted such that the Red component of
			
 
				-//       pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset;
			
 
				-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
			
 
				-
			
 
				-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
			
 
				-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut) {
			
 
				-    if (pixelSize < 2 || pixelSize > 4) {
			
 
				-        return -1;
			
 
				-    }
			
 
				-    static const unsigned short kYMask[] = { 0x0, 0xf, 0xff, 0xfff, 0xffff };
			
 
				-    static const unsigned short kXMask[] = { 0x0, 0x1111, 0x3333, 0x7777,
			
 
				-            0xffff };
			
 
				-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
			
 
				-    etc1_byte encoded[ETC1_ENCODED_BLOCK_SIZE];
			
 
				-
			
 
				-    etc1_uint32 encodedWidth = (width + 3) & ~3;
			
 
				-    etc1_uint32 encodedHeight = (height + 3) & ~3;
			
 
				-
			
 
				-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
			
 
				-        etc1_uint32 yEnd = height - y;
			
 
				-        if (yEnd > 4) {
			
 
				-            yEnd = 4;
			
 
				-        }
			
 
				-        int ymask = kYMask[yEnd];
			
 
				-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
			
 
				-            etc1_uint32 xEnd = width - x;
			
 
				-            if (xEnd > 4) {
			
 
				-                xEnd = 4;
			
 
				-            }
			
 
				-            int mask = ymask & kXMask[xEnd];
			
 
				-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
			
 
				-                etc1_byte* q = block + (cy * 4) * 3;
			
 
				-                const etc1_byte* p = pIn + pixelSize * x + stride * (y + cy);
			
 
				-                if (pixelSize >= 3) {
			
 
				-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
			
 
				-                        memcpy(q, p, 3);
			
 
				-                        q += 3;
			
 
				-                        p += pixelSize;
			
 
				-                    }
			
 
				-                } else {
			
 
				-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
			
 
				-                        int pixel = (p[1] << 8) | p[0];
			
 
				-                        *q++ = convert5To8(pixel >> 11);
			
 
				-                        *q++ = convert6To8(pixel >> 5);
			
 
				-                        *q++ = convert5To8(pixel);
			
 
				-                        p += pixelSize;
			
 
				-                    }
			
 
				-                }
			
 
				-            }
			
 
				-            etc1_encode_block(block, mask, encoded);
			
 
				-            memcpy(pOut, encoded, sizeof(encoded));
			
 
				-            pOut += sizeof(encoded);
			
 
				-        }
			
 
				-    }
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-// Decode an entire image.
			
 
				-// pIn - pointer to encoded data.
			
 
				-// pOut - pointer to the image data. Will be written such that the Red component of
			
 
				-//       pixel (x,y) is at pIn + pixelSize * x + stride * y + redOffset. Must be
			
 
				-//        large enough to store entire image.
			
 
				-
			
 
				-
			
 
				-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
			
 
				-        etc1_uint32 width, etc1_uint32 height,
			
 
				-        etc1_uint32 pixelSize, etc1_uint32 stride) {
			
 
				-    if (pixelSize < 2 || pixelSize > 4) {
			
 
				-        return -1;
			
 
				-    }
			
 
				-    etc1_byte block[ETC1_DECODED_BLOCK_SIZE];
			
 
				-
			
 
				-    etc1_uint32 encodedWidth = (width + 3) & ~3;
			
 
				-    etc1_uint32 encodedHeight = (height + 3) & ~3;
			
 
				-
			
 
				-    for (etc1_uint32 y = 0; y < encodedHeight; y += 4) {
			
 
				-        etc1_uint32 yEnd = height - y;
			
 
				-        if (yEnd > 4) {
			
 
				-            yEnd = 4;
			
 
				-        }
			
 
				-        for (etc1_uint32 x = 0; x < encodedWidth; x += 4) {
			
 
				-            etc1_uint32 xEnd = width - x;
			
 
				-            if (xEnd > 4) {
			
 
				-                xEnd = 4;
			
 
				-            }
			
 
				-            etc1_decode_block(pIn, block);
			
 
				-            pIn += ETC1_ENCODED_BLOCK_SIZE;
			
 
				-            for (etc1_uint32 cy = 0; cy < yEnd; cy++) {
			
 
				-                const etc1_byte* q = block + (cy * 4) * 3;
			
 
				-                etc1_byte* p = pOut + pixelSize * x + stride * (y + cy);
			
 
				-                if (pixelSize >= 3) {
			
 
				-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
			
 
				-                        memcpy(p, q, 3);
			
 
				-                        q += 3;
			
 
				-                        p += pixelSize;
			
 
				-                    }
			
 
				-                } else {
			
 
				-                    for (etc1_uint32 cx = 0; cx < xEnd; cx++) {
			
 
				-                        etc1_byte r = *q++;
			
 
				-                        etc1_byte g = *q++;
			
 
				-                        etc1_byte b = *q++;
			
 
				-                        etc1_uint32 pixel = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
			
 
				-                        *p++ = (etc1_byte) pixel;
			
 
				-                        *p++ = (etc1_byte) (pixel >> 8);
			
 
				-                    }
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-static const char kMagic[] = { 'P', 'K', 'M', ' ', '1', '0' };
			
 
				-
			
 
				-static const etc1_uint32 ETC1_PKM_FORMAT_OFFSET = 6;
			
 
				-static const etc1_uint32 ETC1_PKM_ENCODED_WIDTH_OFFSET = 8;
			
 
				-static const etc1_uint32 ETC1_PKM_ENCODED_HEIGHT_OFFSET = 10;
			
 
				-static const etc1_uint32 ETC1_PKM_WIDTH_OFFSET = 12;
			
 
				-static const etc1_uint32 ETC1_PKM_HEIGHT_OFFSET = 14;
			
 
				-
			
 
				-static const etc1_uint32 ETC1_RGB_NO_MIPMAPS = 0;
			
 
				-
			
 
				-static void writeBEUint16(etc1_byte* pOut, etc1_uint32 data) {
			
 
				-    pOut[0] = (etc1_byte) (data >> 8);
			
 
				-    pOut[1] = (etc1_byte) data;
			
 
				-}
			
 
				-
			
 
				-static etc1_uint32 readBEUint16(const etc1_byte* pIn) {
			
 
				-    return (pIn[0] << 8) | pIn[1];
			
 
				-}
			
 
				-
			
 
				-// Format a PKM header
			
 
				-
			
 
				-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height) {
			
 
				-    memcpy(pHeader, kMagic, sizeof(kMagic));
			
 
				-    etc1_uint32 encodedWidth = (width + 3) & ~3;
			
 
				-    etc1_uint32 encodedHeight = (height + 3) & ~3;
			
 
				-    writeBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET, ETC1_RGB_NO_MIPMAPS);
			
 
				-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET, encodedWidth);
			
 
				-    writeBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET, encodedHeight);
			
 
				-    writeBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET, width);
			
 
				-    writeBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET, height);
			
 
				-}
			
 
				-
			
 
				-// Check if a PKM header is correctly formatted.
			
 
				-
			
 
				-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader) {
			
 
				-    if (memcmp(pHeader, kMagic, sizeof(kMagic))) {
			
 
				-        return false;
			
 
				-    }
			
 
				-    etc1_uint32 format = readBEUint16(pHeader + ETC1_PKM_FORMAT_OFFSET);
			
 
				-    etc1_uint32 encodedWidth = readBEUint16(pHeader + ETC1_PKM_ENCODED_WIDTH_OFFSET);
			
 
				-    etc1_uint32 encodedHeight = readBEUint16(pHeader + ETC1_PKM_ENCODED_HEIGHT_OFFSET);
			
 
				-    etc1_uint32 width = readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
			
 
				-    etc1_uint32 height = readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
			
 
				-    return format == ETC1_RGB_NO_MIPMAPS &&
			
 
				-            encodedWidth >= width && encodedWidth - width < 4 &&
			
 
				-            encodedHeight >= height && encodedHeight - height < 4;
			
 
				-}
			
 
				-
			
 
				-// Read the image width from a PKM header
			
 
				-
			
 
				-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader) {
			
 
				-    return readBEUint16(pHeader + ETC1_PKM_WIDTH_OFFSET);
			
 
				-}
			
 
				-
			
 
				-// Read the image height from a PKM header
			
 
				-
			
 
				-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader){
			
 
				-    return readBEUint16(pHeader + ETC1_PKM_HEIGHT_OFFSET);
			
 
				-}
			
--- a/3rdparty/etc1/etc1.h
+++ b/3rdparty/etc1/etc1.h
@@ -1,114 +0,0 @@
 
				-// Copyright 2009 Google Inc.
			
 
				-//
			
 
				-// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-// you may not use this file except in compliance with the License.
			
 
				-// You may obtain a copy of the License at
			
 
				-//
			
 
				-//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-//
			
 
				-// Unless required by applicable law or agreed to in writing, software
			
 
				-// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-// See the License for the specific language governing permissions and
			
 
				-// limitations under the License.
			
 
				-
			
 
				-//////////////////////////////////////////////////////////////////////////////////////////
			
 
				-
			
 
				-// This is a fork of the AOSP project ETC1 codec. The original code can be found
			
 
				-// at the following web site:
			
 
				-// https://android.googlesource.com/platform/frameworks/native/+/master/opengl/libs/ETC1/
			
 
				-
			
 
				-//////////////////////////////////////////////////////////////////////////////////////////
			
 
				-
			
 
				-#ifndef __etc1_h__
			
 
				-#define __etc1_h__
			
 
				-
			
 
				-#define ETC1_ENCODED_BLOCK_SIZE 8
			
 
				-#define ETC1_DECODED_BLOCK_SIZE 48
			
 
				-
			
 
				-#ifndef ETC1_RGB8_OES
			
 
				-#define ETC1_RGB8_OES 0x8D64
			
 
				-#endif
			
 
				-
			
 
				-typedef unsigned char etc1_byte;
			
 
				-typedef int etc1_bool;
			
 
				-typedef unsigned int etc1_uint32;
			
 
				-
			
 
				-#ifdef __cplusplus
			
 
				-extern "C" {
			
 
				-#endif
			
 
				-
			
 
				-// Encode a block of pixels.
			
 
				-//
			
 
				-// pIn is a pointer to a ETC_DECODED_BLOCK_SIZE array of bytes that represent a
			
 
				-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y) is the R
			
 
				-// value of pixel (x, y).
			
 
				-//
			
 
				-// validPixelMask is a 16-bit mask where bit (1 << (x + y * 4)) indicates whether
			
 
				-// the corresponding (x,y) pixel is valid. Invalid pixel color values are ignored when compressing.
			
 
				-//
			
 
				-// pOut is an ETC1 compressed version of the data.
			
 
				-
			
 
				-void etc1_encode_block(const etc1_byte* pIn, etc1_uint32 validPixelMask, etc1_byte* pOut);
			
 
				-
			
 
				-// Decode a block of pixels.
			
 
				-//
			
 
				-// pIn is an ETC1 compressed version of the data.
			
 
				-//
			
 
				-// pOut is a pointer to a ETC_DECODED_BLOCK_SIZE array of bytes that represent a
			
 
				-// 4 x 4 square of 3-byte pixels in form R, G, B. Byte (3 * (x + 4 * y) is the R
			
 
				-// value of pixel (x, y).
			
 
				-
			
 
				-void etc1_decode_block(const etc1_byte* pIn, etc1_byte* pOut);
			
 
				-
			
 
				-// Return the size of the encoded image data (does not include size of PKM header).
			
 
				-
			
 
				-etc1_uint32 etc1_get_encoded_data_size(etc1_uint32 width, etc1_uint32 height);
			
 
				-
			
 
				-// Encode an entire image.
			
 
				-// pIn - pointer to the image data. Formatted such that
			
 
				-//       pixel (x,y) is at pIn + pixelSize * x + stride * y;
			
 
				-// pOut - pointer to encoded data. Must be large enough to store entire encoded image.
			
 
				-// pixelSize can be 2 or 3. 2 is an GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
			
 
				-// returns non-zero if there is an error.
			
 
				-
			
 
				-int etc1_encode_image(const etc1_byte* pIn, etc1_uint32 width, etc1_uint32 height,
			
 
				-        etc1_uint32 pixelSize, etc1_uint32 stride, etc1_byte* pOut);
			
 
				-
			
 
				-// Decode an entire image.
			
 
				-// pIn - pointer to encoded data.
			
 
				-// pOut - pointer to the image data. Will be written such that
			
 
				-//        pixel (x,y) is at pIn + pixelSize * x + stride * y. Must be
			
 
				-//        large enough to store entire image.
			
 
				-// pixelSize can be 2 or 3. 2 is an GL_UNSIGNED_SHORT_5_6_5 image, 3 is a GL_BYTE RGB image.
			
 
				-// returns non-zero if there is an error.
			
 
				-
			
 
				-int etc1_decode_image(const etc1_byte* pIn, etc1_byte* pOut,
			
 
				-        etc1_uint32 width, etc1_uint32 height,
			
 
				-        etc1_uint32 pixelSize, etc1_uint32 stride);
			
 
				-
			
 
				-// Size of a PKM header, in bytes.
			
 
				-
			
 
				-#define ETC_PKM_HEADER_SIZE 16
			
 
				-
			
 
				-// Format a PKM header
			
 
				-
			
 
				-void etc1_pkm_format_header(etc1_byte* pHeader, etc1_uint32 width, etc1_uint32 height);
			
 
				-
			
 
				-// Check if a PKM header is correctly formatted.
			
 
				-
			
 
				-etc1_bool etc1_pkm_is_valid(const etc1_byte* pHeader);
			
 
				-
			
 
				-// Read the image width from a PKM header
			
 
				-
			
 
				-etc1_uint32 etc1_pkm_get_width(const etc1_byte* pHeader);
			
 
				-
			
 
				-// Read the image height from a PKM header
			
 
				-
			
 
				-etc1_uint32 etc1_pkm_get_height(const etc1_byte* pHeader);
			
 
				-
			
 
				-#ifdef __cplusplus
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/LICENSE.txt
+++ b/3rdparty/etc2/LICENSE.txt
@@ -1,24 +0,0 @@
 
				-Copyright (c) 2013, Bartosz Taudul <[email protected]>
			
 
				-All rights reserved.
			
 
				-
			
 
				-Redistribution and use in source and binary forms, with or without
			
 
				-modification, are permitted provided that the following conditions are met:
			
 
				-    * Redistributions of source code must retain the above copyright
			
 
				-      notice, this list of conditions and the following disclaimer.
			
 
				-    * Redistributions in binary form must reproduce the above copyright
			
 
				-      notice, this list of conditions and the following disclaimer in the
			
 
				-      documentation and/or other materials provided with the distribution.
			
 
				-    * Neither the name of the <organization> nor the
			
 
				-      names of its contributors may be used to endorse or promote products
			
 
				-      derived from this software without specific prior written permission.
			
 
				-
			
 
				-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
			
 
				-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
			
 
				-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
			
 
				-DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
			
 
				-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
			
 
				-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
			
 
				-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
			
 
				-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
			
 
				-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
--- a/3rdparty/etc2/Math.hpp
+++ b/3rdparty/etc2/Math.hpp
@@ -1,90 +0,0 @@
 
				-#ifndef __DARKRL__MATH_HPP__
			
 
				-#define __DARKRL__MATH_HPP__
			
 
				-
			
 
				-#include <algorithm>
			
 
				-#include <math.h>
			
 
				-
			
 
				-#include "Types.hpp"
			
 
				-
			
 
				-template<typename T>
			
 
				-inline T AlignPOT( T val )
			
 
				-{
			
 
				-    if( val == 0 ) return 1;
			
 
				-    val--;
			
 
				-    for( unsigned int i=1; i<sizeof( T ) * 8; i <<= 1 )
			
 
				-    {
			
 
				-        val |= val >> i;
			
 
				-    }
			
 
				-    return val + 1;
			
 
				-}
			
 
				-
			
 
				-inline int CountSetBits( uint32 val )
			
 
				-{
			
 
				-    val -= ( val >> 1 ) & 0x55555555;
			
 
				-    val = ( ( val >> 2 ) & 0x33333333 ) + ( val & 0x33333333 );
			
 
				-    val = ( ( val >> 4 ) + val ) & 0x0f0f0f0f;
			
 
				-    val += val >> 8;
			
 
				-    val += val >> 16;
			
 
				-    return val & 0x0000003f;
			
 
				-}
			
 
				-
			
 
				-inline int CountLeadingZeros( uint32 val )
			
 
				-{
			
 
				-    val |= val >> 1;
			
 
				-    val |= val >> 2;
			
 
				-    val |= val >> 4;
			
 
				-    val |= val >> 8;
			
 
				-    val |= val >> 16;
			
 
				-    return 32 - CountSetBits( val );
			
 
				-}
			
 
				-
			
 
				-inline float sRGB2linear( float v )
			
 
				-{
			
 
				-    const float a = 0.055f;
			
 
				-    if( v <= 0.04045f )
			
 
				-    {
			
 
				-        return v / 12.92f;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        return powf( ( v + a ) / ( 1 + a ), 2.4f );
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-inline float linear2sRGB( float v )
			
 
				-{
			
 
				-    const float a = 0.055f;
			
 
				-    if( v <= 0.0031308f )
			
 
				-    {
			
 
				-        return 12.92f * v;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        return ( 1 + a ) * pow( v, 1/2.4f ) - a;
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-inline T SmoothStep( T x )
			
 
				-{
			
 
				-    return x*x*(3-2*x);
			
 
				-}
			
 
				-
			
 
				-inline uint8 clampu8( int32 val )
			
 
				-{
			
 
				-    return std::min( std::max( 0, val ), 255 );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-inline T sq( T val )
			
 
				-{
			
 
				-    return val * val;
			
 
				-}
			
 
				-
			
 
				-static inline int mul8bit( int a, int b )
			
 
				-{
			
 
				-    int t = a*b + 128;
			
 
				-    return ( t + ( t >> 8 ) ) >> 8;
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/ProcessCommon.hpp
+++ b/3rdparty/etc2/ProcessCommon.hpp
@@ -1,51 +0,0 @@
 
				-#ifndef __PROCESSCOMMON_HPP__
			
 
				-#define __PROCESSCOMMON_HPP__
			
 
				-
			
 
				-#include <assert.h>
			
 
				-#include <stddef.h>
			
 
				-
			
 
				-#include "Types.hpp"
			
 
				-
			
 
				-template<class T>
			
 
				-static size_t GetLeastError( const T* err, size_t num )
			
 
				-{
			
 
				-    size_t idx = 0;
			
 
				-    for( size_t i=1; i<num; i++ )
			
 
				-    {
			
 
				-        if( err[i] < err[idx] )
			
 
				-        {
			
 
				-            idx = i;
			
 
				-        }
			
 
				-    }
			
 
				-    return idx;
			
 
				-}
			
 
				-
			
 
				-static uint64 FixByteOrder( uint64 d )
			
 
				-{
			
 
				-    return ( ( d & 0x00000000FFFFFFFF ) ) |
			
 
				-           ( ( d & 0xFF00000000000000 ) >> 24 ) |
			
 
				-           ( ( d & 0x000000FF00000000 ) << 24 ) |
			
 
				-           ( ( d & 0x00FF000000000000 ) >> 8 ) |
			
 
				-           ( ( d & 0x0000FF0000000000 ) << 8 );
			
 
				-}
			
 
				-
			
 
				-template<class T, class S>
			
 
				-static uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id )
			
 
				-{
			
 
				-    size_t tidx[2];
			
 
				-    tidx[0] = GetLeastError( terr[0], 8 );
			
 
				-    tidx[1] = GetLeastError( terr[1], 8 );
			
 
				-
			
 
				-    d |= tidx[0] << 26;
			
 
				-    d |= tidx[1] << 29;
			
 
				-    for( int i=0; i<16; i++ )
			
 
				-    {
			
 
				-        uint64 t = tsel[i][tidx[id[i]%2]];
			
 
				-        d |= ( t & 0x1 ) << ( i + 32 );
			
 
				-        d |= ( t & 0x2 ) << ( i + 47 );
			
 
				-    }
			
 
				-
			
 
				-    return d;
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/ProcessRGB.cpp
+++ b/3rdparty/etc2/ProcessRGB.cpp
@@ -1,719 +0,0 @@
 
				-#include <string.h>
			
 
				-
			
 
				-#include "Math.hpp"
			
 
				-#include "ProcessCommon.hpp"
			
 
				-#include "ProcessRGB.hpp"
			
 
				-#include "Tables.hpp"
			
 
				-#include "Types.hpp"
			
 
				-#include "Vector.hpp"
			
 
				-
			
 
				-#include <bx/endian.h>
			
 
				-
			
 
				-#ifdef __SSE4_1__
			
 
				-#  ifdef _MSC_VER
			
 
				-#    include <intrin.h>
			
 
				-#    include <Windows.h>
			
 
				-#  else
			
 
				-#    include <x86intrin.h>
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-namespace
			
 
				-{
			
 
				-
			
 
				-typedef uint16 v4i[4];
			
 
				-
			
 
				-void Average( const uint8* data, v4i* a )
			
 
				-{
			
 
				-#ifdef __SSE4_1__
			
 
				-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
			
 
				-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
			
 
				-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
			
 
				-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
			
 
				-
			
 
				-    __m128i d0l = _mm_unpacklo_epi8(d0, _mm_setzero_si128());
			
 
				-    __m128i d0h = _mm_unpackhi_epi8(d0, _mm_setzero_si128());
			
 
				-    __m128i d1l = _mm_unpacklo_epi8(d1, _mm_setzero_si128());
			
 
				-    __m128i d1h = _mm_unpackhi_epi8(d1, _mm_setzero_si128());
			
 
				-    __m128i d2l = _mm_unpacklo_epi8(d2, _mm_setzero_si128());
			
 
				-    __m128i d2h = _mm_unpackhi_epi8(d2, _mm_setzero_si128());
			
 
				-    __m128i d3l = _mm_unpacklo_epi8(d3, _mm_setzero_si128());
			
 
				-    __m128i d3h = _mm_unpackhi_epi8(d3, _mm_setzero_si128());
			
 
				-
			
 
				-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
			
 
				-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
			
 
				-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
			
 
				-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
			
 
				-
			
 
				-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
			
 
				-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
			
 
				-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
			
 
				-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
			
 
				-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
			
 
				-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
			
 
				-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
			
 
				-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
			
 
				-
			
 
				-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
			
 
				-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
			
 
				-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
			
 
				-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
			
 
				-
			
 
				-    __m128i a0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b2, b3), _mm_set1_epi32(4)), 3);
			
 
				-    __m128i a1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b1), _mm_set1_epi32(4)), 3);
			
 
				-    __m128i a2 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b1, b3), _mm_set1_epi32(4)), 3);
			
 
				-    __m128i a3 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(b0, b2), _mm_set1_epi32(4)), 3);
			
 
				-
			
 
				-    _mm_storeu_si128((__m128i*)&a[0], _mm_packus_epi32(_mm_shuffle_epi32(a0, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a1, _MM_SHUFFLE(3, 0, 1, 2))));
			
 
				-    _mm_storeu_si128((__m128i*)&a[2], _mm_packus_epi32(_mm_shuffle_epi32(a2, _MM_SHUFFLE(3, 0, 1, 2)), _mm_shuffle_epi32(a3, _MM_SHUFFLE(3, 0, 1, 2))));
			
 
				-#else
			
 
				-    uint32 r[4];
			
 
				-    uint32 g[4];
			
 
				-    uint32 b[4];
			
 
				-
			
 
				-    memset(r, 0, sizeof(r));
			
 
				-    memset(g, 0, sizeof(g));
			
 
				-    memset(b, 0, sizeof(b));
			
 
				-
			
 
				-    for( int j=0; j<4; j++ )
			
 
				-    {
			
 
				-        for( int i=0; i<4; i++ )
			
 
				-        {
			
 
				-            int index = (j & 2) + (i >> 1);
			
 
				-            b[index] += *data++;
			
 
				-            g[index] += *data++;
			
 
				-            r[index] += *data++;
			
 
				-            data++;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    a[0][0] = uint16( (r[2] + r[3] + 4) / 8 );
			
 
				-    a[0][1] = uint16( (g[2] + g[3] + 4) / 8 );
			
 
				-    a[0][2] = uint16( (b[2] + b[3] + 4) / 8 );
			
 
				-    a[0][3] = 0;
			
 
				-    a[1][0] = uint16( (r[0] + r[1] + 4) / 8 );
			
 
				-    a[1][1] = uint16( (g[0] + g[1] + 4) / 8 );
			
 
				-    a[1][2] = uint16( (b[0] + b[1] + 4) / 8 );
			
 
				-    a[1][3] = 0;
			
 
				-    a[2][0] = uint16( (r[1] + r[3] + 4) / 8 );
			
 
				-    a[2][1] = uint16( (g[1] + g[3] + 4) / 8 );
			
 
				-    a[2][2] = uint16( (b[1] + b[3] + 4) / 8 );
			
 
				-    a[2][3] = 0;
			
 
				-    a[3][0] = uint16( (r[0] + r[2] + 4) / 8 );
			
 
				-    a[3][1] = uint16( (g[0] + g[2] + 4) / 8 );
			
 
				-    a[3][2] = uint16( (b[0] + b[2] + 4) / 8 );
			
 
				-    a[3][3] = 0;
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void CalcErrorBlock( const uint8* data, uint err[4][4] )
			
 
				-{
			
 
				-#ifdef __SSE4_1__
			
 
				-    __m128i d0 = _mm_loadu_si128(((__m128i*)data) + 0);
			
 
				-    __m128i d1 = _mm_loadu_si128(((__m128i*)data) + 1);
			
 
				-    __m128i d2 = _mm_loadu_si128(((__m128i*)data) + 2);
			
 
				-    __m128i d3 = _mm_loadu_si128(((__m128i*)data) + 3);
			
 
				-
			
 
				-    __m128i dm0 = _mm_and_si128(d0, _mm_set1_epi32(0x00FFFFFF));
			
 
				-    __m128i dm1 = _mm_and_si128(d1, _mm_set1_epi32(0x00FFFFFF));
			
 
				-    __m128i dm2 = _mm_and_si128(d2, _mm_set1_epi32(0x00FFFFFF));
			
 
				-    __m128i dm3 = _mm_and_si128(d3, _mm_set1_epi32(0x00FFFFFF));
			
 
				-
			
 
				-    __m128i d0l = _mm_unpacklo_epi8(dm0, _mm_setzero_si128());
			
 
				-    __m128i d0h = _mm_unpackhi_epi8(dm0, _mm_setzero_si128());
			
 
				-    __m128i d1l = _mm_unpacklo_epi8(dm1, _mm_setzero_si128());
			
 
				-    __m128i d1h = _mm_unpackhi_epi8(dm1, _mm_setzero_si128());
			
 
				-    __m128i d2l = _mm_unpacklo_epi8(dm2, _mm_setzero_si128());
			
 
				-    __m128i d2h = _mm_unpackhi_epi8(dm2, _mm_setzero_si128());
			
 
				-    __m128i d3l = _mm_unpacklo_epi8(dm3, _mm_setzero_si128());
			
 
				-    __m128i d3h = _mm_unpackhi_epi8(dm3, _mm_setzero_si128());
			
 
				-
			
 
				-    __m128i sum0 = _mm_add_epi16(d0l, d1l);
			
 
				-    __m128i sum1 = _mm_add_epi16(d0h, d1h);
			
 
				-    __m128i sum2 = _mm_add_epi16(d2l, d3l);
			
 
				-    __m128i sum3 = _mm_add_epi16(d2h, d3h);
			
 
				-
			
 
				-    __m128i sum0l = _mm_unpacklo_epi16(sum0, _mm_setzero_si128());
			
 
				-    __m128i sum0h = _mm_unpackhi_epi16(sum0, _mm_setzero_si128());
			
 
				-    __m128i sum1l = _mm_unpacklo_epi16(sum1, _mm_setzero_si128());
			
 
				-    __m128i sum1h = _mm_unpackhi_epi16(sum1, _mm_setzero_si128());
			
 
				-    __m128i sum2l = _mm_unpacklo_epi16(sum2, _mm_setzero_si128());
			
 
				-    __m128i sum2h = _mm_unpackhi_epi16(sum2, _mm_setzero_si128());
			
 
				-    __m128i sum3l = _mm_unpacklo_epi16(sum3, _mm_setzero_si128());
			
 
				-    __m128i sum3h = _mm_unpackhi_epi16(sum3, _mm_setzero_si128());
			
 
				-
			
 
				-    __m128i b0 = _mm_add_epi32(sum0l, sum0h);
			
 
				-    __m128i b1 = _mm_add_epi32(sum1l, sum1h);
			
 
				-    __m128i b2 = _mm_add_epi32(sum2l, sum2h);
			
 
				-    __m128i b3 = _mm_add_epi32(sum3l, sum3h);
			
 
				-
			
 
				-    __m128i a0 = _mm_add_epi32(b2, b3);
			
 
				-    __m128i a1 = _mm_add_epi32(b0, b1);
			
 
				-    __m128i a2 = _mm_add_epi32(b1, b3);
			
 
				-    __m128i a3 = _mm_add_epi32(b0, b2);
			
 
				-
			
 
				-    _mm_storeu_si128((__m128i*)&err[0], a0);
			
 
				-    _mm_storeu_si128((__m128i*)&err[1], a1);
			
 
				-    _mm_storeu_si128((__m128i*)&err[2], a2);
			
 
				-    _mm_storeu_si128((__m128i*)&err[3], a3);
			
 
				-#else
			
 
				-    uint terr[4][4];
			
 
				-
			
 
				-    memset(terr, 0, 16 * sizeof(uint));
			
 
				-
			
 
				-    for( int j=0; j<4; j++ )
			
 
				-    {
			
 
				-        for( int i=0; i<4; i++ )
			
 
				-        {
			
 
				-            int index = (j & 2) + (i >> 1);
			
 
				-            uint d = *data++;
			
 
				-            terr[index][0] += d;
			
 
				-            d = *data++;
			
 
				-            terr[index][1] += d;
			
 
				-            d = *data++;
			
 
				-            terr[index][2] += d;
			
 
				-            data++;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    for( int i=0; i<3; i++ )
			
 
				-    {
			
 
				-        err[0][i] = terr[2][i] + terr[3][i];
			
 
				-        err[1][i] = terr[0][i] + terr[1][i];
			
 
				-        err[2][i] = terr[1][i] + terr[3][i];
			
 
				-        err[3][i] = terr[0][i] + terr[2][i];
			
 
				-    }
			
 
				-    for( int i=0; i<4; i++ )
			
 
				-    {
			
 
				-        err[i][3] = 0;
			
 
				-    }
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-uint CalcError( const uint block[4], const v4i& average )
			
 
				-{
			
 
				-    uint err = 0x3FFFFFFF; // Big value to prevent negative values, but small enough to prevent overflow
			
 
				-    err -= block[0] * 2 * average[2];
			
 
				-    err -= block[1] * 2 * average[1];
			
 
				-    err -= block[2] * 2 * average[0];
			
 
				-    err += 8 * ( sq( average[0] ) + sq( average[1] ) + sq( average[2] ) );
			
 
				-    return err;
			
 
				-}
			
 
				-
			
 
				-void ProcessAverages( v4i* a )
			
 
				-{
			
 
				-#ifdef __SSE4_1__
			
 
				-    for( int i=0; i<2; i++ )
			
 
				-    {
			
 
				-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
			
 
				-
			
 
				-        __m128i t = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(31)), _mm_set1_epi16(128));
			
 
				-
			
 
				-        __m128i c = _mm_srli_epi16(_mm_add_epi16(t, _mm_srli_epi16(t, 8)), 8);
			
 
				-
			
 
				-        __m128i c1 = _mm_shuffle_epi32(c, _MM_SHUFFLE(3, 2, 3, 2));
			
 
				-        __m128i diff = _mm_sub_epi16(c, c1);
			
 
				-        diff = _mm_max_epi16(diff, _mm_set1_epi16(-4));
			
 
				-        diff = _mm_min_epi16(diff, _mm_set1_epi16(3));
			
 
				-
			
 
				-        __m128i co = _mm_add_epi16(c1, diff);
			
 
				-
			
 
				-        c = _mm_blend_epi16(co, c, 0xF0);
			
 
				-
			
 
				-        __m128i a0 = _mm_or_si128(_mm_slli_epi16(c, 3), _mm_srli_epi16(c, 2));
			
 
				-
			
 
				-        _mm_storeu_si128((__m128i*)a[4+i*2], a0);
			
 
				-    }
			
 
				-
			
 
				-    for( int i=0; i<2; i++ )
			
 
				-    {
			
 
				-        __m128i d = _mm_loadu_si128((__m128i*)a[i*2]);
			
 
				-
			
 
				-        __m128i t0 = _mm_add_epi16(_mm_mullo_epi16(d, _mm_set1_epi16(15)), _mm_set1_epi16(128));
			
 
				-        __m128i t1 = _mm_srli_epi16(_mm_add_epi16(t0, _mm_srli_epi16(t0, 8)), 8);
			
 
				-
			
 
				-        __m128i t2 = _mm_or_si128(t1, _mm_slli_epi16(t1, 4));
			
 
				-
			
 
				-        _mm_storeu_si128((__m128i*)a[i*2], t2);
			
 
				-    }
			
 
				-#else
			
 
				-    for( int i=0; i<2; i++ )
			
 
				-    {
			
 
				-        for( int j=0; j<3; j++ )
			
 
				-        {
			
 
				-            int32 c1 = mul8bit( a[i*2+1][j], 31 );
			
 
				-            int32 c2 = mul8bit( a[i*2][j], 31 );
			
 
				-
			
 
				-            int32 diff = c2 - c1;
			
 
				-            if( diff > 3 ) diff = 3;
			
 
				-            else if( diff < -4 ) diff = -4;
			
 
				-
			
 
				-            int32 co = c1 + diff;
			
 
				-
			
 
				-            a[5+i*2][j] = ( c1 << 3 ) | ( c1 >> 2 );
			
 
				-            a[4+i*2][j] = ( co << 3 ) | ( co >> 2 );
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    for( int i=0; i<4; i++ )
			
 
				-    {
			
 
				-        a[i][0] = g_avg2[mul8bit( a[i][0], 15 )];
			
 
				-        a[i][1] = g_avg2[mul8bit( a[i][1], 15 )];
			
 
				-        a[i][2] = g_avg2[mul8bit( a[i][2], 15 )];
			
 
				-    }
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-void EncodeAverages( uint64& _d, const v4i* a, size_t idx )
			
 
				-{
			
 
				-    uint64 d = _d;
			
 
				-    d |= ( idx << 24 );
			
 
				-    size_t base = idx << 1;
			
 
				-
			
 
				-    if( ( idx & 0x2 ) == 0 )
			
 
				-    {
			
 
				-        for( int i=0; i<3; i++ )
			
 
				-        {
			
 
				-            d |= uint64( a[base+0][i] >> 4 ) << ( i*8 );
			
 
				-            d |= uint64( a[base+1][i] >> 4 ) << ( i*8 + 4 );
			
 
				-        }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        for( int i=0; i<3; i++ )
			
 
				-        {
			
 
				-            d |= uint64( a[base+1][i] & 0xF8 ) << ( i*8 );
			
 
				-            int32 c = ( ( a[base+0][i] & 0xF8 ) - ( a[base+1][i] & 0xF8 ) ) >> 3;
			
 
				-            c &= ~0xFFFFFFF8;
			
 
				-            d |= ((uint64)c) << ( i*8 );
			
 
				-        }
			
 
				-    }
			
 
				-    _d = d;
			
 
				-}
			
 
				-
			
 
				-uint64 CheckSolid( const uint8* src )
			
 
				-{
			
 
				-#ifdef __SSE4_1__
			
 
				-    __m128i d0 = _mm_loadu_si128(((__m128i*)src) + 0);
			
 
				-    __m128i d1 = _mm_loadu_si128(((__m128i*)src) + 1);
			
 
				-    __m128i d2 = _mm_loadu_si128(((__m128i*)src) + 2);
			
 
				-    __m128i d3 = _mm_loadu_si128(((__m128i*)src) + 3);
			
 
				-
			
 
				-    __m128i c = _mm_shuffle_epi32(d0, _MM_SHUFFLE(0, 0, 0, 0));
			
 
				-
			
 
				-    __m128i c0 = _mm_cmpeq_epi8(d0, c);
			
 
				-    __m128i c1 = _mm_cmpeq_epi8(d1, c);
			
 
				-    __m128i c2 = _mm_cmpeq_epi8(d2, c);
			
 
				-    __m128i c3 = _mm_cmpeq_epi8(d3, c);
			
 
				-
			
 
				-    __m128i m0 = _mm_and_si128(c0, c1);
			
 
				-    __m128i m1 = _mm_and_si128(c2, c3);
			
 
				-    __m128i m = _mm_and_si128(m0, m1);
			
 
				-
			
 
				-    if (!_mm_testc_si128(m, _mm_set1_epi32(-1)))
			
 
				-    {
			
 
				-        return 0;
			
 
				-    }
			
 
				-#else
			
 
				-    const uint8* ptr = src + 4;
			
 
				-    for( int i=1; i<16; i++ )
			
 
				-    {
			
 
				-        if( memcmp( src, ptr, 4 ) != 0 )
			
 
				-        {
			
 
				-            return 0;
			
 
				-        }
			
 
				-        ptr += 4;
			
 
				-    }
			
 
				-#endif
			
 
				-    return 0x02000000 |
			
 
				-        ( uint( src[0] & 0xF8 ) << 16 ) |
			
 
				-        ( uint( src[1] & 0xF8 ) << 8 ) |
			
 
				-        ( uint( src[2] & 0xF8 ) );
			
 
				-}
			
 
				-
			
 
				-void PrepareAverages( v4i a[8], const uint8* src, uint err[4] )
			
 
				-{
			
 
				-    Average( src, a );
			
 
				-    ProcessAverages( a );
			
 
				-
			
 
				-    uint errblock[4][4];
			
 
				-    CalcErrorBlock( src, errblock );
			
 
				-
			
 
				-    for( int i=0; i<4; i++ )
			
 
				-    {
			
 
				-        err[i/2] += CalcError( errblock[i], a[i] );
			
 
				-        err[2+i/2] += CalcError( errblock[i], a[i+4] );
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-void FindBestFit( uint64 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
			
 
				-{
			
 
				-    for( size_t i=0; i<16; i++ )
			
 
				-    {
			
 
				-        uint16* sel = tsel[i];
			
 
				-        uint bid = id[i];
			
 
				-        uint64* ter = terr[bid%2];
			
 
				-
			
 
				-        uint8 b = *data++;
			
 
				-        uint8 g = *data++;
			
 
				-        uint8 r = *data++;
			
 
				-        data++;
			
 
				-
			
 
				-        int dr = a[bid][0] - r;
			
 
				-        int dg = a[bid][1] - g;
			
 
				-        int db = a[bid][2] - b;
			
 
				-
			
 
				-#ifdef __SSE4_1__
			
 
				-        // Reference implementation
			
 
				-
			
 
				-        __m128i pix = _mm_set1_epi32(dr * 77 + dg * 151 + db * 28);
			
 
				-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
			
 
				-        __m128i error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[0]));
			
 
				-        __m128i error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[1]));
			
 
				-        __m128i error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[0]));
			
 
				-        __m128i error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[1]));
			
 
				-
			
 
				-        __m128i index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
			
 
				-        __m128i minError0 = _mm_min_epi32(error0, error1);
			
 
				-
			
 
				-        __m128i index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
			
 
				-        __m128i minError1 = _mm_min_epi32(error2, error3);
			
 
				-
			
 
				-        __m128i minIndex0 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
			
 
				-        __m128i minError = _mm_min_epi32(minError0, minError1);
			
 
				-
			
 
				-        // Squaring the minimum error to produce correct values when adding
			
 
				-        __m128i minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
			
 
				-        __m128i squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
			
 
				-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
			
 
				-        __m128i minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
			
 
				-        __m128i squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
			
 
				-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
			
 
				-
			
 
				-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
			
 
				-        error0 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[2]));
			
 
				-        error1 = _mm_abs_epi32(_mm_add_epi32(pix, g_table256_SIMD[3]));
			
 
				-        error2 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[2]));
			
 
				-        error3 = _mm_abs_epi32(_mm_sub_epi32(pix, g_table256_SIMD[3]));
			
 
				-
			
 
				-        index0 = _mm_and_si128(_mm_cmplt_epi32(error1, error0), _mm_set1_epi32(1));
			
 
				-        minError0 = _mm_min_epi32(error0, error1);
			
 
				-
			
 
				-        index1 = _mm_sub_epi32(_mm_set1_epi32(2), _mm_cmplt_epi32(error3, error2));
			
 
				-        minError1 = _mm_min_epi32(error2, error3);
			
 
				-
			
 
				-        __m128i minIndex1 = _mm_blendv_epi8(index0, index1, _mm_cmplt_epi32(minError1, minError0));
			
 
				-        minError = _mm_min_epi32(minError0, minError1);
			
 
				-
			
 
				-        // Squaring the minimum error to produce correct values when adding
			
 
				-        minErrorLow = _mm_shuffle_epi32(minError, _MM_SHUFFLE(1, 1, 0, 0));
			
 
				-        squareErrorLow = _mm_mul_epi32(minErrorLow, minErrorLow);
			
 
				-        squareErrorLow = _mm_add_epi64(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 2));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 2, squareErrorLow);
			
 
				-        minErrorHigh = _mm_shuffle_epi32(minError, _MM_SHUFFLE(3, 3, 2, 2));
			
 
				-        squareErrorHigh = _mm_mul_epi32(minErrorHigh, minErrorHigh);
			
 
				-        squareErrorHigh = _mm_add_epi64(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 3));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 3, squareErrorHigh);
			
 
				-        __m128i minIndex = _mm_packs_epi32(minIndex0, minIndex1);
			
 
				-        _mm_storeu_si128((__m128i*)sel, minIndex);
			
 
				-#else
			
 
				-        int pix = dr * 77 + dg * 151 + db * 28;
			
 
				-
			
 
				-        for( int t=0; t<8; t++ )
			
 
				-        {
			
 
				-            const int64* tab = g_table256[t];
			
 
				-            uint idx = 0;
			
 
				-            uint64 err = sq( tab[0] + pix );
			
 
				-            for( int j=1; j<4; j++ )
			
 
				-            {
			
 
				-                uint64 local = sq( tab[j] + pix );
			
 
				-                if( local < err )
			
 
				-                {
			
 
				-                    err = local;
			
 
				-                    idx = j;
			
 
				-                }
			
 
				-            }
			
 
				-            *sel++ = idx;
			
 
				-            *ter++ += err;
			
 
				-        }
			
 
				-#endif
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-#ifdef __SSE4_1__
			
 
				-// Non-reference implementation, but faster. Produces same results as the AVX2 version
			
 
				-void FindBestFit( uint32 terr[2][8], uint16 tsel[16][8], v4i a[8], const uint32* id, const uint8* data )
			
 
				-{
			
 
				-    for( size_t i=0; i<16; i++ )
			
 
				-    {
			
 
				-        uint16* sel = tsel[i];
			
 
				-        uint bid = id[i];
			
 
				-        uint32* ter = terr[bid%2];
			
 
				-
			
 
				-        uint8 b = *data++;
			
 
				-        uint8 g = *data++;
			
 
				-        uint8 r = *data++;
			
 
				-        data++;
			
 
				-
			
 
				-        int dr = a[bid][0] - r;
			
 
				-        int dg = a[bid][1] - g;
			
 
				-        int db = a[bid][2] - b;
			
 
				-
			
 
				-        // The scaling values are divided by two and rounded, to allow the differences to be in the range of signed int16
			
 
				-        // This produces slightly different results, but is significant faster
			
 
				-        __m128i pixel = _mm_set1_epi16(dr * 38 + dg * 76 + db * 14);
			
 
				-        __m128i pix = _mm_abs_epi16(pixel);
			
 
				-
			
 
				-        // Taking the absolute value is way faster. The values are only used to sort, so the result will be the same.
			
 
				-        // Since the selector table is symmetrical, we need to calculate the difference only for half of the entries.
			
 
				-        __m128i error0 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[0]));
			
 
				-        __m128i error1 = _mm_abs_epi16(_mm_sub_epi16(pix, g_table128_SIMD[1]));
			
 
				-
			
 
				-        __m128i index = _mm_and_si128(_mm_cmplt_epi16(error1, error0), _mm_set1_epi16(1));
			
 
				-        __m128i minError = _mm_min_epi16(error0, error1);
			
 
				-
			
 
				-        // Exploiting symmetry of the selector table and use the sign bit
			
 
				-        // This produces slightly different results, but is needed to produce same results as AVX2 implementation
			
 
				-        __m128i indexBit = _mm_andnot_si128(_mm_srli_epi16(pixel, 15), _mm_set1_epi8(-1));
			
 
				-        __m128i minIndex = _mm_or_si128(index, _mm_add_epi16(indexBit, indexBit));
			
 
				-
			
 
				-        // Squaring the minimum error to produce correct values when adding
			
 
				-        __m128i squareErrorLo = _mm_mullo_epi16(minError, minError);
			
 
				-        __m128i squareErrorHi = _mm_mulhi_epi16(minError, minError);
			
 
				-
			
 
				-        __m128i squareErrorLow = _mm_unpacklo_epi16(squareErrorLo, squareErrorHi);
			
 
				-        __m128i squareErrorHigh = _mm_unpackhi_epi16(squareErrorLo, squareErrorHi);
			
 
				-
			
 
				-        squareErrorLow = _mm_add_epi32(squareErrorLow, _mm_loadu_si128(((__m128i*)ter) + 0));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 0, squareErrorLow);
			
 
				-        squareErrorHigh = _mm_add_epi32(squareErrorHigh, _mm_loadu_si128(((__m128i*)ter) + 1));
			
 
				-        _mm_storeu_si128(((__m128i*)ter) + 1, squareErrorHigh);
			
 
				-
			
 
				-        _mm_storeu_si128((__m128i*)sel, minIndex);
			
 
				-    }
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-uint8_t convert6(float f)
			
 
				-{
			
 
				-    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
			
 
				-    return (i + 11 - ((i + 11) >> 7) - ((i + 4) >> 7)) >> 3;
			
 
				-}
			
 
				-
			
 
				-uint8_t convert7(float f)
			
 
				-{
			
 
				-    int i = (std::min(std::max(static_cast<int>(f), 0), 1023) - 15) >> 1;
			
 
				-    return (i + 9 - ((i + 9) >> 8) - ((i + 6) >> 8)) >> 2;
			
 
				-}
			
 
				-
			
 
				-std::pair<uint64, uint64> Planar(const uint8* src)
			
 
				-{
			
 
				-    int32 r = 0;
			
 
				-    int32 g = 0;
			
 
				-    int32 b = 0;
			
 
				-
			
 
				-    for (int i = 0; i < 16; ++i)
			
 
				-    {
			
 
				-        b += src[i * 4 + 0];
			
 
				-        g += src[i * 4 + 1];
			
 
				-        r += src[i * 4 + 2];
			
 
				-    }
			
 
				-
			
 
				-    int32 difRyz = 0;
			
 
				-    int32 difGyz = 0;
			
 
				-    int32 difByz = 0;
			
 
				-    int32 difRxz = 0;
			
 
				-    int32 difGxz = 0;
			
 
				-    int32 difBxz = 0;
			
 
				-
			
 
				-    const int32 scaling[] = { -255, -85, 85, 255 };
			
 
				-
			
 
				-    for (int i = 0; i < 16; ++i)
			
 
				-    {
			
 
				-        int32 difB = (static_cast<int>(src[i * 4 + 0]) << 4) - b;
			
 
				-        int32 difG = (static_cast<int>(src[i * 4 + 1]) << 4) - g;
			
 
				-        int32 difR = (static_cast<int>(src[i * 4 + 2]) << 4) - r;
			
 
				-
			
 
				-        difRyz += difR * scaling[i % 4];
			
 
				-        difGyz += difG * scaling[i % 4];
			
 
				-        difByz += difB * scaling[i % 4];
			
 
				-
			
 
				-        difRxz += difR * scaling[i / 4];
			
 
				-        difGxz += difG * scaling[i / 4];
			
 
				-        difBxz += difB * scaling[i / 4];
			
 
				-    }
			
 
				-
			
 
				-    const float scale = -4.0f / ((255 * 255 * 8.0f + 85 * 85 * 8.0f) * 16.0f);
			
 
				-
			
 
				-    float aR = difRxz * scale;
			
 
				-    float aG = difGxz * scale;
			
 
				-    float aB = difBxz * scale;
			
 
				-
			
 
				-    float bR = difRyz * scale;
			
 
				-    float bG = difGyz * scale;
			
 
				-    float bB = difByz * scale;
			
 
				-
			
 
				-    float dR = r * (4.0f / 16.0f);
			
 
				-    float dG = g * (4.0f / 16.0f);
			
 
				-    float dB = b * (4.0f / 16.0f);
			
 
				-
			
 
				-    // calculating the three colors RGBO, RGBH, and RGBV.  RGB = df - af * x - bf * y;
			
 
				-    float cofR = (aR *  255.0f + (bR *  255.0f + dR));
			
 
				-    float cofG = (aG *  255.0f + (bG *  255.0f + dG));
			
 
				-    float cofB = (aB *  255.0f + (bB *  255.0f + dB));
			
 
				-    float chfR = (aR * -425.0f + (bR *  255.0f + dR));
			
 
				-    float chfG = (aG * -425.0f + (bG *  255.0f + dG));
			
 
				-    float chfB = (aB * -425.0f + (bB *  255.0f + dB));
			
 
				-    float cvfR = (aR *  255.0f + (bR * -425.0f + dR));
			
 
				-    float cvfG = (aG *  255.0f + (bG * -425.0f + dG));
			
 
				-    float cvfB = (aB *  255.0f + (bB * -425.0f + dB));
			
 
				-
			
 
				-    // convert to r6g7b6
			
 
				-    int32 coR = convert6(cofR);
			
 
				-    int32 coG = convert7(cofG);
			
 
				-    int32 coB = convert6(cofB);
			
 
				-    int32 chR = convert6(chfR);
			
 
				-    int32 chG = convert7(chfG);
			
 
				-    int32 chB = convert6(chfB);
			
 
				-    int32 cvR = convert6(cvfR);
			
 
				-    int32 cvG = convert7(cvfG);
			
 
				-    int32 cvB = convert6(cvfB);
			
 
				-
			
 
				-    // Error calculation
			
 
				-    int32 ro0 = coR;
			
 
				-    int32 go0 = coG;
			
 
				-    int32 bo0 = coB;
			
 
				-    int32 ro1 = (ro0 >> 4) | (ro0 << 2);
			
 
				-    int32 go1 = (go0 >> 6) | (go0 << 1);
			
 
				-    int32 bo1 = (bo0 >> 4) | (bo0 << 2);
			
 
				-    int32 ro2 = (ro1 << 2) + 2;
			
 
				-    int32 go2 = (go1 << 2) + 2;
			
 
				-    int32 bo2 = (bo1 << 2) + 2;
			
 
				-
			
 
				-    int32 rh0 = chR;
			
 
				-    int32 gh0 = chG;
			
 
				-    int32 bh0 = chB;
			
 
				-    int32 rh1 = (rh0 >> 4) | (rh0 << 2);
			
 
				-    int32 gh1 = (gh0 >> 6) | (gh0 << 1);
			
 
				-    int32 bh1 = (bh0 >> 4) | (bh0 << 2);
			
 
				-
			
 
				-    int32 rh2 = rh1 - ro1;
			
 
				-    int32 gh2 = gh1 - go1;
			
 
				-    int32 bh2 = bh1 - bo1;
			
 
				-
			
 
				-    int32 rv0 = cvR;
			
 
				-    int32 gv0 = cvG;
			
 
				-    int32 bv0 = cvB;
			
 
				-    int32 rv1 = (rv0 >> 4) | (rv0 << 2);
			
 
				-    int32 gv1 = (gv0 >> 6) | (gv0 << 1);
			
 
				-    int32 bv1 = (bv0 >> 4) | (bv0 << 2);
			
 
				-
			
 
				-    int32 rv2 = rv1 - ro1;
			
 
				-    int32 gv2 = gv1 - go1;
			
 
				-    int32 bv2 = bv1 - bo1;
			
 
				-
			
 
				-    uint64 error = 0;
			
 
				-
			
 
				-    for (int i = 0; i < 16; ++i)
			
 
				-    {
			
 
				-        int32 cR = clampu8((rh2 * (i / 4) + rv2 * (i % 4) + ro2) >> 2);
			
 
				-        int32 cG = clampu8((gh2 * (i / 4) + gv2 * (i % 4) + go2) >> 2);
			
 
				-        int32 cB = clampu8((bh2 * (i / 4) + bv2 * (i % 4) + bo2) >> 2);
			
 
				-
			
 
				-        int32 difB = static_cast<int>(src[i * 4 + 0]) - cB;
			
 
				-        int32 difG = static_cast<int>(src[i * 4 + 1]) - cG;
			
 
				-        int32 difR = static_cast<int>(src[i * 4 + 2]) - cR;
			
 
				-
			
 
				-        int32 dif = difR * 38 + difG * 76 + difB * 14;
			
 
				-
			
 
				-        error += dif * dif;
			
 
				-    }
			
 
				-
			
 
				-    /**/
			
 
				-    uint32 rgbv = cvB | (cvG << 6) | (cvR << 13);
			
 
				-    uint32 rgbh = chB | (chG << 6) | (chR << 13);
			
 
				-    uint32 hi = rgbv | ((rgbh & 0x1FFF) << 19);
			
 
				-    uint32 lo = (chR & 0x1) | 0x2 | ((chR << 1) & 0x7C);
			
 
				-    lo |= ((coB & 0x07) <<  7) | ((coB & 0x18) <<  8) | ((coB & 0x20) << 11);
			
 
				-    lo |= ((coG & 0x3F) << 17) | ((coG & 0x40) << 18);
			
 
				-    lo |= coR << 25;
			
 
				-
			
 
				-    const int32 idx = (coR & 0x20) | ((coG & 0x20) >> 1) | ((coB & 0x1E) >> 1);
			
 
				-
			
 
				-    lo |= g_flags[idx];
			
 
				-
			
 
				-    uint64 result = static_cast<uint32>(bx::endianSwap(lo));
			
 
				-    result |= static_cast<uint64>(static_cast<uint32>(bx::endianSwap(hi))) << 32;
			
 
				-
			
 
				-    return std::make_pair(result, error);
			
 
				-}
			
 
				-
			
 
				-template<class T, class S>
			
 
				-uint64 EncodeSelectors( uint64 d, const T terr[2][8], const S tsel[16][8], const uint32* id, const uint64 value, const uint64 error)
			
 
				-{
			
 
				-    size_t tidx[2];
			
 
				-    tidx[0] = GetLeastError( terr[0], 8 );
			
 
				-    tidx[1] = GetLeastError( terr[1], 8 );
			
 
				-
			
 
				-    if ((terr[0][tidx[0]] + terr[1][tidx[1]]) >= error)
			
 
				-    {
			
 
				-        return value;
			
 
				-    }
			
 
				-
			
 
				-    d |= tidx[0] << 26;
			
 
				-    d |= tidx[1] << 29;
			
 
				-    for( int i=0; i<16; i++ )
			
 
				-    {
			
 
				-        uint64 t = tsel[i][tidx[id[i]%2]];
			
 
				-        d |= ( t & 0x1 ) << ( i + 32 );
			
 
				-        d |= ( t & 0x2 ) << ( i + 47 );
			
 
				-    }
			
 
				-
			
 
				-    return FixByteOrder(d);
			
 
				-}
			
 
				-}
			
 
				-
			
 
				-uint64 ProcessRGB( const uint8* src )
			
 
				-{
			
 
				-    uint64 d = CheckSolid( src );
			
 
				-    if( d != 0 ) return d;
			
 
				-
			
 
				-    v4i a[8];
			
 
				-    uint err[4] = {};
			
 
				-    PrepareAverages( a, src, err );
			
 
				-    size_t idx = GetLeastError( err, 4 );
			
 
				-    EncodeAverages( d, a, idx );
			
 
				-
			
 
				-#if defined __SSE4_1__ && !defined REFERENCE_IMPLEMENTATION
			
 
				-    uint32 terr[2][8] = {};
			
 
				-#else
			
 
				-    uint64 terr[2][8] = {};
			
 
				-#endif
			
 
				-    uint16 tsel[16][8];
			
 
				-    const uint32* id = g_id[idx];
			
 
				-    FindBestFit( terr, tsel, a, id, src );
			
 
				-
			
 
				-    return FixByteOrder( EncodeSelectors( d, terr, tsel, id ) );
			
 
				-}
			
 
				-
			
 
				-uint64 ProcessRGB_ETC2( const uint8* src )
			
 
				-{
			
 
				-    std::pair<uint64, uint64> result = Planar( src );
			
 
				-
			
 
				-    uint64 d = 0;
			
 
				-
			
 
				-    v4i a[8];
			
 
				-    uint err[4] = {};
			
 
				-    PrepareAverages( a, src, err );
			
 
				-    size_t idx = GetLeastError( err, 4 );
			
 
				-    EncodeAverages( d, a, idx );
			
 
				-
			
 
				-    uint64 terr[2][8] = {};
			
 
				-    uint16 tsel[16][8];
			
 
				-    const uint32* id = g_id[idx];
			
 
				-    FindBestFit( terr, tsel, a, id, src );
			
 
				-
			
 
				-    return EncodeSelectors( d, terr, tsel, id, result.first, result.second );
			
 
				-}
			
--- a/3rdparty/etc2/ProcessRGB.hpp
+++ b/3rdparty/etc2/ProcessRGB.hpp
@@ -1,9 +0,0 @@
 
				-#ifndef __PROCESSRGB_HPP__
			
 
				-#define __PROCESSRGB_HPP__
			
 
				-
			
 
				-#include "Types.hpp"
			
 
				-
			
 
				-uint64 ProcessRGB( const uint8* src );
			
 
				-uint64 ProcessRGB_ETC2( const uint8* src );
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/Tables.cpp
+++ b/3rdparty/etc2/Tables.cpp
@@ -1,109 +0,0 @@
 
				-#include "Tables.hpp"
			
 
				-
			
 
				-const int32 g_table[8][4] = {
			
 
				-    {  2,  8,   -2,   -8 },
			
 
				-    {  5, 17,   -5,  -17 },
			
 
				-    {  9, 29,   -9,  -29 },
			
 
				-    { 13, 42,  -13,  -42 },
			
 
				-    { 18, 60,  -18,  -60 },
			
 
				-    { 24, 80,  -24,  -80 },
			
 
				-    { 33, 106, -33, -106 },
			
 
				-    { 47, 183, -47, -183 }
			
 
				-};
			
 
				-
			
 
				-const int64 g_table256[8][4] = {
			
 
				-    {  2*256,  8*256,   -2*256,   -8*256 },
			
 
				-    {  5*256, 17*256,   -5*256,  -17*256 },
			
 
				-    {  9*256, 29*256,   -9*256,  -29*256 },
			
 
				-    { 13*256, 42*256,  -13*256,  -42*256 },
			
 
				-    { 18*256, 60*256,  -18*256,  -60*256 },
			
 
				-    { 24*256, 80*256,  -24*256,  -80*256 },
			
 
				-    { 33*256, 106*256, -33*256, -106*256 },
			
 
				-    { 47*256, 183*256, -47*256, -183*256 }
			
 
				-};
			
 
				-
			
 
				-const uint32 g_id[4][16] = {
			
 
				-    { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
			
 
				-    { 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2 },
			
 
				-    { 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 },
			
 
				-    { 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6 }
			
 
				-};
			
 
				-
			
 
				-const uint32 g_avg2[16] = {
			
 
				-    0x00,
			
 
				-    0x11,
			
 
				-    0x22,
			
 
				-    0x33,
			
 
				-    0x44,
			
 
				-    0x55,
			
 
				-    0x66,
			
 
				-    0x77,
			
 
				-    0x88,
			
 
				-    0x99,
			
 
				-    0xAA,
			
 
				-    0xBB,
			
 
				-    0xCC,
			
 
				-    0xDD,
			
 
				-    0xEE,
			
 
				-    0xFF
			
 
				-};
			
 
				-
			
 
				-const uint32 g_flags[64] = {
			
 
				-    0x80800402, 0x80800402, 0x80800402, 0x80800402,
			
 
				-    0x80800402, 0x80800402, 0x80800402, 0x8080E002,
			
 
				-    0x80800402, 0x80800402, 0x8080E002, 0x8080E002,
			
 
				-    0x80800402, 0x8080E002, 0x8080E002, 0x8080E002,
			
 
				-    0x80000402, 0x80000402, 0x80000402, 0x80000402,
			
 
				-    0x80000402, 0x80000402, 0x80000402, 0x8000E002,
			
 
				-    0x80000402, 0x80000402, 0x8000E002, 0x8000E002,
			
 
				-    0x80000402, 0x8000E002, 0x8000E002, 0x8000E002,
			
 
				-    0x00800402, 0x00800402, 0x00800402, 0x00800402,
			
 
				-    0x00800402, 0x00800402, 0x00800402, 0x0080E002,
			
 
				-    0x00800402, 0x00800402, 0x0080E002, 0x0080E002,
			
 
				-    0x00800402, 0x0080E002, 0x0080E002, 0x0080E002,
			
 
				-    0x00000402, 0x00000402, 0x00000402, 0x00000402,
			
 
				-    0x00000402, 0x00000402, 0x00000402, 0x0000E002,
			
 
				-    0x00000402, 0x00000402, 0x0000E002, 0x0000E002,
			
 
				-    0x00000402, 0x0000E002, 0x0000E002, 0x0000E002
			
 
				-};
			
 
				-
			
 
				-#ifdef __SSE4_1__
			
 
				-const uint8 g_flags_AVX2[64] =
			
 
				-{
			
 
				-    0x63, 0x63, 0x63, 0x63,
			
 
				-    0x63, 0x63, 0x63, 0x7D,
			
 
				-    0x63, 0x63, 0x7D, 0x7D,
			
 
				-    0x63, 0x7D, 0x7D, 0x7D,
			
 
				-    0x43, 0x43, 0x43, 0x43,
			
 
				-    0x43, 0x43, 0x43, 0x5D,
			
 
				-    0x43, 0x43, 0x5D, 0x5D,
			
 
				-    0x43, 0x5D, 0x5D, 0x5D,
			
 
				-    0x23, 0x23, 0x23, 0x23,
			
 
				-    0x23, 0x23, 0x23, 0x3D,
			
 
				-    0x23, 0x23, 0x3D, 0x3D,
			
 
				-    0x23, 0x3D, 0x3D, 0x3D,
			
 
				-    0x03, 0x03, 0x03, 0x03,
			
 
				-    0x03, 0x03, 0x03, 0x1D,
			
 
				-    0x03, 0x03, 0x1D, 0x1D,
			
 
				-    0x03, 0x1D, 0x1D, 0x1D,
			
 
				-};
			
 
				-
			
 
				-const __m128i g_table_SIMD[2] =
			
 
				-{
			
 
				-    _mm_setr_epi16(   2,   5,   9,  13,  18,  24,  33,  47),
			
 
				-    _mm_setr_epi16(   8,  17,  29,  42,  60,  80, 106, 183)
			
 
				-};
			
 
				-const __m128i g_table128_SIMD[2] =
			
 
				-{
			
 
				-    _mm_setr_epi16(   2*128,   5*128,   9*128,  13*128,  18*128,  24*128,  33*128,  47*128),
			
 
				-    _mm_setr_epi16(   8*128,  17*128,  29*128,  42*128,  60*128,  80*128, 106*128, 183*128)
			
 
				-};
			
 
				-const __m128i g_table256_SIMD[4] =
			
 
				-{
			
 
				-    _mm_setr_epi32(  2*256,   5*256,   9*256,  13*256),
			
 
				-    _mm_setr_epi32(  8*256,  17*256,  29*256,  42*256),
			
 
				-    _mm_setr_epi32( 18*256,  24*256,  33*256,  47*256),
			
 
				-    _mm_setr_epi32( 60*256,  80*256, 106*256, 183*256)
			
 
				-};
			
 
				-#endif
			
 
				-
			
--- a/3rdparty/etc2/Tables.hpp
+++ b/3rdparty/etc2/Tables.hpp
@@ -1,25 +0,0 @@
 
				-#ifndef __TABLES_HPP__
			
 
				-#define __TABLES_HPP__
			
 
				-
			
 
				-#include "Types.hpp"
			
 
				-#ifdef __SSE4_1__
			
 
				-#include <smmintrin.h>
			
 
				-#endif
			
 
				-
			
 
				-extern const int32 g_table[8][4];
			
 
				-extern const int64 g_table256[8][4];
			
 
				-
			
 
				-extern const uint32 g_id[4][16];
			
 
				-
			
 
				-extern const uint32 g_avg2[16];
			
 
				-
			
 
				-extern const uint32 g_flags[64];
			
 
				-
			
 
				-#ifdef __SSE4_1__
			
 
				-extern const uint8 g_flags_AVX2[64];
			
 
				-extern const __m128i g_table_SIMD[2];
			
 
				-extern const __m128i g_table128_SIMD[2];
			
 
				-extern const __m128i g_table256_SIMD[4];
			
 
				-#endif
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/Types.hpp
+++ b/3rdparty/etc2/Types.hpp
@@ -1,17 +0,0 @@
 
				-#ifndef __DARKRL__TYPES_HPP__
			
 
				-#define __DARKRL__TYPES_HPP__
			
 
				-
			
 
				-#include <stdint.h>
			
 
				-
			
 
				-typedef int8_t      int8;
			
 
				-typedef uint8_t     uint8;
			
 
				-typedef int16_t     int16;
			
 
				-typedef uint16_t    uint16;
			
 
				-typedef int32_t     int32;
			
 
				-typedef uint32_t    uint32;
			
 
				-typedef int64_t     int64;
			
 
				-typedef uint64_t    uint64;
			
 
				-
			
 
				-typedef unsigned int uint;
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/etc2/Vector.hpp
+++ b/3rdparty/etc2/Vector.hpp
@@ -1,222 +0,0 @@
 
				-#ifndef __DARKRL__VECTOR_HPP__
			
 
				-#define __DARKRL__VECTOR_HPP__
			
 
				-
			
 
				-#include <assert.h>
			
 
				-#include <algorithm>
			
 
				-#include <math.h>
			
 
				-
			
 
				-#include "Math.hpp"
			
 
				-#include "Types.hpp"
			
 
				-
			
 
				-template<class T>
			
 
				-struct Vector2
			
 
				-{
			
 
				-    Vector2() : x( 0 ), y( 0 ) {}
			
 
				-    Vector2( T v ) : x( v ), y( v ) {}
			
 
				-    Vector2( T _x, T _y ) : x( _x ), y( _y ) {}
			
 
				-
			
 
				-    bool operator==( const Vector2<T>& rhs ) const { return x == rhs.x && y == rhs.y; }
			
 
				-    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
			
 
				-
			
 
				-    Vector2<T>& operator+=( const Vector2<T>& rhs )
			
 
				-    {
			
 
				-        x += rhs.x;
			
 
				-        y += rhs.y;
			
 
				-        return *this;
			
 
				-    }
			
 
				-    Vector2<T>& operator-=( const Vector2<T>& rhs )
			
 
				-    {
			
 
				-        x -= rhs.x;
			
 
				-        y -= rhs.y;
			
 
				-        return *this;
			
 
				-    }
			
 
				-    Vector2<T>& operator*=( const Vector2<T>& rhs )
			
 
				-    {
			
 
				-        x *= rhs.x;
			
 
				-        y *= rhs.y;
			
 
				-        return *this;
			
 
				-    }
			
 
				-
			
 
				-    T x, y;
			
 
				-};
			
 
				-
			
 
				-template<class T>
			
 
				-Vector2<T> operator+( const Vector2<T>& lhs, const Vector2<T>& rhs )
			
 
				-{
			
 
				-    return Vector2<T>( lhs.x + rhs.x, lhs.y + rhs.y );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector2<T> operator-( const Vector2<T>& lhs, const Vector2<T>& rhs )
			
 
				-{
			
 
				-    return Vector2<T>( lhs.x - rhs.x, lhs.y - rhs.y );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector2<T> operator*( const Vector2<T>& lhs, const float& rhs )
			
 
				-{
			
 
				-    return Vector2<T>( lhs.x * rhs, lhs.y * rhs );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector2<T> operator/( const Vector2<T>& lhs, const T& rhs )
			
 
				-{
			
 
				-    return Vector2<T>( lhs.x / rhs, lhs.y / rhs );
			
 
				-}
			
 
				-
			
 
				-
			
 
				-typedef Vector2<int32> v2i;
			
 
				-typedef Vector2<float> v2f;
			
 
				-
			
 
				-
			
 
				-template<class T>
			
 
				-struct Vector3
			
 
				-{
			
 
				-    Vector3() : x( 0 ), y( 0 ), z( 0 ) {}
			
 
				-    Vector3( T v ) : x( v ), y( v ), z( v ) {}
			
 
				-    Vector3( T _x, T _y, T _z ) : x( _x ), y( _y ), z( _z ) {}
			
 
				-    template<class Y>
			
 
				-    Vector3( const Vector3<Y>& v ) : x( T( v.x ) ), y( T( v.y ) ), z( T( v.z ) ) {}
			
 
				-
			
 
				-    T Luminance() const { return T( x * 0.3f + y * 0.59f + z * 0.11f ); }
			
 
				-    void Clamp()
			
 
				-    {
			
 
				-        x = std::min( T(1), std::max( T(0), x ) );
			
 
				-        y = std::min( T(1), std::max( T(0), y ) );
			
 
				-        z = std::min( T(1), std::max( T(0), z ) );
			
 
				-    }
			
 
				-
			
 
				-    bool operator==( const Vector3<T>& rhs ) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
			
 
				-    bool operator!=( const Vector2<T>& rhs ) const { return !( *this == rhs ); }
			
 
				-
			
 
				-    T& operator[]( uint idx ) { assert( idx < 3 ); return ((T*)this)[idx]; }
			
 
				-    const T& operator[]( uint idx ) const { assert( idx < 3 ); return ((T*)this)[idx]; }
			
 
				-
			
 
				-    Vector3<T> operator+=( const Vector3<T>& rhs )
			
 
				-    {
			
 
				-        x += rhs.x;
			
 
				-        y += rhs.y;
			
 
				-        z += rhs.z;
			
 
				-        return *this;
			
 
				-    }
			
 
				-
			
 
				-    Vector3<T> operator*=( const Vector3<T>& rhs )
			
 
				-    {
			
 
				-        x *= rhs.x;
			
 
				-        y *= rhs.y;
			
 
				-        z *= rhs.z;
			
 
				-        return *this;
			
 
				-    }
			
 
				-
			
 
				-    Vector3<T> operator*=( const float& rhs )
			
 
				-    {
			
 
				-        x *= rhs;
			
 
				-        y *= rhs;
			
 
				-        z *= rhs;
			
 
				-        return *this;
			
 
				-    }
			
 
				-
			
 
				-    T x, y, z;
			
 
				-    T padding;
			
 
				-};
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> operator+( const Vector3<T>& lhs, const Vector3<T>& rhs )
			
 
				-{
			
 
				-    return Vector3<T>( lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> operator-( const Vector3<T>& lhs, const Vector3<T>& rhs )
			
 
				-{
			
 
				-    return Vector3<T>( lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> operator*( const Vector3<T>& lhs, const Vector3<T>& rhs )
			
 
				-{
			
 
				-    return Vector3<T>( lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> operator*( const Vector3<T>& lhs, const float& rhs )
			
 
				-{
			
 
				-    return Vector3<T>( T( lhs.x * rhs ), T( lhs.y * rhs ), T( lhs.z * rhs ) );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> operator/( const Vector3<T>& lhs, const T& rhs )
			
 
				-{
			
 
				-    return Vector3<T>( lhs.x / rhs, lhs.y / rhs, lhs.z / rhs );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-bool operator<( const Vector3<T>& lhs, const Vector3<T>& rhs )
			
 
				-{
			
 
				-    return lhs.Luminance() < rhs.Luminance();
			
 
				-}
			
 
				-
			
 
				-typedef Vector3<int32> v3i;
			
 
				-typedef Vector3<float> v3f;
			
 
				-typedef Vector3<uint8> v3b;
			
 
				-
			
 
				-
			
 
				-static inline v3b v3f_to_v3b( const v3f& v )
			
 
				-{
			
 
				-    return v3b( uint8( std::min( 1.f, v.x ) * 255 ), uint8( std::min( 1.f, v.y ) * 255 ), uint8( std::min( 1.f, v.z ) * 255 ) );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> Mix( const Vector3<T>& v1, const Vector3<T>& v2, float amount )
			
 
				-{
			
 
				-    return v1 + ( v2 - v1 ) * amount;
			
 
				-}
			
 
				-
			
 
				-template<>
			
 
				-inline v3b Mix( const v3b& v1, const v3b& v2, float amount )
			
 
				-{
			
 
				-    return v3b( v3f( v1 ) + ( v3f( v2 ) - v3f( v1 ) ) * amount );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> Desaturate( const Vector3<T>& v )
			
 
				-{
			
 
				-    T l = v.Luminance();
			
 
				-    return Vector3<T>( l, l, l );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> Desaturate( const Vector3<T>& v, float mul )
			
 
				-{
			
 
				-    T l = T( v.Luminance() * mul );
			
 
				-    return Vector3<T>( l, l, l );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> pow( const Vector3<T>& base, float exponent )
			
 
				-{
			
 
				-    return Vector3<T>(
			
 
				-        pow( base.x, exponent ),
			
 
				-        pow( base.y, exponent ),
			
 
				-        pow( base.z, exponent ) );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> sRGB2linear( const Vector3<T>& v )
			
 
				-{
			
 
				-    return Vector3<T>(
			
 
				-        sRGB2linear( v.x ),
			
 
				-        sRGB2linear( v.y ),
			
 
				-        sRGB2linear( v.z ) );
			
 
				-}
			
 
				-
			
 
				-template<class T>
			
 
				-Vector3<T> linear2sRGB( const Vector3<T>& v )
			
 
				-{
			
 
				-    return Vector3<T>(
			
 
				-        linear2sRGB( v.x ),
			
 
				-        linear2sRGB( v.y ),
			
 
				-        linear2sRGB( v.z ) );
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/iqa/LICENSE
+++ b/3rdparty/iqa/LICENSE
@@ -1,32 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
--- a/3rdparty/iqa/README.txt
+++ b/3rdparty/iqa/README.txt
@@ -1,36 +0,0 @@
 
				-Doxygen documentation can be found at: http://tdistler.com/iqa

			
 
				-

			
 
				-BUILD:

			
 
				-

			
 
				-  All build artifacts end up in build/<configuration>, where <configuration> is

			
 
				-  'debug' or 'release'.

			
 
				-

			
 
				-  Windows:

			
 
				-    - Open iqa.sln, select 'Debug' or 'Release', and build. The output is a 

			
 
				-      static library 'iqa.lib'.

			
 
				-    - To run the tests under the debugger, first right-click the 'test' project,

			
 
				-      select Properties -> Configuration Properties -> Debugging and set

			
 
				-      'Working Directory' to '$(OutDir)'. Then start the application.

			
 
				-

			
 
				-  Linux:

			
 
				-    - Change directories into the root of the IQA branch you want to build.

			
 
				-    - Type `make` for a debug build, or `make RELEASE=1` for a release build.

			
 
				-      The output is a static library 'libiqa.a'.

			
 
				-    - Type `make test` (or `make test RELEASE=1`) to build the unit tests.

			
 
				-    - Type `make clean` (or `make clean RELEASE=1`) to delete all build

			
 
				-      artifacts.

			
 
				-    - To run the tests, `cd` to the build/<configuration> directory and type

			
 
				-      `./test`.

			
 
				-

			
 
				-

			
 
				-USE:

			
 
				-

			
 
				-  - Include 'iqa.h' in your source file.

			
 
				-  - Call iqa_* methods.

			
 
				-  - Link against the IQA library.

			
 
				-

			
 
				-

			
 
				-HELP & SUPPORT:

			
 
				-

			
 
				-  Further help can be found at: https://sourceforge.net/projects/iqa/support

			
 
				-

			
--- a/3rdparty/iqa/include/convolve.h
+++ b/3rdparty/iqa/include/convolve.h
@@ -1,111 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _CONVOLVE_H_
			
 
				-#define _CONVOLVE_H_
			
 
				-
			
 
				-typedef float (*_iqa_get_pixel)(const float *img, int w, int h, int x, int y, float bnd_const);
			
 
				-
			
 
				-/** Out-of-bounds array values are a mirrored reflection of the border values*/
			
 
				-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const);
			
 
				-/** Out-of-bounds array values are set to the nearest border value */
			
 
				-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const);
			
 
				-/** Out-of-bounds array values are set to 'bnd_const' */
			
 
				-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const);
			
 
				-
			
 
				-
			
 
				-/** Defines a convolution kernel */
			
 
				-struct _kernel {
			
 
				-    float *kernel;          /**< Pointer to the kernel values */
			
 
				-    int w;                  /**< The kernel width */
			
 
				-    int h;                  /**< The kernel height */
			
 
				-    int normalized;         /**< 1 if the kernel values add up to 1. 0 otherwise */
			
 
				-    _iqa_get_pixel bnd_opt; /**< Defines how out-of-bounds image values are handled */
			
 
				-    float bnd_const;        /**< If 'bnd_opt' is KBND_CONSTANT, this specifies the out-of-bounds value */
			
 
				-};
			
 
				-
			
 
				-/**
			
 
				- * @brief Applies the specified kernel to the image.
			
 
				- * The kernel will be applied to all areas where it fits completely within
			
 
				- * the image. The resulting image will be smaller by half the kernel width 
			
 
				- * and height (w - kw/2 and h - kh/2).
			
 
				- *
			
 
				- * @param img Image to modify
			
 
				- * @param w Image width
			
 
				- * @param h Image height
			
 
				- * @param k The kernel to apply
			
 
				- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
			
 
				- *               and kh are the kernel width and height). If 0, the result
			
 
				- *               will be written to the original image buffer.
			
 
				- * @param rw Optional. The width of the resulting image will be stored here.
			
 
				- * @param rh Optional. The height of the resulting image will be stored here.
			
 
				- */
			
 
				-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh);
			
 
				-
			
 
				-/**
			
 
				- * The same as _iqa_convolve() except the kernel is applied to the entire image.
			
 
				- * In other words, the kernel is applied to all areas where the top-left corner
			
 
				- * of the kernel is in the image. Out-of-bound pixel value (off the right and
			
 
				- * bottom edges) are chosen based on the 'bnd_opt' and 'bnd_const' members of
			
 
				- * the kernel structure. The resulting array is the same size as the input
			
 
				- * image.
			
 
				- *
			
 
				- * @param img Image to modify
			
 
				- * @param w Image width
			
 
				- * @param h Image height
			
 
				- * @param k The kernel to apply
			
 
				- * @param result Buffer to hold the resulting image ((w-kw)*(h-kh), where kw
			
 
				- *               and kh are the kernel width and height). If 0, the result
			
 
				- *               will be written to the original image buffer.
			
 
				- * @return 0 if successful. Non-zero otherwise.
			
 
				- */
			
 
				-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result);
			
 
				-
			
 
				-/**
			
 
				- * Returns the filtered version of the specified pixel. If no kernel is given,
			
 
				- * the raw pixel value is returned.
			
 
				- * 
			
 
				- * @param img Source image
			
 
				- * @param w Image width
			
 
				- * @param h Image height
			
 
				- * @param x The x location of the pixel to filter
			
 
				- * @param y The y location of the pixel to filter
			
 
				- * @param k Optional. The convolution kernel to apply to the pixel.
			
 
				- * @param kscale The scale of the kernel (for normalization). 1 for normalized
			
 
				- *               kernels. Required if 'k' is not null.
			
 
				- * @return The filtered pixel value.
			
 
				- */
			
 
				-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale);
			
 
				-
			
 
				-
			
 
				-#endif /*_CONVOLVE_H_*/
			
--- a/3rdparty/iqa/include/decimate.h
+++ b/3rdparty/iqa/include/decimate.h
@@ -1,55 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _DECIMATE_H_
			
 
				-#define _DECIMATE_H_
			
 
				-
			
 
				-#include "convolve.h"
			
 
				-
			
 
				-/**
			
 
				- * @brief Downsamples (decimates) an image.
			
 
				- *
			
 
				- * @param img Image to modify
			
 
				- * @param w Image width
			
 
				- * @param h Image height
			
 
				- * @param factor Decimation factor
			
 
				- * @param k The kernel to apply (e.g. low-pass filter). Can be 0.
			
 
				- * @param result Buffer to hold the resulting image (w/factor*h/factor). If 0,
			
 
				- *               the result will be written to the original image buffer.
			
 
				- * @param rw Optional. The width of the resulting image will be stored here.
			
 
				- * @param rh Optional. The height of the resulting image will be stored here.
			
 
				- * @return 0 on success.
			
 
				- */
			
 
				-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh);
			
 
				-
			
 
				-#endif /*_DECIMATE_H_*/
			
--- a/3rdparty/iqa/include/iqa.h
+++ b/3rdparty/iqa/include/iqa.h
@@ -1,134 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _IQA_H_
			
 
				-#define _IQA_H_
			
 
				-
			
 
				-#include "iqa_os.h"
			
 
				-
			
 
				-/**
			
 
				- * Allows fine-grain control of the SSIM algorithm.
			
 
				- */
			
 
				-struct iqa_ssim_args {
			
 
				-    float alpha;    /**< luminance exponent */
			
 
				-    float beta;     /**< contrast exponent */
			
 
				-    float gamma;    /**< structure exponent */
			
 
				-    int L;          /**< dynamic range (2^8 - 1)*/
			
 
				-    float K1;       /**< stabilization constant 1 */
			
 
				-    float K2;       /**< stabilization constant 2 */
			
 
				-    int f;          /**< scale factor. 0=default scaling, 1=no scaling */
			
 
				-};
			
 
				-
			
 
				-/**
			
 
				- * Allows fine-grain control of the MS-SSIM algorithm.
			
 
				- */
			
 
				-struct iqa_ms_ssim_args {
			
 
				-    int wang;             /**< 1=original algorithm by Wang, et al. 0=MS-SSIM* by Rouse/Hemami (default). */
			
 
				-    int gaussian;         /**< 1=11x11 Gaussian window (default). 0=8x8 linear window. */
			
 
				-    int scales;           /**< Number of scaled images to use. Default is 5. */
			
 
				-    const float *alphas;  /**< Pointer to array of alpha values for each scale. Required if 'scales' isn't 5. */
			
 
				-    const float *betas;   /**< Pointer to array of beta values for each scale. Required if 'scales' isn't 5. */
			
 
				-    const float *gammas;  /**< Pointer to array of gamma values for each scale. Required if 'scales' isn't 5. */
			
 
				-};
			
 
				-
			
 
				-/**
			
 
				- * Calculates the Mean Squared Error between 2 equal-sized 8-bit images.
			
 
				- * @note The images must have the same width, height, and stride.
			
 
				- * @param ref Original reference image
			
 
				- * @param cmp Distorted image
			
 
				- * @param w Width of the images
			
 
				- * @param h Height of the images
			
 
				- * @param stride The length (in bytes) of each horizontal line in the image.
			
 
				- *               This may be different from the image width.
			
 
				- * @return The MSE.
			
 
				- */
			
 
				-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
			
 
				-
			
 
				-/**
			
 
				- * Calculates the Peak Signal-to-Noise-Ratio between 2 equal-sized 8-bit
			
 
				- * images.
			
 
				- * @note The images must have the same width, height, and stride.
			
 
				- * @param ref Original reference image
			
 
				- * @param cmp Distorted image
			
 
				- * @param w Width of the images
			
 
				- * @param h Height of the images
			
 
				- * @param stride The length (in bytes) of each horizontal line in the image.
			
 
				- *               This may be different from the image width.
			
 
				- * @return The PSNR.
			
 
				- */
			
 
				-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride);
			
 
				-
			
 
				-/**
			
 
				- * Calculates the Structural SIMilarity between 2 equal-sized 8-bit images.
			
 
				- *
			
 
				- * See https://ece.uwaterloo.ca/~z70wang/publications/ssim.html
			
 
				- * @note The images must have the same width, height, and stride.
			
 
				- * @param ref Original reference image
			
 
				- * @param cmp Distorted image
			
 
				- * @param w Width of the images
			
 
				- * @param h Height of the images
			
 
				- * @param stride The length (in bytes) of each horizontal line in the image.
			
 
				- *               This may be different from the image width.
			
 
				- * @param gaussian 0 = 8x8 square window, 1 = 11x11 circular-symmetric Gaussian
			
 
				- * weighting.
			
 
				- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for
			
 
				- * defaults. Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
			
 
				- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
			
 
				- */
			
 
				-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
			
 
				-    int gaussian, const struct iqa_ssim_args *args);
			
 
				-
			
 
				-/**
			
 
				- * Calculates the Multi-Scale Structural SIMilarity between 2 equal-sized 8-bit
			
 
				- * images. The default algorithm is MS-SSIM* proposed by Rouse/Hemami 2008.
			
 
				- *
			
 
				- * See https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf and
			
 
				- * http://foulard.ece.cornell.edu/publications/dmr_hvei2008_paper.pdf
			
 
				- *
			
 
				- * @note 1. The images must have the same width, height, and stride.
			
 
				- * @note 2. The minimum image width or height is 2^(scales-1) * filter, where 'filter' is 11
			
 
				- * if a Gaussian window is being used, or 9 otherwise.
			
 
				- * @param ref Original reference image
			
 
				- * @param cmp Distorted image
			
 
				- * @param w Width of the images.
			
 
				- * @param h Height of the images.
			
 
				- * @param stride The length (in bytes) of each horizontal line in the image.
			
 
				- *               This may be different from the image width.
			
 
				- * @param args Optional MS-SSIM arguments for fine control of the algorithm. 0
			
 
				- * for defaults. Defaults are wang=0, scales=5, gaussian=1.
			
 
				- * @return The mean MS-SSIM over the entire image, or INFINITY if error.
			
 
				- */
			
 
				-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride, 
			
 
				-    const struct iqa_ms_ssim_args *args);
			
 
				-
			
 
				-#endif /*_IQA_H_*/
			
--- a/3rdparty/iqa/include/iqa_os.h
+++ b/3rdparty/iqa/include/iqa_os.h
@@ -1,68 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _OS_H_
			
 
				-#define _OS_H_
			
 
				-
			
 
				-/* Microsoft tends to implement features early, but they have a high legacy
			
 
				- * cost because they won't break existing implementations. As such, certain
			
 
				- * features we take for granted on other platforms (like C99) aren't fully
			
 
				- * implemented. This file is meant to rectify that.
			
 
				- */
			
 
				-
			
 
				-#ifdef WIN32
			
 
				-
			
 
				-#include <windows.h>
			
 
				-#include <math.h>
			
 
				-
			
 
				-#define IQA_INLINE __inline
			
 
				-
			
 
				-#ifndef INFINITY
			
 
				-    #define INFINITY (float)HUGE_VAL /**< Defined in C99 (Windows is C89) */
			
 
				-#endif /*INFINITY*/
			
 
				-
			
 
				-#ifndef NAN
			
 
				-    static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
			
 
				-    #define NAN (*(const float *) __nan) /**< Defined in C99 (Windows is C99) */
			
 
				-#endif
			
 
				-
			
 
				-#define IQA_EXPORT __declspec(dllexport)
			
 
				-
			
 
				-#else /* !Windows */
			
 
				-
			
 
				-#define IQA_INLINE inline
			
 
				-#define IQA_EXPORT
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-#endif /* _OS_H_ */
			
--- a/3rdparty/iqa/include/math_utils.h
+++ b/3rdparty/iqa/include/math_utils.h
@@ -1,64 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _MATH_UTILS_H_
			
 
				-#define _MATH_UTILS_H_
			
 
				-
			
 
				-#include "iqa_os.h"
			
 
				-#include <math.h>
			
 
				-
			
 
				-/**
			
 
				- * Rounds a float to the nearest integer.
			
 
				- */
			
 
				-IQA_EXPORT int _round(float a);
			
 
				-
			
 
				-IQA_EXPORT int _max(int x, int y);
			
 
				-
			
 
				-IQA_EXPORT int _min(int x, int y);
			
 
				-
			
 
				-
			
 
				-/** 
			
 
				- * Compares 2 floats to the specified digit of precision.
			
 
				- * @return 0 if equal, 1 otherwise.
			
 
				- */
			
 
				-IQA_EXPORT int _cmp_float(float a, float b, int digits);
			
 
				-
			
 
				-
			
 
				-/** 
			
 
				- * Compares 2 matrices with the specified precision. 'b' is assumed to be the
			
 
				- * same size as 'a' or smaller.
			
 
				- * @return 0 if equal, 1 otherwise
			
 
				- */
			
 
				-IQA_EXPORT int _matrix_cmp(const float *a, const float *b, int w, int h, int digits);
			
 
				-
			
 
				-#endif /*_MATH_UTILS_H_*/
			
--- a/3rdparty/iqa/include/ssim.h
+++ b/3rdparty/iqa/include/ssim.h
@@ -1,117 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#ifndef _SSIM_H_
			
 
				-#define _SSIM_H_
			
 
				-
			
 
				-#include "convolve.h"
			
 
				-
			
 
				-/*
			
 
				- * Circular-symmetric Gaussian weighting.
			
 
				- * h(x,y) = hg(x,y)/SUM(SUM(hg)) , for normalization to 1.0
			
 
				- * hg(x,y) = e^( -0.5*( (x^2+y^2)/sigma^2 ) ) , where sigma was 1.5
			
 
				- */
			
 
				-#define GAUSSIAN_LEN 11
			
 
				-static const float g_gaussian_window[GAUSSIAN_LEN][GAUSSIAN_LEN] = {
			
 
				-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
			
 
				-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
			
 
				-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
			
 
				-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
			
 
				-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
			
 
				-    {0.000274f, 0.002021f, 0.009577f, 0.029091f, 0.056662f, 0.070762f, 0.056662f, 0.029091f, 0.009577f, 0.002021f, 0.000274f},
			
 
				-    {0.000219f, 0.001619f, 0.007668f, 0.023294f, 0.045371f, 0.056662f, 0.045371f, 0.023294f, 0.007668f, 0.001619f, 0.000219f},
			
 
				-    {0.000112f, 0.000831f, 0.003937f, 0.011960f, 0.023294f, 0.029091f, 0.023294f, 0.011960f, 0.003937f, 0.000831f, 0.000112f},
			
 
				-    {0.000037f, 0.000274f, 0.001296f, 0.003937f, 0.007668f, 0.009577f, 0.007668f, 0.003937f, 0.001296f, 0.000274f, 0.000037f},
			
 
				-    {0.000008f, 0.000058f, 0.000274f, 0.000831f, 0.001619f, 0.002021f, 0.001619f, 0.000831f, 0.000274f, 0.000058f, 0.000008f},
			
 
				-    {0.000001f, 0.000008f, 0.000037f, 0.000112f, 0.000219f, 0.000274f, 0.000219f, 0.000112f, 0.000037f, 0.000008f, 0.000001f},
			
 
				-};
			
 
				-
			
 
				-/*
			
 
				- * Equal weight square window.
			
 
				- * Each pixel is equally weighted (1/64) so that SUM(x) = 1.0
			
 
				- */
			
 
				-#define SQUARE_LEN 8
			
 
				-static const float g_square_window[SQUARE_LEN][SQUARE_LEN] = {
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-    {0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f, 0.015625f},
			
 
				-};
			
 
				-
			
 
				-/* Holds intermediate SSIM values for map-reduce operation. */
			
 
				-struct _ssim_int {
			
 
				-    double l;
			
 
				-    double c;
			
 
				-    double s;
			
 
				-};
			
 
				-
			
 
				-/* Defines the pointers to the map-reduce functions. */
			
 
				-typedef int (*_map)(const struct _ssim_int *, void *);
			
 
				-typedef float (*_reduce)(int, int, void *);
			
 
				-
			
 
				-/* Arguments for map-reduce. The 'context' is user-defined. */
			
 
				-struct _map_reduce {
			
 
				-    _map map;
			
 
				-    _reduce reduce;
			
 
				-    void *context;
			
 
				-};
			
 
				-
			
 
				-/**
			
 
				- * Private method that calculates the SSIM value on a pre-processed image.
			
 
				- *
			
 
				- * The input images must have stride==width. This method does not scale.
			
 
				- *
			
 
				- * @note Image buffers are modified.
			
 
				- *
			
 
				- * Map-reduce is used for doing the final SSIM calculation. The map function is
			
 
				- * called for every pixel, and the reduce is called at the end. The context is
			
 
				- * caller-defined and *not* modified by this method.
			
 
				- *
			
 
				- * @param ref Original reference image
			
 
				- * @param cmp Distorted image
			
 
				- * @param w Width of the images
			
 
				- * @param h Height of the images
			
 
				- * @param k The kernel used as the window function
			
 
				- * @param mr Optional map-reduce functions to use to calculate SSIM. Required
			
 
				- *           if 'args' is not null. Ignored if 'args' is null.
			
 
				- * @param args Optional SSIM arguments for fine control of the algorithm. 0 for defaults.
			
 
				- *             Defaults are a=b=g=1.0, L=255, K1=0.01, K2=0.03
			
 
				- * @return The mean SSIM over the entire image (MSSIM), or INFINITY if error.
			
 
				- */
			
 
				-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args);
			
 
				-
			
 
				-#endif /* _SSIM_H_ */
			
--- a/3rdparty/iqa/source/convolve.c
+++ b/3rdparty/iqa/source/convolve.c
@@ -1,195 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "convolve.h"
			
 
				-#include <stdlib.h>
			
 
				-
			
 
				-float KBND_SYMMETRIC(const float *img, int w, int h, int x, int y, float bnd_const)
			
 
				-{
			
 
				-    (void)bnd_const;
			
 
				-    if (x<0) x=-1-x;
			
 
				-    else if (x>=w) x=(w-(x-w))-1;
			
 
				-    if (y<0) y=-1-y;
			
 
				-    else if (y>=h) y=(h-(y-h))-1;
			
 
				-    return img[y*w + x];
			
 
				-}
			
 
				-
			
 
				-float KBND_REPLICATE(const float *img, int w, int h, int x, int y, float bnd_const)
			
 
				-{
			
 
				-    (void)bnd_const;
			
 
				-    if (x<0) x=0;
			
 
				-    if (x>=w) x=w-1;
			
 
				-    if (y<0) y=0;
			
 
				-    if (y>=h) y=h-1;
			
 
				-    return img[y*w + x];
			
 
				-}
			
 
				-
			
 
				-float KBND_CONSTANT(const float *img, int w, int h, int x, int y, float bnd_const)
			
 
				-{
			
 
				-    if (x<0) x=0;
			
 
				-    if (y<0) y=0;
			
 
				-    if (x>=w || y>=h)
			
 
				-        return bnd_const;
			
 
				-    return img[y*w + x];
			
 
				-}
			
 
				-
			
 
				-static float _calc_scale(const struct _kernel *k)
			
 
				-{
			
 
				-    int ii,k_len;
			
 
				-    double sum=0.0;
			
 
				-
			
 
				-    if (k->normalized)
			
 
				-        return 1.0f;
			
 
				-    else {
			
 
				-        k_len = k->w * k->h;
			
 
				-        for (ii=0; ii<k_len; ++ii)
			
 
				-            sum += k->kernel[ii];
			
 
				-        if (sum != 0.0)
			
 
				-            return (float)(1.0 / sum);
			
 
				-        return 1.0f;
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-void _iqa_convolve(float *img, int w, int h, const struct _kernel *k, float *result, int *rw, int *rh)
			
 
				-{
			
 
				-    int x,y,kx,ky,u,v;
			
 
				-    int uc = k->w/2;
			
 
				-    int vc = k->h/2;
			
 
				-    int kw_even = (k->w&1)?0:1;
			
 
				-    int kh_even = (k->h&1)?0:1;
			
 
				-    int dst_w = w - k->w + 1;
			
 
				-    int dst_h = h - k->h + 1;
			
 
				-    int img_offset,k_offset;
			
 
				-    double sum;
			
 
				-    float scale, *dst=result;
			
 
				-
			
 
				-    if (!dst)
			
 
				-        dst = img; /* Convolve in-place */
			
 
				-
			
 
				-    /* Kernel is applied to all positions where the kernel is fully contained
			
 
				-     * in the image */
			
 
				-    scale = _calc_scale(k);
			
 
				-    for (y=0; y < dst_h; ++y) {
			
 
				-        for (x=0; x < dst_w; ++x) {
			
 
				-            sum = 0.0;
			
 
				-            k_offset = 0;
			
 
				-            ky = y+vc;
			
 
				-            kx = x+uc;
			
 
				-            for (v=-vc; v <= vc-kh_even; ++v) {
			
 
				-                img_offset = (ky+v)*w + kx;
			
 
				-                for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
			
 
				-                    sum += img[img_offset+u] * k->kernel[k_offset];
			
 
				-                }
			
 
				-            }
			
 
				-            dst[y*dst_w + x] = (float)(sum * scale);
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    if (rw) *rw = dst_w;
			
 
				-    if (rh) *rh = dst_h;
			
 
				-}
			
 
				-
			
 
				-int _iqa_img_filter(float *img, int w, int h, const struct _kernel *k, float *result)
			
 
				-{
			
 
				-    int x,y;
			
 
				-    int img_offset;
			
 
				-    float scale, *dst=result;
			
 
				-
			
 
				-    if (!k || !k->bnd_opt)
			
 
				-        return 1;
			
 
				-
			
 
				-    if (!dst) {
			
 
				-        dst = (float*)malloc(w*h*sizeof(float));
			
 
				-        if (!dst)
			
 
				-            return 2;
			
 
				-    }
			
 
				-
			
 
				-    scale = _calc_scale(k);
			
 
				-
			
 
				-    /* Kernel is applied to all positions where top-left corner is in the image */
			
 
				-    for (y=0; y < h; ++y) {
			
 
				-        for (x=0; x < w; ++x) {
			
 
				-            dst[y*w + x] = _iqa_filter_pixel(img, w, h, x, y, k, scale);
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    /* If no result buffer given, copy results to image buffer */
			
 
				-    if (!result) {
			
 
				-        for (y=0; y<h; ++y) {
			
 
				-            img_offset = y*w;
			
 
				-            for (x=0; x<w; ++x, ++img_offset) {
			
 
				-                img[img_offset] = dst[img_offset];
			
 
				-            }
			
 
				-        }
			
 
				-        free(dst);
			
 
				-    }
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-float _iqa_filter_pixel(const float *img, int w, int h, int x, int y, const struct _kernel *k, const float kscale)
			
 
				-{
			
 
				-    int u,v,uc,vc;
			
 
				-    int kw_even,kh_even;
			
 
				-    int x_edge_left,x_edge_right,y_edge_top,y_edge_bottom;
			
 
				-    int edge,img_offset,k_offset;
			
 
				-    double sum;
			
 
				-
			
 
				-    if (!k)
			
 
				-        return img[y*w + x];
			
 
				-
			
 
				-    uc = k->w/2;
			
 
				-    vc = k->h/2;
			
 
				-    kw_even = (k->w&1)?0:1;
			
 
				-    kh_even = (k->h&1)?0:1;
			
 
				-    x_edge_left  = uc;
			
 
				-    x_edge_right = w-uc;
			
 
				-    y_edge_top = vc;
			
 
				-    y_edge_bottom = h-vc;
			
 
				-
			
 
				-    edge = 0;
			
 
				-    if (x < x_edge_left || y < y_edge_top || x >= x_edge_right || y >= y_edge_bottom)
			
 
				-        edge = 1;
			
 
				-
			
 
				-    sum = 0.0;
			
 
				-    k_offset = 0;
			
 
				-    for (v=-vc; v <= vc-kh_even; ++v) {
			
 
				-        img_offset = (y+v)*w + x;
			
 
				-        for (u=-uc; u <= uc-kw_even; ++u, ++k_offset) {
			
 
				-            if (!edge)
			
 
				-                sum += img[img_offset+u] * k->kernel[k_offset];
			
 
				-            else
			
 
				-                sum += k->bnd_opt(img, w, h, x+u, y+v, k->bnd_const) * k->kernel[k_offset];
			
 
				-        }
			
 
				-    }
			
 
				-    return (float)(sum * kscale);
			
 
				-}
			
--- a/3rdparty/iqa/source/decimate.c
+++ b/3rdparty/iqa/source/decimate.c
@@ -1,59 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "decimate.h"
			
 
				-#include <stdlib.h>
			
 
				-
			
 
				-int _iqa_decimate(float *img, int w, int h, int factor, const struct _kernel *k, float *result, int *rw, int *rh)
			
 
				-{
			
 
				-    int x,y;
			
 
				-    int sw = w/factor + (w&1);
			
 
				-    int sh = h/factor + (h&1);
			
 
				-    int dst_offset;
			
 
				-    float *dst=img;
			
 
				-
			
 
				-    if (result)
			
 
				-        dst = result;
			
 
				-
			
 
				-    /* Downsample */
			
 
				-    for (y=0; y<sh; ++y) {
			
 
				-        dst_offset = y*sw;
			
 
				-        for (x=0; x<sw; ++x,++dst_offset) {
			
 
				-            dst[dst_offset] = _iqa_filter_pixel(img, w, h, x*factor, y*factor, k, 1.0f);
			
 
				-        }
			
 
				-    }
			
 
				-    
			
 
				-    if (rw) *rw = sw;
			
 
				-    if (rh) *rh = sh;
			
 
				-    return 0;
			
 
				-}
			
--- a/3rdparty/iqa/source/math_utils.c
+++ b/3rdparty/iqa/source/math_utils.c
@@ -1,82 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "math_utils.h"
			
 
				-#include <math.h>
			
 
				-
			
 
				-int _round(float a)
			
 
				-{
			
 
				-    int sign_a = a > 0.0f ? 1 : -1;
			
 
				-    return a-(int)a >= 0.5 ? (int)a + sign_a : (int)a;
			
 
				-}
			
 
				-
			
 
				-int _max(int x, int y)
			
 
				-{
			
 
				-    return x >= y ? x : y;
			
 
				-}
			
 
				-
			
 
				-int _min(int x, int y)
			
 
				-{
			
 
				-    return x <= y ? x : y;
			
 
				-}
			
 
				-
			
 
				-int _cmp_float(float a, float b, int digits)
			
 
				-{
			
 
				-    /* Round */
			
 
				-    int sign_a = a > 0.0f ? 1 : -1;
			
 
				-    int sign_b = b > 0.0f ? 1 : -1;
			
 
				-    double scale = pow(10.0, (double)digits);
			
 
				-    double ax = a * scale;
			
 
				-    double bx = b * scale;
			
 
				-    int ai = ax-(int)ax >= 0.5 ? (int)ax + sign_a : (int)ax;
			
 
				-    int bi = bx-(int)bx >= 0.5 ? (int)bx + sign_b : (int)bx;
			
 
				-
			
 
				-    /* Compare */
			
 
				-    return ai == bi ? 0 : 1;
			
 
				-}
			
 
				-
			
 
				-int _matrix_cmp(const float *a, const float *b, int w, int h, int digits)
			
 
				-{
			
 
				-    int offset;
			
 
				-    int result=0;
			
 
				-    int len=w*h;
			
 
				-    for (offset=0; offset<len; ++offset) {
			
 
				-        if (_cmp_float(a[offset], b[offset], digits)) {
			
 
				-            result = 1;
			
 
				-            break;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
--- a/3rdparty/iqa/source/ms_ssim.c
+++ b/3rdparty/iqa/source/ms_ssim.c
@@ -1,277 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "iqa.h"
			
 
				-#include "ssim.h"
			
 
				-#include "decimate.h"
			
 
				-#include <math.h>
			
 
				-#include <stdlib.h>
			
 
				-#include <string.h>
			
 
				-
			
 
				-/* Default number of scales */
			
 
				-#define SCALES  5
			
 
				-
			
 
				-/* Low-pass filter for down-sampling (9/7 biorthogonal wavelet filter) */
			
 
				-#define LPF_LEN 9
			
 
				-static const float g_lpf[LPF_LEN][LPF_LEN] = {
			
 
				-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
			
 
				-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
			
 
				-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
			
 
				-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
			
 
				-   { 0.016114f,-0.010146f,-0.047149f, 0.160885f, 0.363505f, 0.160885f,-0.047149f,-0.010146f, 0.016114f},
			
 
				-   { 0.007132f,-0.004490f,-0.020867f, 0.071207f, 0.160885f, 0.071207f,-0.020867f,-0.004490f, 0.007132f},
			
 
				-   {-0.002090f, 0.001316f, 0.006115f,-0.020867f,-0.047149f,-0.020867f, 0.006115f, 0.001316f,-0.002090f},
			
 
				-   {-0.000450f, 0.000283f, 0.001316f,-0.004490f,-0.010146f,-0.004490f, 0.001316f, 0.000283f,-0.000450f},
			
 
				-   { 0.000714f,-0.000450f,-0.002090f, 0.007132f, 0.016114f, 0.007132f,-0.002090f,-0.000450f, 0.000714f},
			
 
				-};
			
 
				-
			
 
				-/* Alpha, beta, and gamma values for each scale */
			
 
				-static float g_alphas[] = { 0.0000f, 0.0000f, 0.0000f, 0.0000f, 0.1333f };
			
 
				-static float g_betas[]  = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
			
 
				-static float g_gammas[] = { 0.0448f, 0.2856f, 0.3001f, 0.2363f, 0.1333f };
			
 
				-
			
 
				-
			
 
				-struct _context {
			
 
				-    double l;  /* Luminance */
			
 
				-    double c;  /* Contrast */
			
 
				-    double s;  /* Structure */
			
 
				-    float alpha;
			
 
				-    float beta;
			
 
				-    float gamma;
			
 
				-};
			
 
				-
			
 
				-/* Called for each pixel */
			
 
				-int _ms_ssim_map(const struct _ssim_int *si, void *ctx)
			
 
				-{
			
 
				-    struct _context *ms_ctx = (struct _context*)ctx;
			
 
				-    ms_ctx->l += si->l;
			
 
				-    ms_ctx->c += si->c;
			
 
				-    ms_ctx->s += si->s;
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-/* Called to calculate the final result */
			
 
				-float _ms_ssim_reduce(int w, int h, void *ctx)
			
 
				-{
			
 
				-    double size = (double)(w*h);
			
 
				-    struct _context *ms_ctx = (struct _context*)ctx;
			
 
				-    ms_ctx->l = pow(ms_ctx->l / size, (double)ms_ctx->alpha);
			
 
				-    ms_ctx->c = pow(ms_ctx->c / size, (double)ms_ctx->beta);
			
 
				-    ms_ctx->s = pow(fabs(ms_ctx->s / size), (double)ms_ctx->gamma);
			
 
				-    return (float)(ms_ctx->l * ms_ctx->c * ms_ctx->s);
			
 
				-}
			
 
				-
			
 
				-/* Releases the scaled buffers */
			
 
				-void _free_buffers(float **buf, int scales)
			
 
				-{
			
 
				-    int idx;
			
 
				-    for (idx=0; idx<scales; ++idx)
			
 
				-        free(buf[idx]);
			
 
				-}
			
 
				-
			
 
				-/* Allocates the scaled buffers. If error, all buffers are free'd */
			
 
				-int _alloc_buffers(float **buf, int w, int h, int scales)
			
 
				-{
			
 
				-    int idx;
			
 
				-    int cur_w = w;
			
 
				-    int cur_h = h;
			
 
				-    for (idx=0; idx<scales; ++idx) {
			
 
				-        buf[idx] = (float*)malloc(cur_w*cur_h*sizeof(float));
			
 
				-        if (!buf[idx]) {
			
 
				-            _free_buffers(buf, idx);
			
 
				-            return 1;
			
 
				-        }
			
 
				-        cur_w = cur_w/2 + (cur_w&1);
			
 
				-        cur_h = cur_h/2 + (cur_h&1);
			
 
				-    }
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * MS_SSIM(X,Y) = Lm(x,y)^aM * MULT[j=1->M]( Cj(x,y)^bj  *  Sj(x,y)^gj )
			
 
				- * where,
			
 
				- *  L = mean
			
 
				- *  C = variance
			
 
				- *  S = cross-correlation
			
 
				- *
			
 
				- *  b1=g1=0.0448, b2=g2=0.2856, b3=g3=0.3001, b4=g4=0.2363, a5=b5=g5=0.1333
			
 
				- */
			
 
				-float iqa_ms_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, 
			
 
				-    int stride, const struct iqa_ms_ssim_args *args)
			
 
				-{
			
 
				-    int wang=0;
			
 
				-    int scales=SCALES;
			
 
				-    int gauss=1;
			
 
				-    const float *alphas=g_alphas, *betas=g_betas, *gammas=g_gammas;
			
 
				-    int idx,x,y,cur_w,cur_h;
			
 
				-    int offset,src_offset;
			
 
				-    float **ref_imgs, **cmp_imgs; /* Array of pointers to scaled images */
			
 
				-    float msssim;
			
 
				-    struct _kernel lpf, window;
			
 
				-    struct iqa_ssim_args s_args;
			
 
				-    struct _map_reduce mr;
			
 
				-    struct _context ms_ctx;
			
 
				-
			
 
				-    if (args) {
			
 
				-        wang   = args->wang;
			
 
				-        gauss  = args->gaussian;
			
 
				-        scales = args->scales;
			
 
				-        if (args->alphas)
			
 
				-            alphas = args->alphas;
			
 
				-        if (args->betas)
			
 
				-            betas  = args->betas;
			
 
				-        if (args->gammas)
			
 
				-            gammas = args->gammas;
			
 
				-    }
			
 
				-
			
 
				-    /* Make sure we won't scale below 1x1 */
			
 
				-    cur_w = w;
			
 
				-    cur_h = h;
			
 
				-    for (idx=0; idx<scales; ++idx) {
			
 
				-        if ( gauss ? cur_w<GAUSSIAN_LEN || cur_h<GAUSSIAN_LEN : cur_w<LPF_LEN || cur_h<LPF_LEN )
			
 
				-            return INFINITY;
			
 
				-        cur_w /= 2;
			
 
				-        cur_h /= 2;
			
 
				-    }
			
 
				-
			
 
				-    window.kernel = (float*)g_square_window;
			
 
				-    window.w = window.h = SQUARE_LEN;
			
 
				-    window.normalized = 1;
			
 
				-    window.bnd_opt = KBND_SYMMETRIC;
			
 
				-    if (gauss) {
			
 
				-        window.kernel = (float*)g_gaussian_window;
			
 
				-        window.w = window.h = GAUSSIAN_LEN;
			
 
				-    }
			
 
				-
			
 
				-    mr.map     = _ms_ssim_map;
			
 
				-    mr.reduce  = _ms_ssim_reduce;
			
 
				-
			
 
				-    /* Allocate the scaled image buffers */
			
 
				-    ref_imgs = (float**)malloc(scales*sizeof(float*));
			
 
				-    cmp_imgs = (float**)malloc(scales*sizeof(float*));
			
 
				-    if (!ref_imgs || !cmp_imgs) {
			
 
				-        if (ref_imgs) free(ref_imgs);
			
 
				-        if (cmp_imgs) free(cmp_imgs);
			
 
				-        return INFINITY;
			
 
				-    }
			
 
				-    if (_alloc_buffers(ref_imgs, w, h, scales)) {
			
 
				-        free(ref_imgs);
			
 
				-        free(cmp_imgs);
			
 
				-        return INFINITY;
			
 
				-    }
			
 
				-    if (_alloc_buffers(cmp_imgs, w, h, scales)) {
			
 
				-        _free_buffers(ref_imgs, scales);
			
 
				-        free(ref_imgs);
			
 
				-        free(cmp_imgs);
			
 
				-        return INFINITY;
			
 
				-    }
			
 
				-
			
 
				-    /* Copy original images into first scale buffer, forcing stride = width. */
			
 
				-    for (y=0; y<h; ++y) {
			
 
				-        src_offset = y*stride;
			
 
				-        offset = y*w;
			
 
				-        for (x=0; x<w; ++x, ++offset, ++src_offset) {
			
 
				-            ref_imgs[0][offset] = (float)ref[src_offset];
			
 
				-            cmp_imgs[0][offset] = (float)cmp[src_offset];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    /* Create scaled versions of the images */
			
 
				-    cur_w=w;
			
 
				-    cur_h=h;
			
 
				-    lpf.kernel = (float*)g_lpf;
			
 
				-    lpf.w = lpf.h = LPF_LEN;
			
 
				-    lpf.normalized = 1;
			
 
				-    lpf.bnd_opt = KBND_SYMMETRIC;
			
 
				-    for (idx=1; idx<scales; ++idx) {
			
 
				-        if (_iqa_decimate(ref_imgs[idx-1], cur_w, cur_h, 2, &lpf, ref_imgs[idx], 0, 0) ||
			
 
				-            _iqa_decimate(cmp_imgs[idx-1], cur_w, cur_h, 2, &lpf, cmp_imgs[idx], &cur_w, &cur_h))
			
 
				-        {
			
 
				-            _free_buffers(ref_imgs, scales);
			
 
				-            _free_buffers(cmp_imgs, scales);
			
 
				-            free(ref_imgs);
			
 
				-            free(cmp_imgs);
			
 
				-            return INFINITY;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    cur_w=w;
			
 
				-    cur_h=h;
			
 
				-    msssim = 1.0;
			
 
				-    for (idx=0; idx<scales; ++idx) {
			
 
				-
			
 
				-        ms_ctx.l = 0;
			
 
				-        ms_ctx.c = 0;
			
 
				-        ms_ctx.s = 0;
			
 
				-        ms_ctx.alpha = alphas[idx];
			
 
				-        ms_ctx.beta  = betas[idx];
			
 
				-        ms_ctx.gamma = gammas[idx];
			
 
				-
			
 
				-        if (!wang) {
			
 
				-            /* MS-SSIM* (Rouse/Hemami) */
			
 
				-            s_args.alpha = 1.0f;
			
 
				-            s_args.beta  = 1.0f;
			
 
				-            s_args.gamma = 1.0f;
			
 
				-            s_args.K1 = 0.0f; /* Force stabilization constants to 0 */
			
 
				-            s_args.K2 = 0.0f;
			
 
				-            s_args.L  = 255;
			
 
				-            s_args.f  = 1; /* Don't resize */
			
 
				-            mr.context = &ms_ctx;
			
 
				-            msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
			
 
				-        }
			
 
				-        else {
			
 
				-            /* MS-SSIM (Wang) */
			
 
				-            s_args.alpha = 1.0f;
			
 
				-            s_args.beta  = 1.0f;
			
 
				-            s_args.gamma = 1.0f;
			
 
				-            s_args.K1 = 0.01f;
			
 
				-            s_args.K2 = 0.03f;
			
 
				-            s_args.L  = 255;
			
 
				-            s_args.f  = 1; /* Don't resize */
			
 
				-            mr.context = &ms_ctx;
			
 
				-            msssim *= _iqa_ssim(ref_imgs[idx], cmp_imgs[idx], cur_w, cur_h, &window, &mr, &s_args);
			
 
				-        }
			
 
				-
			
 
				-        if (msssim == INFINITY)
			
 
				-            break;
			
 
				-        cur_w = cur_w/2 + (cur_w&1);
			
 
				-        cur_h = cur_h/2 + (cur_h&1);
			
 
				-    }
			
 
				-
			
 
				-    _free_buffers(ref_imgs, scales);
			
 
				-    _free_buffers(cmp_imgs, scales);
			
 
				-    free(ref_imgs);
			
 
				-    free(cmp_imgs);
			
 
				-
			
 
				-    return msssim;
			
 
				-}
			
--- a/3rdparty/iqa/source/mse.c
+++ b/3rdparty/iqa/source/mse.c
@@ -1,50 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "iqa.h"
			
 
				-
			
 
				-/* MSE(a,b) = 1/N * SUM((a-b)^2) */
			
 
				-float iqa_mse(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
			
 
				-{
			
 
				-    int error, offset;
			
 
				-    unsigned long long sum=0;
			
 
				-    int ww,hh;
			
 
				-    for (hh=0; hh<h; ++hh) {
			
 
				-        offset = hh*stride;
			
 
				-        for (ww=0; ww<w; ++ww, ++offset) {
			
 
				-            error = ref[offset] - cmp[offset];
			
 
				-            sum += error * error;
			
 
				-        }
			
 
				-    }
			
 
				-    return (float)( (double)sum / (double)(w*h) );
			
 
				-}
			
--- a/3rdparty/iqa/source/psnr.c
+++ b/3rdparty/iqa/source/psnr.c
@@ -1,42 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "iqa.h"
			
 
				-#include <math.h>
			
 
				-
			
 
				-/* PSNR(a,b) = 10*log10(L^2 / MSE(a,b)), where L=2^b - 1 (8bit = 255) */
			
 
				-float iqa_psnr(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride)
			
 
				-{
			
 
				-    const int L_sqd = 255 * 255;
			
 
				-    return (float)( 10.0 * log10( L_sqd / iqa_mse(ref,cmp,w,h,stride) ) );
			
 
				-}
			
--- a/3rdparty/iqa/source/ssim.c
+++ b/3rdparty/iqa/source/ssim.c
@@ -1,322 +0,0 @@
 
				-/*
			
 
				- * Copyright (c) 2011, Tom Distler (http://tdistler.com)
			
 
				- * All rights reserved.
			
 
				- *
			
 
				- * The BSD License
			
 
				- *
			
 
				- * Redistribution and use in source and binary forms, with or without
			
 
				- * modification, are permitted provided that the following conditions are met:
			
 
				- *
			
 
				- * - Redistributions of source code must retain the above copyright notice, 
			
 
				- *   this list of conditions and the following disclaimer.
			
 
				- *
			
 
				- * - Redistributions in binary form must reproduce the above copyright notice,
			
 
				- *   this list of conditions and the following disclaimer in the documentation
			
 
				- *   and/or other materials provided with the distribution.
			
 
				- *
			
 
				- * - Neither the name of the tdistler.com nor the names of its contributors may
			
 
				- *   be used to endorse or promote products derived from this software without
			
 
				- *   specific prior written permission.
			
 
				- *
			
 
				- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
			
 
				- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
			
 
				- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
			
 
				- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
			
 
				- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
			
 
				- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
			
 
				- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
			
 
				- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
			
 
				- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
			
 
				- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
			
 
				- * POSSIBILITY OF SUCH DAMAGE.
			
 
				- */
			
 
				-
			
 
				-#include "iqa.h"
			
 
				-#include "convolve.h"
			
 
				-#include "decimate.h"
			
 
				-#include "math_utils.h"
			
 
				-#include "ssim.h"
			
 
				-#include <stdlib.h>
			
 
				-#include <math.h>
			
 
				-
			
 
				-
			
 
				-/* Forward declarations. */
			
 
				-IQA_INLINE static double _calc_luminance(float, float, float, float);
			
 
				-IQA_INLINE static double _calc_contrast(double, float, float, float, float);
			
 
				-IQA_INLINE static double _calc_structure(float, double, float, float, float, float);
			
 
				-static int _ssim_map(const struct _ssim_int *, void *);
			
 
				-static float _ssim_reduce(int, int, void *);
			
 
				-
			
 
				-/* 
			
 
				- * SSIM(x,y)=(2*ux*uy + C1)*(2sxy + C2) / (ux^2 + uy^2 + C1)*(sx^2 + sy^2 + C2)
			
 
				- * where,
			
 
				- *  ux = SUM(w*x)
			
 
				- *  sx = (SUM(w*(x-ux)^2)^0.5
			
 
				- *  sxy = SUM(w*(x-ux)*(y-uy))
			
 
				- *
			
 
				- * Returns mean SSIM. MSSIM(X,Y) = 1/M * SUM(SSIM(x,y))
			
 
				- */
			
 
				-float iqa_ssim(const unsigned char *ref, const unsigned char *cmp, int w, int h, int stride,
			
 
				-    int gaussian, const struct iqa_ssim_args *args)
			
 
				-{
			
 
				-    int scale;
			
 
				-    int x,y,src_offset,offset;
			
 
				-    float *ref_f,*cmp_f;
			
 
				-    struct _kernel low_pass;
			
 
				-    struct _kernel window;
			
 
				-    float result;
			
 
				-    double ssim_sum=0.0;
			
 
				-    struct _map_reduce mr;
			
 
				-
			
 
				-    /* Initialize algorithm parameters */
			
 
				-    scale = _max( 1, _round( (float)_min(w,h) / 256.0f ) );
			
 
				-    if (args) {
			
 
				-        if(args->f)
			
 
				-            scale = args->f;
			
 
				-        mr.map     = _ssim_map;
			
 
				-        mr.reduce  = _ssim_reduce;
			
 
				-        mr.context = (void*)&ssim_sum;
			
 
				-    }
			
 
				-    window.kernel = (float*)g_square_window;
			
 
				-    window.w = window.h = SQUARE_LEN;
			
 
				-    window.normalized = 1;
			
 
				-    window.bnd_opt = KBND_SYMMETRIC;
			
 
				-    if (gaussian) {
			
 
				-        window.kernel = (float*)g_gaussian_window;
			
 
				-        window.w = window.h = GAUSSIAN_LEN;
			
 
				-    }
			
 
				-
			
 
				-    /* Convert image values to floats. Forcing stride = width. */
			
 
				-    ref_f = (float*)malloc(w*h*sizeof(float));
			
 
				-    cmp_f = (float*)malloc(w*h*sizeof(float));
			
 
				-    if (!ref_f || !cmp_f) {
			
 
				-        if (ref_f) free(ref_f);
			
 
				-        if (cmp_f) free(cmp_f);
			
 
				-        return INFINITY;
			
 
				-    }
			
 
				-    for (y=0; y<h; ++y) {
			
 
				-        src_offset = y*stride;
			
 
				-        offset = y*w;
			
 
				-        for (x=0; x<w; ++x, ++offset, ++src_offset) {
			
 
				-            ref_f[offset] = (float)ref[src_offset];
			
 
				-            cmp_f[offset] = (float)cmp[src_offset];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    /* Scale the images down if required */
			
 
				-    if (scale > 1) {
			
 
				-        /* Generate simple low-pass filter */
			
 
				-        low_pass.kernel = (float*)malloc(scale*scale*sizeof(float));
			
 
				-        if (!low_pass.kernel) {
			
 
				-            free(ref_f);
			
 
				-            free(cmp_f);
			
 
				-            return INFINITY;
			
 
				-        }
			
 
				-        low_pass.w = low_pass.h = scale;
			
 
				-        low_pass.normalized = 0;
			
 
				-        low_pass.bnd_opt = KBND_SYMMETRIC;
			
 
				-        for (offset=0; offset<scale*scale; ++offset)
			
 
				-            low_pass.kernel[offset] = 1.0f/(scale*scale);
			
 
				-
			
 
				-        /* Resample */
			
 
				-        if (_iqa_decimate(ref_f, w, h, scale, &low_pass, 0, 0, 0) ||
			
 
				-            _iqa_decimate(cmp_f, w, h, scale, &low_pass, 0, &w, &h)) { /* Update w/h */
			
 
				-            free(ref_f);
			
 
				-            free(cmp_f);
			
 
				-            free(low_pass.kernel);
			
 
				-            return INFINITY;
			
 
				-        }
			
 
				-        free(low_pass.kernel);
			
 
				-    }
			
 
				-
			
 
				-    result = _iqa_ssim(ref_f, cmp_f, w, h, &window, &mr, args);
			
 
				-    
			
 
				-    free(ref_f);
			
 
				-    free(cmp_f);
			
 
				-
			
 
				-    return result;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/* _iqa_ssim */
			
 
				-float _iqa_ssim(float *ref, float *cmp, int w, int h, const struct _kernel *k, const struct _map_reduce *mr, const struct iqa_ssim_args *args)
			
 
				-{
			
 
				-    float alpha=1.0f, beta=1.0f, gamma=1.0f;
			
 
				-    int L=255;
			
 
				-    float K1=0.01f, K2=0.03f;
			
 
				-    float C1,C2,C3;
			
 
				-    int x,y,offset;
			
 
				-    float *ref_mu,*cmp_mu,*ref_sigma_sqd,*cmp_sigma_sqd,*sigma_both;
			
 
				-    double ssim_sum, numerator, denominator;
			
 
				-    double luminance_comp, contrast_comp, structure_comp, sigma_root;
			
 
				-    struct _ssim_int sint;
			
 
				-
			
 
				-    /* Initialize algorithm parameters */
			
 
				-    if (args) {
			
 
				-        if (!mr)
			
 
				-            return INFINITY;
			
 
				-        alpha = args->alpha;
			
 
				-        beta  = args->beta;
			
 
				-        gamma = args->gamma;
			
 
				-        L     = args->L;
			
 
				-        K1    = args->K1;
			
 
				-        K2    = args->K2;
			
 
				-    }
			
 
				-    C1 = (K1*L)*(K1*L);
			
 
				-    C2 = (K2*L)*(K2*L);
			
 
				-    C3 = C2 / 2.0f;
			
 
				-
			
 
				-    ref_mu = (float*)malloc(w*h*sizeof(float));
			
 
				-    cmp_mu = (float*)malloc(w*h*sizeof(float));
			
 
				-    ref_sigma_sqd = (float*)malloc(w*h*sizeof(float));
			
 
				-    cmp_sigma_sqd = (float*)malloc(w*h*sizeof(float));
			
 
				-    sigma_both = (float*)malloc(w*h*sizeof(float));
			
 
				-    if (!ref_mu || !cmp_mu || !ref_sigma_sqd || !cmp_sigma_sqd || !sigma_both) {
			
 
				-        if (ref_mu) free(ref_mu);
			
 
				-        if (cmp_mu) free(cmp_mu);
			
 
				-        if (ref_sigma_sqd) free(ref_sigma_sqd);
			
 
				-        if (cmp_sigma_sqd) free(cmp_sigma_sqd);
			
 
				-        if (sigma_both) free(sigma_both);
			
 
				-        return INFINITY;
			
 
				-    }
			
 
				-
			
 
				-    /* Calculate mean */
			
 
				-    _iqa_convolve(ref, w, h, k, ref_mu, 0, 0);
			
 
				-    _iqa_convolve(cmp, w, h, k, cmp_mu, 0, 0);
			
 
				-
			
 
				-    for (y=0; y<h; ++y) {
			
 
				-        offset = y*w;
			
 
				-        for (x=0; x<w; ++x, ++offset) {
			
 
				-            ref_sigma_sqd[offset] = ref[offset] * ref[offset];
			
 
				-            cmp_sigma_sqd[offset] = cmp[offset] * cmp[offset];
			
 
				-            sigma_both[offset] = ref[offset] * cmp[offset];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    /* Calculate sigma */
			
 
				-    _iqa_convolve(ref_sigma_sqd, w, h, k, 0, 0, 0);
			
 
				-    _iqa_convolve(cmp_sigma_sqd, w, h, k, 0, 0, 0);
			
 
				-    _iqa_convolve(sigma_both, w, h, k, 0, &w, &h); /* Update the width and height */
			
 
				-
			
 
				-    /* The convolution results are smaller by the kernel width and height */
			
 
				-    for (y=0; y<h; ++y) {
			
 
				-        offset = y*w;
			
 
				-        for (x=0; x<w; ++x, ++offset) {
			
 
				-            ref_sigma_sqd[offset] -= ref_mu[offset] * ref_mu[offset];
			
 
				-            cmp_sigma_sqd[offset] -= cmp_mu[offset] * cmp_mu[offset];
			
 
				-            sigma_both[offset] -= ref_mu[offset] * cmp_mu[offset];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    ssim_sum = 0.0;
			
 
				-    for (y=0; y<h; ++y) {
			
 
				-        offset = y*w;
			
 
				-        for (x=0; x<w; ++x, ++offset) {
			
 
				-
			
 
				-            if (!args) {
			
 
				-                /* The default case */
			
 
				-                numerator   = (2.0 * ref_mu[offset] * cmp_mu[offset] + C1) * (2.0 * sigma_both[offset] + C2);
			
 
				-                denominator = (ref_mu[offset]*ref_mu[offset] + cmp_mu[offset]*cmp_mu[offset] + C1) * 
			
 
				-                    (ref_sigma_sqd[offset] + cmp_sigma_sqd[offset] + C2);
			
 
				-                ssim_sum += numerator / denominator;
			
 
				-            }
			
 
				-            else {
			
 
				-                /* User tweaked alpha, beta, or gamma */
			
 
				-
			
 
				-                /* passing a negative number to sqrt() cause a domain error */
			
 
				-                if (ref_sigma_sqd[offset] < 0.0f)
			
 
				-                    ref_sigma_sqd[offset] = 0.0f;
			
 
				-                if (cmp_sigma_sqd[offset] < 0.0f)
			
 
				-                    cmp_sigma_sqd[offset] = 0.0f;
			
 
				-                sigma_root = sqrt(ref_sigma_sqd[offset] * cmp_sigma_sqd[offset]);
			
 
				-
			
 
				-                luminance_comp = _calc_luminance(ref_mu[offset], cmp_mu[offset], C1, alpha);
			
 
				-                contrast_comp  = _calc_contrast(sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C2, beta);
			
 
				-                structure_comp = _calc_structure(sigma_both[offset], sigma_root, ref_sigma_sqd[offset], cmp_sigma_sqd[offset], C3, gamma);
			
 
				-
			
 
				-                sint.l = luminance_comp;
			
 
				-                sint.c = contrast_comp;
			
 
				-                sint.s = structure_comp;
			
 
				-
			
 
				-                if (mr->map(&sint, mr->context))
			
 
				-                    return INFINITY;
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    free(ref_mu);
			
 
				-    free(cmp_mu);
			
 
				-    free(ref_sigma_sqd);
			
 
				-    free(cmp_sigma_sqd);
			
 
				-    free(sigma_both);
			
 
				-
			
 
				-    if (!args)
			
 
				-        return (float)(ssim_sum / (double)(w*h));
			
 
				-    return mr->reduce(w, h, mr->context);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/* _ssim_map */
			
 
				-int _ssim_map(const struct _ssim_int *si, void *ctx)
			
 
				-{
			
 
				-    double *ssim_sum = (double*)ctx;
			
 
				-    *ssim_sum += si->l * si->c * si->s;
			
 
				-    return 0;
			
 
				-}
			
 
				-
			
 
				-/* _ssim_reduce */
			
 
				-float _ssim_reduce(int w, int h, void *ctx)
			
 
				-{
			
 
				-    double *ssim_sum = (double*)ctx;
			
 
				-    return (float)(*ssim_sum / (double)(w*h));
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/* _calc_luminance */
			
 
				-IQA_INLINE static double _calc_luminance(float mu1, float mu2, float C1, float alpha)
			
 
				-{
			
 
				-    double result;
			
 
				-    float sign;
			
 
				-    /* For MS-SSIM* */
			
 
				-    if (C1 == 0 && mu1*mu1 == 0 && mu2*mu2 == 0)
			
 
				-        return 1.0;
			
 
				-    result = (2.0 * mu1 * mu2 + C1) / (mu1*mu1 + mu2*mu2 + C1);
			
 
				-    if (alpha == 1.0f)
			
 
				-        return result;
			
 
				-    sign = result < 0.0 ? -1.0f : 1.0f;
			
 
				-    return sign * pow(fabs(result),(double)alpha);
			
 
				-}
			
 
				-
			
 
				-/* _calc_contrast */
			
 
				-IQA_INLINE static double _calc_contrast(double sigma_comb_12, float sigma1_sqd, float sigma2_sqd, float C2, float beta)
			
 
				-{
			
 
				-    double result;
			
 
				-    float sign;
			
 
				-    /* For MS-SSIM* */
			
 
				-    if (C2 == 0 && sigma1_sqd + sigma2_sqd == 0)
			
 
				-        return 1.0;
			
 
				-    result = (2.0 * sigma_comb_12 + C2) / (sigma1_sqd + sigma2_sqd + C2);
			
 
				-    if (beta == 1.0f)
			
 
				-        return result;
			
 
				-    sign = result < 0.0 ? -1.0f : 1.0f;
			
 
				-    return sign * pow(fabs(result),(double)beta);
			
 
				-}
			
 
				-
			
 
				-/* _calc_structure */
			
 
				-IQA_INLINE static double _calc_structure(float sigma_12, double sigma_comb_12, float sigma1, float sigma2, float C3, float gamma)
			
 
				-{
			
 
				-    double result;
			
 
				-    float sign;
			
 
				-    /* For MS-SSIM* */
			
 
				-    if (C3 == 0 && sigma_comb_12 == 0) {
			
 
				-        if (sigma1 == 0 && sigma2 == 0)
			
 
				-            return 1.0;
			
 
				-        else if (sigma1 == 0 || sigma2 == 0)
			
 
				-            return 0.0;
			
 
				-    }
			
 
				-    result = (sigma_12 + C3) / (sigma_comb_12 + C3);
			
 
				-    if (gamma == 1.0f)
			
 
				-        return result;
			
 
				-    sign = result < 0.0 ? -1.0f : 1.0f;
			
 
				-    return sign * pow(fabs(result),(double)gamma);
			
 
				-}
			
--- a/3rdparty/libsquish/LICENSE
+++ b/3rdparty/libsquish/LICENSE
@@ -1,20 +0,0 @@
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
--- a/3rdparty/libsquish/README
+++ b/3rdparty/libsquish/README
@@ -1,35 +0,0 @@
 
				-LICENSE
			
 
				--------
			
 
				-
			
 
				-The squish library is distributed under the terms and conditions of the MIT
			
 
				-license. This license is specified at the top of each source file and must be
			
 
				-preserved in its entirety.
			
 
				-
			
 
				-BUILDING AND INSTALLING THE LIBRARY
			
 
				------------------------------------
			
 
				-
			
 
				-If you are using Visual Studio 2003 or above under Windows then load the Visual
			
 
				-Studio 2003 project in the vs7 folder. By default, the library is built using
			
 
				-SSE2 optimisations. To change this either change or remove the SQUISH_USE_SSE=2
			
 
				-from the preprocessor symbols.
			
 
				-
			
 
				-If you are using a Mac then load the Xcode 2.2 project in the distribution. By
			
 
				-default, the library is built using Altivec optimisations. To change this
			
 
				-either change or remove SQUISH_USE_ALTIVEC=1 from the preprocessor symbols. I
			
 
				-guess I'll have to think about changing this for the new Intel Macs that are
			
 
				-rolling out...
			
 
				-
			
 
				-If you are using unix then first edit the config file in the base directory of
			
 
				-the distribution, enabling Altivec or SSE with the USE_ALTIVEC or USE_SSE
			
 
				-variables, and editing the optimisation flags passed to the C++ compiler if
			
 
				-necessary. Then make can be used to build the library, and make install (from
			
 
				-the superuser account) can be used to install (into /usr/local by default).
			
 
				-
			
 
				-REPORTING BUGS OR FEATURE REQUESTS
			
 
				-----------------------------------
			
 
				-
			
 
				-Feedback can be sent to Simon Brown (the developer) at [email protected]
			
 
				-
			
 
				-New releases are announced on the squish library homepage at
			
 
				-http://sjbrown.co.uk/?code=squish
			
 
				-
			
--- a/3rdparty/libsquish/alpha.cpp
+++ b/3rdparty/libsquish/alpha.cpp
@@ -1,350 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "alpha.h"
			
 
				-
			
 
				-#include <climits>
			
 
				-#include <algorithm>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-static int FloatToInt( float a, int limit )
			
 
				-{
			
 
				-	// use ANSI round-to-zero behaviour to get round-to-nearest
			
 
				-	int i = ( int )( a + 0.5f );
			
 
				-
			
 
				-	// clamp to the limit
			
 
				-	if( i < 0 )
			
 
				-		i = 0;
			
 
				-	else if( i > limit )
			
 
				-		i = limit; 
			
 
				-
			
 
				-	// done
			
 
				-	return i;
			
 
				-}
			
 
				-
			
 
				-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
			
 
				-{
			
 
				-	u8* bytes = reinterpret_cast< u8* >( block );
			
 
				-	
			
 
				-	// quantise and pack the alpha values pairwise
			
 
				-	for( int i = 0; i < 8; ++i )
			
 
				-	{
			
 
				-		// quantise down to 4 bits
			
 
				-		float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f );
			
 
				-		float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f );
			
 
				-		int quant1 = FloatToInt( alpha1, 15 );
			
 
				-		int quant2 = FloatToInt( alpha2, 15 );
			
 
				-		
			
 
				-		// set alpha to zero where masked
			
 
				-		int bit1 = 1 << ( 2*i );
			
 
				-		int bit2 = 1 << ( 2*i + 1 );
			
 
				-		if( ( mask & bit1 ) == 0 )
			
 
				-			quant1 = 0;
			
 
				-		if( ( mask & bit2 ) == 0 )
			
 
				-			quant2 = 0;
			
 
				-
			
 
				-		// pack into the byte
			
 
				-		bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void DecompressAlphaDxt3( u8* rgba, void const* block )
			
 
				-{
			
 
				-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
			
 
				-	
			
 
				-	// unpack the alpha values pairwise
			
 
				-	for( int i = 0; i < 8; ++i )
			
 
				-	{
			
 
				-		// quantise down to 4 bits
			
 
				-		u8 quant = bytes[i];
			
 
				-		
			
 
				-		// unpack the values
			
 
				-		u8 lo = quant & 0x0f;
			
 
				-		u8 hi = quant & 0xf0;
			
 
				-
			
 
				-		// convert back up to bytes
			
 
				-		rgba[8*i + 3] = lo | ( lo << 4 );
			
 
				-		rgba[8*i + 7] = hi | ( hi >> 4 );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void FixRange( int& min, int& max, int steps )
			
 
				-{
			
 
				-	if( max - min < steps )
			
 
				-		max = std::min( min + steps, 255 );
			
 
				-	if( max - min < steps )
			
 
				-		min = std::max( 0, max - steps );
			
 
				-}
			
 
				-
			
 
				-static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
			
 
				-{
			
 
				-	// fit each alpha value to the codebook
			
 
				-	int err = 0;
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-	{
			
 
				-		// check this pixel is valid
			
 
				-		int bit = 1 << i;
			
 
				-		if( ( mask & bit ) == 0 )
			
 
				-		{
			
 
				-			// use the first code
			
 
				-			indices[i] = 0;
			
 
				-			continue;
			
 
				-		}
			
 
				-		
			
 
				-		// find the least error and corresponding index
			
 
				-		int value = rgba[4*i + 3];
			
 
				-		int least = INT_MAX;
			
 
				-		int index = 0;
			
 
				-		for( int j = 0; j < 8; ++j )
			
 
				-		{
			
 
				-			// get the squared error from this code
			
 
				-			int dist = ( int )value - ( int )codes[j];
			
 
				-			dist *= dist;
			
 
				-			
			
 
				-			// compare with the best so far
			
 
				-			if( dist < least )
			
 
				-			{
			
 
				-				least = dist;
			
 
				-				index = j;
			
 
				-			}
			
 
				-		}
			
 
				-		
			
 
				-		// save this index and accumulate the error
			
 
				-		indices[i] = ( u8 )index;
			
 
				-		err += least;
			
 
				-	}
			
 
				-	
			
 
				-	// return the total error
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				-static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
			
 
				-{
			
 
				-	u8* bytes = reinterpret_cast< u8* >( block );
			
 
				-	
			
 
				-	// write the first two bytes
			
 
				-	bytes[0] = ( u8 )alpha0;
			
 
				-	bytes[1] = ( u8 )alpha1;
			
 
				-	
			
 
				-	// pack the indices with 3 bits each
			
 
				-	u8* dest = bytes + 2;
			
 
				-	u8 const* src = indices;
			
 
				-	for( int i = 0; i < 2; ++i )
			
 
				-	{
			
 
				-		// pack 8 3-bit values
			
 
				-		int value = 0;
			
 
				-		for( int j = 0; j < 8; ++j )
			
 
				-		{
			
 
				-			int index = *src++;
			
 
				-			value |= ( index << 3*j );
			
 
				-		}
			
 
				-			
			
 
				-		// store in 3 bytes
			
 
				-		for( int j = 0; j < 3; ++j )
			
 
				-		{
			
 
				-			int byte = ( value >> 8*j ) & 0xff;
			
 
				-			*dest++ = ( u8 )byte;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
			
 
				-{
			
 
				-	// check the relative values of the endpoints
			
 
				-	if( alpha0 > alpha1 )
			
 
				-	{
			
 
				-		// swap the indices
			
 
				-		u8 swapped[16];
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-		{
			
 
				-			u8 index = indices[i];
			
 
				-			if( index == 0 )
			
 
				-				swapped[i] = 1;
			
 
				-			else if( index == 1 )
			
 
				-				swapped[i] = 0;
			
 
				-			else if( index <= 5 )
			
 
				-				swapped[i] = 7 - index;
			
 
				-			else 
			
 
				-				swapped[i] = index;
			
 
				-		}
			
 
				-		
			
 
				-		// write the block
			
 
				-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// write the block
			
 
				-		WriteAlphaBlock( alpha0, alpha1, indices, block );
			
 
				-	}	
			
 
				-}
			
 
				-
			
 
				-static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
			
 
				-{
			
 
				-	// check the relative values of the endpoints
			
 
				-	if( alpha0 < alpha1 )
			
 
				-	{
			
 
				-		// swap the indices
			
 
				-		u8 swapped[16];
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-		{
			
 
				-			u8 index = indices[i];
			
 
				-			if( index == 0 )
			
 
				-				swapped[i] = 1;
			
 
				-			else if( index == 1 )
			
 
				-				swapped[i] = 0;
			
 
				-			else
			
 
				-				swapped[i] = 9 - index;
			
 
				-		}
			
 
				-		
			
 
				-		// write the block
			
 
				-		WriteAlphaBlock( alpha1, alpha0, swapped, block );
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// write the block
			
 
				-		WriteAlphaBlock( alpha0, alpha1, indices, block );
			
 
				-	}	
			
 
				-}
			
 
				-
			
 
				-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
			
 
				-{
			
 
				-	// get the range for 5-alpha and 7-alpha interpolation
			
 
				-	int min5 = 255;
			
 
				-	int max5 = 0;
			
 
				-	int min7 = 255;
			
 
				-	int max7 = 0;
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-	{
			
 
				-		// check this pixel is valid
			
 
				-		int bit = 1 << i;
			
 
				-		if( ( mask & bit ) == 0 )
			
 
				-			continue;
			
 
				-
			
 
				-		// incorporate into the min/max
			
 
				-		int value = rgba[4*i + 3];
			
 
				-		if( value < min7 )
			
 
				-			min7 = value;
			
 
				-		if( value > max7 )
			
 
				-			max7 = value;
			
 
				-		if( value != 0 && value < min5 )
			
 
				-			min5 = value;
			
 
				-		if( value != 255 && value > max5 )
			
 
				-			max5 = value;
			
 
				-	}
			
 
				-	
			
 
				-	// handle the case that no valid range was found
			
 
				-	if( min5 > max5 )
			
 
				-		min5 = max5;
			
 
				-	if( min7 > max7 )
			
 
				-		min7 = max7;
			
 
				-		
			
 
				-	// fix the range to be the minimum in each case
			
 
				-	FixRange( min5, max5, 5 );
			
 
				-	FixRange( min7, max7, 7 );
			
 
				-	
			
 
				-	// set up the 5-alpha code book
			
 
				-	u8 codes5[8];
			
 
				-	codes5[0] = ( u8 )min5;
			
 
				-	codes5[1] = ( u8 )max5;
			
 
				-	for( int i = 1; i < 5; ++i )
			
 
				-		codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 );
			
 
				-	codes5[6] = 0;
			
 
				-	codes5[7] = 255;
			
 
				-	
			
 
				-	// set up the 7-alpha code book
			
 
				-	u8 codes7[8];
			
 
				-	codes7[0] = ( u8 )min7;
			
 
				-	codes7[1] = ( u8 )max7;
			
 
				-	for( int i = 1; i < 7; ++i )
			
 
				-		codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 );
			
 
				-		
			
 
				-	// fit the data to both code books
			
 
				-	u8 indices5[16];
			
 
				-	u8 indices7[16];
			
 
				-	int err5 = FitCodes( rgba, mask, codes5, indices5 );
			
 
				-	int err7 = FitCodes( rgba, mask, codes7, indices7 );
			
 
				-	
			
 
				-	// save the block with least error
			
 
				-	if( err5 <= err7 )
			
 
				-		WriteAlphaBlock5( min5, max5, indices5, block );
			
 
				-	else
			
 
				-		WriteAlphaBlock7( min7, max7, indices7, block );
			
 
				-}
			
 
				-
			
 
				-void DecompressAlphaDxt5( u8* rgba, void const* block )
			
 
				-{
			
 
				-	// get the two alpha values
			
 
				-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
			
 
				-	int alpha0 = bytes[0];
			
 
				-	int alpha1 = bytes[1];
			
 
				-	
			
 
				-	// compare the values to build the codebook
			
 
				-	u8 codes[8];
			
 
				-	codes[0] = ( u8 )alpha0;
			
 
				-	codes[1] = ( u8 )alpha1;
			
 
				-	if( alpha0 <= alpha1 )
			
 
				-	{
			
 
				-		// use 5-alpha codebook
			
 
				-		for( int i = 1; i < 5; ++i )
			
 
				-			codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 );
			
 
				-		codes[6] = 0;
			
 
				-		codes[7] = 255;
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// use 7-alpha codebook
			
 
				-		for( int i = 1; i < 7; ++i )
			
 
				-			codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 );
			
 
				-	}
			
 
				-	
			
 
				-	// decode the indices
			
 
				-	u8 indices[16];
			
 
				-	u8 const* src = bytes + 2;
			
 
				-	u8* dest = indices;
			
 
				-	for( int i = 0; i < 2; ++i )
			
 
				-	{
			
 
				-		// grab 3 bytes
			
 
				-		int value = 0;
			
 
				-		for( int j = 0; j < 3; ++j )
			
 
				-		{
			
 
				-			int byte = *src++;
			
 
				-			value |= ( byte << 8*j );
			
 
				-		}
			
 
				-		
			
 
				-		// unpack 8 3-bit values from it
			
 
				-		for( int j = 0; j < 8; ++j )
			
 
				-		{
			
 
				-			int index = ( value >> 3*j ) & 0x7;
			
 
				-			*dest++ = ( u8 )index;
			
 
				-		}
			
 
				-	}
			
 
				-	
			
 
				-	// write out the indexed codebook values
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-		rgba[4*i + 3] = codes[indices[i]];
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/alpha.h
+++ b/3rdparty/libsquish/alpha.h
@@ -1,41 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_ALPHA_H
			
 
				-#define SQUISH_ALPHA_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
			
 
				-void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
			
 
				-
			
 
				-void DecompressAlphaDxt3( u8* rgba, void const* block );
			
 
				-void DecompressAlphaDxt5( u8* rgba, void const* block );
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_ALPHA_H
			
--- a/3rdparty/libsquish/clusterfit.cpp
+++ b/3rdparty/libsquish/clusterfit.cpp
@@ -1,392 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-	Copyright (c) 2007 Ignacio Castano                   [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "clusterfit.h"
			
 
				-#include "colourset.h"
			
 
				-#include "colourblock.h"
			
 
				-#include <cfloat>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric ) 
			
 
				-  : ColourFit( colours, flags )
			
 
				-{
			
 
				-	// set the iteration count
			
 
				-	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
			
 
				-
			
 
				-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
			
 
				-	if( metric )
			
 
				-		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
			
 
				-	else
			
 
				-		m_metric = VEC4_CONST( 1.0f );	
			
 
				-
			
 
				-	// initialise the best error
			
 
				-	m_besterror = VEC4_CONST( FLT_MAX );
			
 
				-
			
 
				-	// cache some values
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-
			
 
				-	// get the covariance matrix
			
 
				-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
			
 
				-	
			
 
				-	// compute the principle component
			
 
				-	m_principle = ComputePrincipleComponent( covariance );
			
 
				-}
			
 
				-
			
 
				-bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
			
 
				-{
			
 
				-	// cache some values
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-
			
 
				-	// build the list of dot products
			
 
				-	float dps[16];
			
 
				-	u8* order = ( u8* )m_order + 16*iteration;
			
 
				-	for( int i = 0; i < count; ++i )
			
 
				-	{
			
 
				-		dps[i] = Dot( values[i], axis );
			
 
				-		order[i] = ( u8 )i;
			
 
				-	}
			
 
				-		
			
 
				-	// stable sort using them
			
 
				-	for( int i = 0; i < count; ++i )
			
 
				-	{
			
 
				-		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
			
 
				-		{
			
 
				-			std::swap( dps[j], dps[j - 1] );
			
 
				-			std::swap( order[j], order[j - 1] );
			
 
				-		}
			
 
				-	}
			
 
				-	
			
 
				-	// check this ordering is unique
			
 
				-	for( int it = 0; it < iteration; ++it )
			
 
				-	{
			
 
				-		u8 const* prev = ( u8* )m_order + 16*it;
			
 
				-		bool same = true;
			
 
				-		for( int i = 0; i < count; ++i )
			
 
				-		{
			
 
				-			if( order[i] != prev[i] )
			
 
				-			{
			
 
				-				same = false;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-		if( same )
			
 
				-			return false;
			
 
				-	}
			
 
				-	
			
 
				-	// copy the ordering and weight all the points
			
 
				-	Vec3 const* unweighted = m_colours->GetPoints();
			
 
				-	float const* weights = m_colours->GetWeights();
			
 
				-	m_xsum_wsum = VEC4_CONST( 0.0f );
			
 
				-	for( int i = 0; i < count; ++i )
			
 
				-	{
			
 
				-		int j = order[i];
			
 
				-		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
			
 
				-		Vec4 w( weights[j] );
			
 
				-		Vec4 x = p*w;
			
 
				-		m_points_weights[i] = x;
			
 
				-		m_xsum_wsum += x;
			
 
				-	}
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-void ClusterFit::Compress3( void* block )
			
 
				-{
			
 
				-	// declare variables
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec4 const two = VEC4_CONST( 2.0 );
			
 
				-	Vec4 const one = VEC4_CONST( 1.0f );
			
 
				-	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
			
 
				-	Vec4 const zero = VEC4_CONST( 0.0f );
			
 
				-	Vec4 const half = VEC4_CONST( 0.5f );
			
 
				-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
			
 
				-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
			
 
				-
			
 
				-	// prepare an ordering using the principle axis
			
 
				-	ConstructOrdering( m_principle, 0 );
			
 
				-	
			
 
				-	// check all possible clusters and iterate on the total order
			
 
				-	Vec4 beststart = VEC4_CONST( 0.0f );
			
 
				-	Vec4 bestend = VEC4_CONST( 0.0f );
			
 
				-	Vec4 besterror = m_besterror;
			
 
				-	u8 bestindices[16];
			
 
				-	int bestiteration = 0;
			
 
				-	int besti = 0, bestj = 0;
			
 
				-	
			
 
				-	// loop over iterations (we avoid the case that all points in first or last cluster)
			
 
				-	for( int iterationIndex = 0;; )
			
 
				-	{
			
 
				-		// first cluster [0,i) is at the start
			
 
				-		Vec4 part0 = VEC4_CONST( 0.0f );
			
 
				-		for( int i = 0; i < count; ++i )
			
 
				-		{
			
 
				-			// second cluster [i,j) is half along
			
 
				-			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
			
 
				-			int jmin = ( i == 0 ) ? 1 : i;
			
 
				-			for( int j = jmin;; )
			
 
				-			{
			
 
				-				// last cluster [j,count) is at the end
			
 
				-				Vec4 part2 = m_xsum_wsum - part1 - part0;
			
 
				-				
			
 
				-				// compute least squares terms directly
			
 
				-				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
			
 
				-				Vec4 alpha2_sum = alphax_sum.SplatW();
			
 
				-
			
 
				-				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
			
 
				-				Vec4 beta2_sum = betax_sum.SplatW();
			
 
				-
			
 
				-				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
			
 
				-
			
 
				-				// compute the least-squares optimal points
			
 
				-				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
			
 
				-				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
			
 
				-				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
			
 
				-
			
 
				-				// clamp to the grid
			
 
				-				a = Min( one, Max( zero, a ) );
			
 
				-				b = Min( one, Max( zero, b ) );
			
 
				-				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
			
 
				-				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
			
 
				-				
			
 
				-				// compute the error (we skip the constant xxsum)
			
 
				-				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
			
 
				-				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
			
 
				-				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
			
 
				-				Vec4 e4 = MultiplyAdd( two, e3, e1 );
			
 
				-
			
 
				-				// apply the metric to the error term
			
 
				-				Vec4 e5 = e4*m_metric;
			
 
				-				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
			
 
				-				
			
 
				-				// keep the solution if it wins
			
 
				-				if( CompareAnyLessThan( error, besterror ) )
			
 
				-				{
			
 
				-					beststart = a;
			
 
				-					bestend = b;
			
 
				-					besti = i;
			
 
				-					bestj = j;
			
 
				-					besterror = error;
			
 
				-					bestiteration = iterationIndex;
			
 
				-				}
			
 
				-
			
 
				-				// advance
			
 
				-				if( j == count )
			
 
				-					break;
			
 
				-				part1 += m_points_weights[j];
			
 
				-				++j;
			
 
				-			}
			
 
				-
			
 
				-			// advance
			
 
				-			part0 += m_points_weights[i];
			
 
				-		}
			
 
				-		
			
 
				-		// stop if we didn't improve in this iteration
			
 
				-		if( bestiteration != iterationIndex )
			
 
				-			break;
			
 
				-			
			
 
				-		// advance if possible
			
 
				-		++iterationIndex;
			
 
				-		if( iterationIndex == m_iterationCount )
			
 
				-			break;
			
 
				-			
			
 
				-		// stop if a new iteration is an ordering that has already been tried
			
 
				-		Vec3 axis = ( bestend - beststart ).GetVec3();
			
 
				-		if( !ConstructOrdering( axis, iterationIndex ) )
			
 
				-			break;
			
 
				-	}
			
 
				-		
			
 
				-	// save the block if necessary
			
 
				-	if( CompareAnyLessThan( besterror, m_besterror ) )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 const* order = ( u8* )m_order + 16*bestiteration;
			
 
				-
			
 
				-		u8 unordered[16];
			
 
				-		for( int m = 0; m < besti; ++m )
			
 
				-			unordered[order[m]] = 0;
			
 
				-		for( int m = besti; m < bestj; ++m )
			
 
				-			unordered[order[m]] = 2;
			
 
				-		for( int m = bestj; m < count; ++m )
			
 
				-			unordered[order[m]] = 1;
			
 
				-
			
 
				-		m_colours->RemapIndices( unordered, bestindices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
			
 
				-
			
 
				-		// save the error
			
 
				-		m_besterror = besterror;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void ClusterFit::Compress4( void* block )
			
 
				-{
			
 
				-	// declare variables
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec4 const two = VEC4_CONST( 2.0f );
			
 
				-	Vec4 const one = VEC4_CONST( 1.0f );
			
 
				-	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
			
 
				-	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
			
 
				-	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
			
 
				-	Vec4 const zero = VEC4_CONST( 0.0f );
			
 
				-	Vec4 const half = VEC4_CONST( 0.5f );
			
 
				-	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
			
 
				-	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
			
 
				-
			
 
				-	// prepare an ordering using the principle axis
			
 
				-	ConstructOrdering( m_principle, 0 );
			
 
				-	
			
 
				-	// check all possible clusters and iterate on the total order
			
 
				-	Vec4 beststart = VEC4_CONST( 0.0f );
			
 
				-	Vec4 bestend = VEC4_CONST( 0.0f );
			
 
				-	Vec4 besterror = m_besterror;
			
 
				-	u8 bestindices[16];
			
 
				-	int bestiteration = 0;
			
 
				-	int besti = 0, bestj = 0, bestk = 0;
			
 
				-	
			
 
				-	// loop over iterations (we avoid the case that all points in first or last cluster)
			
 
				-	for( int iterationIndex = 0;; )
			
 
				-	{
			
 
				-		// first cluster [0,i) is at the start
			
 
				-		Vec4 part0 = VEC4_CONST( 0.0f );
			
 
				-		for( int i = 0; i < count; ++i )
			
 
				-		{
			
 
				-			// second cluster [i,j) is one third along
			
 
				-			Vec4 part1 = VEC4_CONST( 0.0f );
			
 
				-			for( int j = i;; )
			
 
				-			{
			
 
				-				// third cluster [j,k) is two thirds along
			
 
				-				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
			
 
				-				int kmin = ( j == 0 ) ? 1 : j;
			
 
				-				for( int k = kmin;; )
			
 
				-				{
			
 
				-					// last cluster [k,count) is at the end
			
 
				-					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
			
 
				-
			
 
				-					// compute least squares terms directly
			
 
				-					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
			
 
				-					Vec4 const alpha2_sum = alphax_sum.SplatW();
			
 
				-					
			
 
				-					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
			
 
				-					Vec4 const beta2_sum = betax_sum.SplatW();
			
 
				-					
			
 
				-					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
			
 
				-
			
 
				-					// compute the least-squares optimal points
			
 
				-					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
			
 
				-					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
			
 
				-					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
			
 
				-
			
 
				-					// clamp to the grid
			
 
				-					a = Min( one, Max( zero, a ) );
			
 
				-					b = Min( one, Max( zero, b ) );
			
 
				-					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
			
 
				-					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
			
 
				-					
			
 
				-					// compute the error (we skip the constant xxsum)
			
 
				-					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
			
 
				-					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
			
 
				-					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
			
 
				-					Vec4 e4 = MultiplyAdd( two, e3, e1 );
			
 
				-
			
 
				-					// apply the metric to the error term
			
 
				-					Vec4 e5 = e4*m_metric;
			
 
				-					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
			
 
				-
			
 
				-					// keep the solution if it wins
			
 
				-					if( CompareAnyLessThan( error, besterror ) )
			
 
				-					{
			
 
				-						beststart = a;
			
 
				-						bestend = b;
			
 
				-						besterror = error;
			
 
				-						besti = i;
			
 
				-						bestj = j;
			
 
				-						bestk = k;
			
 
				-						bestiteration = iterationIndex;
			
 
				-					}
			
 
				-
			
 
				-					// advance
			
 
				-					if( k == count )
			
 
				-						break;
			
 
				-					part2 += m_points_weights[k];
			
 
				-					++k;
			
 
				-				}
			
 
				-
			
 
				-				// advance
			
 
				-				if( j == count )
			
 
				-					break;
			
 
				-				part1 += m_points_weights[j];
			
 
				-				++j;
			
 
				-			}
			
 
				-
			
 
				-			// advance
			
 
				-			part0 += m_points_weights[i];
			
 
				-		}
			
 
				-		
			
 
				-		// stop if we didn't improve in this iteration
			
 
				-		if( bestiteration != iterationIndex )
			
 
				-			break;
			
 
				-			
			
 
				-		// advance if possible
			
 
				-		++iterationIndex;
			
 
				-		if( iterationIndex == m_iterationCount )
			
 
				-			break;
			
 
				-			
			
 
				-		// stop if a new iteration is an ordering that has already been tried
			
 
				-		Vec3 axis = ( bestend - beststart ).GetVec3();
			
 
				-		if( !ConstructOrdering( axis, iterationIndex ) )
			
 
				-			break;
			
 
				-	}
			
 
				-
			
 
				-	// save the block if necessary
			
 
				-	if( CompareAnyLessThan( besterror, m_besterror ) )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 const* order = ( u8* )m_order + 16*bestiteration;
			
 
				-
			
 
				-		u8 unordered[16];
			
 
				-		for( int m = 0; m < besti; ++m )
			
 
				-			unordered[order[m]] = 0;
			
 
				-		for( int m = besti; m < bestj; ++m )
			
 
				-			unordered[order[m]] = 2;
			
 
				-		for( int m = bestj; m < bestk; ++m )
			
 
				-			unordered[order[m]] = 3;
			
 
				-		for( int m = bestk; m < count; ++m )
			
 
				-			unordered[order[m]] = 1;
			
 
				-
			
 
				-		m_colours->RemapIndices( unordered, bestindices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
			
 
				-
			
 
				-		// save the error
			
 
				-		m_besterror = besterror;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/clusterfit.h
+++ b/3rdparty/libsquish/clusterfit.h
@@ -1,61 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-	Copyright (c) 2007 Ignacio Castano                   [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_CLUSTERFIT_H
			
 
				-#define SQUISH_CLUSTERFIT_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "maths.h"
			
 
				-#include "simd.h"
			
 
				-#include "colourfit.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-class ClusterFit : public ColourFit
			
 
				-{
			
 
				-public:
			
 
				-	ClusterFit( ColourSet const* colours, int flags, float* metric );
			
 
				-	
			
 
				-private:
			
 
				-	bool ConstructOrdering( Vec3 const& axis, int iteration );
			
 
				-
			
 
				-	virtual void Compress3( void* block );
			
 
				-	virtual void Compress4( void* block );
			
 
				-
			
 
				-	enum { kMaxIterations = 8 };
			
 
				-
			
 
				-	int m_iterationCount;
			
 
				-	Vec3 m_principle;
			
 
				-	u8 m_order[16*kMaxIterations];
			
 
				-	Vec4 m_points_weights[16];
			
 
				-	Vec4 m_xsum_wsum;
			
 
				-	Vec4 m_metric;
			
 
				-	Vec4 m_besterror;
			
 
				-};
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_CLUSTERFIT_H
			
--- a/3rdparty/libsquish/colourblock.cpp
+++ b/3rdparty/libsquish/colourblock.cpp
@@ -1,214 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "colourblock.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-static int FloatToInt( float a, int limit )
			
 
				-{
			
 
				-	// use ANSI round-to-zero behaviour to get round-to-nearest
			
 
				-	int i = ( int )( a + 0.5f );
			
 
				-
			
 
				-	// clamp to the limit
			
 
				-	if( i < 0 )
			
 
				-		i = 0;
			
 
				-	else if( i > limit )
			
 
				-		i = limit; 
			
 
				-
			
 
				-	// done
			
 
				-	return i;
			
 
				-}
			
 
				-
			
 
				-static int FloatTo565( Vec3::Arg colour )
			
 
				-{
			
 
				-	// get the components in the correct range
			
 
				-	int r = FloatToInt( 31.0f*colour.X(), 31 );
			
 
				-	int g = FloatToInt( 63.0f*colour.Y(), 63 );
			
 
				-	int b = FloatToInt( 31.0f*colour.Z(), 31 );
			
 
				-	
			
 
				-	// pack into a single value
			
 
				-	return ( r << 11 ) | ( g << 5 ) | b;
			
 
				-}
			
 
				-
			
 
				-static void WriteColourBlock( int a, int b, u8* indices, void* block )
			
 
				-{
			
 
				-	// get the block as bytes
			
 
				-	u8* bytes = ( u8* )block;
			
 
				-
			
 
				-	// write the endpoints
			
 
				-	bytes[0] = ( u8 )( a & 0xff );
			
 
				-	bytes[1] = ( u8 )( a >> 8 );
			
 
				-	bytes[2] = ( u8 )( b & 0xff );
			
 
				-	bytes[3] = ( u8 )( b >> 8 );
			
 
				-	
			
 
				-	// write the indices
			
 
				-	for( int i = 0; i < 4; ++i )
			
 
				-	{
			
 
				-		u8 const* ind = indices + 4*i;
			
 
				-		bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
			
 
				-{
			
 
				-	// get the packed values
			
 
				-	int a = FloatTo565( start );
			
 
				-	int b = FloatTo565( end );
			
 
				-
			
 
				-	// remap the indices
			
 
				-	u8 remapped[16];
			
 
				-	if( a <= b )
			
 
				-	{
			
 
				-		// use the indices directly
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-			remapped[i] = indices[i];
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// swap a and b
			
 
				-		std::swap( a, b );
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-		{
			
 
				-			if( indices[i] == 0 )
			
 
				-				remapped[i] = 1;
			
 
				-			else if( indices[i] == 1 )
			
 
				-				remapped[i] = 0;
			
 
				-			else
			
 
				-				remapped[i] = indices[i];
			
 
				-		}
			
 
				-	}
			
 
				-	
			
 
				-	// write the block
			
 
				-	WriteColourBlock( a, b, remapped, block );
			
 
				-}
			
 
				-
			
 
				-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
			
 
				-{
			
 
				-	// get the packed values
			
 
				-	int a = FloatTo565( start );
			
 
				-	int b = FloatTo565( end );
			
 
				-
			
 
				-	// remap the indices
			
 
				-	u8 remapped[16];
			
 
				-	if( a < b )
			
 
				-	{
			
 
				-		// swap a and b
			
 
				-		std::swap( a, b );
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-			remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
			
 
				-	}
			
 
				-	else if( a == b )
			
 
				-	{
			
 
				-		// use index 0
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-			remapped[i] = 0;
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// use the indices directly
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-			remapped[i] = indices[i];
			
 
				-	}
			
 
				-	
			
 
				-	// write the block
			
 
				-	WriteColourBlock( a, b, remapped, block );
			
 
				-}
			
 
				-
			
 
				-static int Unpack565( u8 const* packed, u8* colour )
			
 
				-{
			
 
				-	// build the packed value
			
 
				-	int value = ( int )packed[0] | ( ( int )packed[1] << 8 );
			
 
				-	
			
 
				-	// get the components in the stored range
			
 
				-	u8 red = ( u8 )( ( value >> 11 ) & 0x1f );
			
 
				-	u8 green = ( u8 )( ( value >> 5 ) & 0x3f );
			
 
				-	u8 blue = ( u8 )( value & 0x1f );
			
 
				-
			
 
				-	// scale up to 8 bits
			
 
				-	colour[0] = ( red << 3 ) | ( red >> 2 );
			
 
				-	colour[1] = ( green << 2 ) | ( green >> 4 );
			
 
				-	colour[2] = ( blue << 3 ) | ( blue >> 2 );
			
 
				-	colour[3] = 255;
			
 
				-	
			
 
				-	// return the value
			
 
				-	return value;
			
 
				-}
			
 
				-
			
 
				-void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
			
 
				-{
			
 
				-	// get the block bytes
			
 
				-	u8 const* bytes = reinterpret_cast< u8 const* >( block );
			
 
				-	
			
 
				-	// unpack the endpoints
			
 
				-	u8 codes[16];
			
 
				-	int a = Unpack565( bytes, codes );
			
 
				-	int b = Unpack565( bytes + 2, codes + 4 );
			
 
				-	
			
 
				-	// generate the midpoints
			
 
				-	for( int i = 0; i < 3; ++i )
			
 
				-	{
			
 
				-		int c = codes[i];
			
 
				-		int d = codes[4 + i];
			
 
				-
			
 
				-		if( isDxt1 && a <= b )
			
 
				-		{
			
 
				-			codes[8 + i] = ( u8 )( ( c + d )/2 );
			
 
				-			codes[12 + i] = 0;
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			codes[8 + i] = ( u8 )( ( 2*c + d )/3 );
			
 
				-			codes[12 + i] = ( u8 )( ( c + 2*d )/3 );
			
 
				-		}
			
 
				-	}
			
 
				-	
			
 
				-	// fill in alpha for the intermediate values
			
 
				-	codes[8 + 3] = 255;
			
 
				-	codes[12 + 3] = ( isDxt1 && a <= b ) ? 0 : 255;
			
 
				-	
			
 
				-	// unpack the indices
			
 
				-	u8 indices[16];
			
 
				-	for( int i = 0; i < 4; ++i )
			
 
				-	{
			
 
				-		u8* ind = indices + 4*i;
			
 
				-		u8 packed = bytes[4 + i];
			
 
				-		
			
 
				-		ind[0] = packed & 0x3;
			
 
				-		ind[1] = ( packed >> 2 ) & 0x3;
			
 
				-		ind[2] = ( packed >> 4 ) & 0x3;
			
 
				-		ind[3] = ( packed >> 6 ) & 0x3;
			
 
				-	}
			
 
				-
			
 
				-	// store out the colours
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-	{
			
 
				-		u8 offset = 4*indices[i];
			
 
				-		for( int j = 0; j < 4; ++j )
			
 
				-			rgba[4*i + j] = codes[offset + j];
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/colourblock.h
+++ b/3rdparty/libsquish/colourblock.h
@@ -1,41 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_COLOURBLOCK_H
			
 
				-#define SQUISH_COLOURBLOCK_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "maths.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
			
 
				-void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
			
 
				-
			
 
				-void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_COLOURBLOCK_H
			
--- a/3rdparty/libsquish/colourfit.cpp
+++ b/3rdparty/libsquish/colourfit.cpp
@@ -1,54 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "colourfit.h"
			
 
				-#include "colourset.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-ColourFit::ColourFit( ColourSet const* colours, int flags ) 
			
 
				-  : m_colours( colours ), 
			
 
				-	m_flags( flags )
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-ColourFit::~ColourFit()
			
 
				-{
			
 
				-}
			
 
				-
			
 
				-void ColourFit::Compress( void* block )
			
 
				-{
			
 
				-	bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
			
 
				-	if( isDxt1 )
			
 
				-	{
			
 
				-		Compress3( block );
			
 
				-		if( !m_colours->IsTransparent() )
			
 
				-			Compress4( block );
			
 
				-	}
			
 
				-	else
			
 
				-		Compress4( block );
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/colourfit.h
+++ b/3rdparty/libsquish/colourfit.h
@@ -1,56 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_COLOURFIT_H
			
 
				-#define SQUISH_COLOURFIT_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "maths.h"
			
 
				-
			
 
				-#include <climits>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-class ColourSet;
			
 
				-
			
 
				-class ColourFit
			
 
				-{
			
 
				-public:
			
 
				-	ColourFit( ColourSet const* colours, int flags );
			
 
				-	virtual ~ColourFit();
			
 
				-
			
 
				-	void Compress( void* block );
			
 
				-
			
 
				-protected:
			
 
				-	virtual void Compress3( void* block ) = 0;
			
 
				-	virtual void Compress4( void* block ) = 0;
			
 
				-
			
 
				-	ColourSet const* m_colours;
			
 
				-	int m_flags;
			
 
				-};
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_COLOURFIT_H
			
--- a/3rdparty/libsquish/colourset.cpp
+++ b/3rdparty/libsquish/colourset.cpp
@@ -1,121 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "colourset.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
			
 
				-  : m_count( 0 ), 
			
 
				-	m_transparent( false )
			
 
				-{
			
 
				-	// check the compression mode for dxt1
			
 
				-	bool isDxt1 = ( ( flags & kDxt1 ) != 0 );
			
 
				-	bool weightByAlpha = ( ( flags & kWeightColourByAlpha ) != 0 );
			
 
				-
			
 
				-	// create the minimal set
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-	{
			
 
				-		// check this pixel is enabled
			
 
				-		int bit = 1 << i;
			
 
				-		if( ( mask & bit ) == 0 )
			
 
				-		{
			
 
				-			m_remap[i] = -1;
			
 
				-			continue;
			
 
				-		}
			
 
				-	
			
 
				-		// check for transparent pixels when using dxt1
			
 
				-		if( isDxt1 && rgba[4*i + 3] < 128 )
			
 
				-		{
			
 
				-			m_remap[i] = -1;
			
 
				-			m_transparent = true;
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		// loop over previous points for a match
			
 
				-		for( int j = 0;; ++j )
			
 
				-		{
			
 
				-			// allocate a new point
			
 
				-			if( j == i )
			
 
				-			{
			
 
				-				// normalise coordinates to [0,1]
			
 
				-				float x = ( float )rgba[4*i] / 255.0f;
			
 
				-				float y = ( float )rgba[4*i + 1] / 255.0f;
			
 
				-				float z = ( float )rgba[4*i + 2] / 255.0f;
			
 
				-				
			
 
				-				// ensure there is always non-zero weight even for zero alpha
			
 
				-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
			
 
				-
			
 
				-				// add the point
			
 
				-				m_points[m_count] = Vec3( x, y, z );
			
 
				-				m_weights[m_count] = ( weightByAlpha ? w : 1.0f );
			
 
				-				m_remap[i] = m_count;
			
 
				-				
			
 
				-				// advance
			
 
				-				++m_count;
			
 
				-				break;
			
 
				-			}
			
 
				-		
			
 
				-			// check for a match
			
 
				-			int oldbit = 1 << j;
			
 
				-			bool match = ( ( mask & oldbit ) != 0 )
			
 
				-				&& ( rgba[4*i] == rgba[4*j] )
			
 
				-				&& ( rgba[4*i + 1] == rgba[4*j + 1] )
			
 
				-				&& ( rgba[4*i + 2] == rgba[4*j + 2] )
			
 
				-				&& ( rgba[4*j + 3] >= 128 || !isDxt1 );
			
 
				-			if( match )
			
 
				-			{
			
 
				-				// get the index of the match
			
 
				-				int index = m_remap[j];
			
 
				-				
			
 
				-				// ensure there is always non-zero weight even for zero alpha
			
 
				-				float w = ( float )( rgba[4*i + 3] + 1 ) / 256.0f;
			
 
				-
			
 
				-				// map to this point and increase the weight
			
 
				-				m_weights[index] += ( weightByAlpha ? w : 1.0f );
			
 
				-				m_remap[i] = index;
			
 
				-				break;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// square root the weights
			
 
				-	for( int i = 0; i < m_count; ++i )
			
 
				-		m_weights[i] = std::sqrt( m_weights[i] );
			
 
				-}
			
 
				-
			
 
				-void ColourSet::RemapIndices( u8 const* source, u8* target ) const
			
 
				-{
			
 
				-	for( int i = 0; i < 16; ++i )
			
 
				-	{
			
 
				-		int j = m_remap[i];
			
 
				-		if( j == -1 )
			
 
				-			target[i] = 3;
			
 
				-		else
			
 
				-			target[i] = source[j];
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/colourset.h
+++ b/3rdparty/libsquish/colourset.h
@@ -1,58 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_COLOURSET_H
			
 
				-#define SQUISH_COLOURSET_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "maths.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-/*! @brief Represents a set of block colours
			
 
				-*/
			
 
				-class ColourSet
			
 
				-{
			
 
				-public:
			
 
				-	ColourSet( u8 const* rgba, int mask, int flags );
			
 
				-
			
 
				-	int GetCount() const { return m_count; }
			
 
				-	Vec3 const* GetPoints() const { return m_points; }
			
 
				-	float const* GetWeights() const { return m_weights; }
			
 
				-	bool IsTransparent() const { return m_transparent; }
			
 
				-
			
 
				-	void RemapIndices( u8 const* source, u8* target ) const;
			
 
				-
			
 
				-private:
			
 
				-	int m_count;
			
 
				-	Vec3 m_points[16];
			
 
				-	float m_weights[16];
			
 
				-	int m_remap[16];
			
 
				-	bool m_transparent;
			
 
				-};
			
 
				-
			
 
				-} // namespace sqish
			
 
				-
			
 
				-#endif // ndef SQUISH_COLOURSET_H
			
--- a/3rdparty/libsquish/config.h
+++ b/3rdparty/libsquish/config.h
@@ -1,49 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_CONFIG_H
			
 
				-#define SQUISH_CONFIG_H
			
 
				-
			
 
				-// Set to 1 when building squish to use Altivec instructions.
			
 
				-#ifndef SQUISH_USE_ALTIVEC
			
 
				-#define SQUISH_USE_ALTIVEC 0
			
 
				-#endif
			
 
				-
			
 
				-// Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
			
 
				-#ifndef SQUISH_USE_SSE
			
 
				-#define SQUISH_USE_SSE 0
			
 
				-#endif
			
 
				-
			
 
				-// Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
			
 
				-#if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
			
 
				-#error "Cannot enable both Altivec and SSE!"
			
 
				-#endif
			
 
				-#if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
			
 
				-#define SQUISH_USE_SIMD 1
			
 
				-#else
			
 
				-#define SQUISH_USE_SIMD 0
			
 
				-#endif
			
 
				-
			
 
				-#endif // ndef SQUISH_CONFIG_H
			
--- a/3rdparty/libsquish/maths.cpp
+++ b/3rdparty/libsquish/maths.cpp
@@ -1,259 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-/*! @file
			
 
				-
			
 
				-	The symmetric eigensystem solver algorithm is from 
			
 
				-	http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
			
 
				-*/
			
 
				-
			
 
				-#include "maths.h"
			
 
				-#include "simd.h"
			
 
				-#include <cfloat>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
			
 
				-{
			
 
				-	// compute the centroid
			
 
				-	float total = 0.0f;
			
 
				-	Vec3 centroid( 0.0f );
			
 
				-	for( int i = 0; i < n; ++i )
			
 
				-	{
			
 
				-		total += weights[i];
			
 
				-		centroid += weights[i]*points[i];
			
 
				-	}
			
 
				-	if( total > FLT_EPSILON )
			
 
				-		centroid /= total;
			
 
				-
			
 
				-	// accumulate the covariance matrix
			
 
				-	Sym3x3 covariance( 0.0f );
			
 
				-	for( int i = 0; i < n; ++i )
			
 
				-	{
			
 
				-		Vec3 a = points[i] - centroid;
			
 
				-		Vec3 b = weights[i]*a;
			
 
				-		
			
 
				-		covariance[0] += a.X()*b.X();
			
 
				-		covariance[1] += a.X()*b.Y();
			
 
				-		covariance[2] += a.X()*b.Z();
			
 
				-		covariance[3] += a.Y()*b.Y();
			
 
				-		covariance[4] += a.Y()*b.Z();
			
 
				-		covariance[5] += a.Z()*b.Z();
			
 
				-	}
			
 
				-	
			
 
				-	// return it
			
 
				-	return covariance;
			
 
				-}
			
 
				-
			
 
				-#if 0
			
 
				-
			
 
				-static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
			
 
				-{
			
 
				-	// compute M
			
 
				-	Sym3x3 m;
			
 
				-	m[0] = matrix[0] - evalue;
			
 
				-	m[1] = matrix[1];
			
 
				-	m[2] = matrix[2];
			
 
				-	m[3] = matrix[3] - evalue;
			
 
				-	m[4] = matrix[4];
			
 
				-	m[5] = matrix[5] - evalue;
			
 
				-
			
 
				-	// compute U
			
 
				-	Sym3x3 u;
			
 
				-	u[0] = m[3]*m[5] - m[4]*m[4];
			
 
				-	u[1] = m[2]*m[4] - m[1]*m[5];
			
 
				-	u[2] = m[1]*m[4] - m[2]*m[3];
			
 
				-	u[3] = m[0]*m[5] - m[2]*m[2];
			
 
				-	u[4] = m[1]*m[2] - m[4]*m[0];
			
 
				-	u[5] = m[0]*m[3] - m[1]*m[1];
			
 
				-
			
 
				-	// find the largest component
			
 
				-	float mc = std::fabs( u[0] );
			
 
				-	int mi = 0;
			
 
				-	for( int i = 1; i < 6; ++i )
			
 
				-	{
			
 
				-		float c = std::fabs( u[i] );
			
 
				-		if( c > mc )
			
 
				-		{
			
 
				-			mc = c;
			
 
				-			mi = i;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// pick the column with this component
			
 
				-	switch( mi )
			
 
				-	{
			
 
				-	case 0:
			
 
				-		return Vec3( u[0], u[1], u[2] );
			
 
				-
			
 
				-	case 1:
			
 
				-	case 3:
			
 
				-		return Vec3( u[1], u[3], u[4] );
			
 
				-
			
 
				-	default:
			
 
				-		return Vec3( u[2], u[4], u[5] );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
			
 
				-{
			
 
				-	// compute M
			
 
				-	Sym3x3 m;
			
 
				-	m[0] = matrix[0] - evalue;
			
 
				-	m[1] = matrix[1];
			
 
				-	m[2] = matrix[2];
			
 
				-	m[3] = matrix[3] - evalue;
			
 
				-	m[4] = matrix[4];
			
 
				-	m[5] = matrix[5] - evalue;
			
 
				-
			
 
				-	// find the largest component
			
 
				-	float mc = std::fabs( m[0] );
			
 
				-	int mi = 0;
			
 
				-	for( int i = 1; i < 6; ++i )
			
 
				-	{
			
 
				-		float c = std::fabs( m[i] );
			
 
				-		if( c > mc )
			
 
				-		{
			
 
				-			mc = c;
			
 
				-			mi = i;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// pick the first eigenvector based on this index
			
 
				-	switch( mi )
			
 
				-	{
			
 
				-	case 0:
			
 
				-	case 1:
			
 
				-		return Vec3( -m[1], m[0], 0.0f );
			
 
				-
			
 
				-	case 2:
			
 
				-		return Vec3( m[2], 0.0f, -m[0] );
			
 
				-
			
 
				-	case 3:
			
 
				-	case 4:
			
 
				-		return Vec3( 0.0f, -m[4], m[3] );
			
 
				-
			
 
				-	default:
			
 
				-		return Vec3( 0.0f, -m[5], m[4] );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
			
 
				-{
			
 
				-	// compute the cubic coefficients
			
 
				-	float c0 = matrix[0]*matrix[3]*matrix[5] 
			
 
				-		+ 2.0f*matrix[1]*matrix[2]*matrix[4] 
			
 
				-		- matrix[0]*matrix[4]*matrix[4] 
			
 
				-		- matrix[3]*matrix[2]*matrix[2] 
			
 
				-		- matrix[5]*matrix[1]*matrix[1];
			
 
				-	float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
			
 
				-		- matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
			
 
				-	float c2 = matrix[0] + matrix[3] + matrix[5];
			
 
				-
			
 
				-	// compute the quadratic coefficients
			
 
				-	float a = c1 - ( 1.0f/3.0f )*c2*c2;
			
 
				-	float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
			
 
				-
			
 
				-	// compute the root count check
			
 
				-	float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
			
 
				-
			
 
				-	// test the multiplicity
			
 
				-	if( FLT_EPSILON < Q )
			
 
				-	{
			
 
				-		// only one root, which implies we have a multiple of the identity
			
 
				-        return Vec3( 1.0f );
			
 
				-	}
			
 
				-	else if( Q < -FLT_EPSILON )
			
 
				-	{
			
 
				-		// three distinct roots
			
 
				-		float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
			
 
				-		float rho = std::sqrt( 0.25f*b*b - Q );
			
 
				-
			
 
				-		float rt = std::pow( rho, 1.0f/3.0f );
			
 
				-		float ct = std::cos( theta/3.0f );
			
 
				-		float st = std::sin( theta/3.0f );
			
 
				-
			
 
				-		float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
			
 
				-		float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
			
 
				-		float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
			
 
				-
			
 
				-		// pick the larger
			
 
				-		if( std::fabs( l2 ) > std::fabs( l1 ) )
			
 
				-			l1 = l2;
			
 
				-		if( std::fabs( l3 ) > std::fabs( l1 ) )
			
 
				-			l1 = l3;
			
 
				-
			
 
				-		// get the eigenvector
			
 
				-		return GetMultiplicity1Evector( matrix, l1 );
			
 
				-	}
			
 
				-	else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
			
 
				-	{
			
 
				-		// two roots
			
 
				-		float rt;
			
 
				-		if( b < 0.0f )
			
 
				-			rt = -std::pow( -0.5f*b, 1.0f/3.0f );
			
 
				-		else
			
 
				-			rt = std::pow( 0.5f*b, 1.0f/3.0f );
			
 
				-		
			
 
				-		float l1 = ( 1.0f/3.0f )*c2 + rt;		// repeated
			
 
				-		float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
			
 
				-		
			
 
				-		// get the eigenvector
			
 
				-		if( std::fabs( l1 ) > std::fabs( l2 ) )
			
 
				-			return GetMultiplicity2Evector( matrix, l1 );
			
 
				-		else
			
 
				-			return GetMultiplicity1Evector( matrix, l2 );
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-#else
			
 
				-
			
 
				-#define POWER_ITERATION_COUNT 	8
			
 
				-
			
 
				-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
			
 
				-{
			
 
				-	Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
			
 
				-	Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
			
 
				-	Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
			
 
				-	Vec4 v = VEC4_CONST( 1.0f );
			
 
				-	for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
			
 
				-	{
			
 
				-		// matrix multiply
			
 
				-		Vec4 w = row0*v.SplatX();
			
 
				-		w = MultiplyAdd(row1, v.SplatY(), w);
			
 
				-		w = MultiplyAdd(row2, v.SplatZ(), w);
			
 
				-
			
 
				-		// get max component from xyz in all channels
			
 
				-		Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
			
 
				-
			
 
				-		// divide through and advance
			
 
				-		v = w*Reciprocal(a);
			
 
				-	}
			
 
				-	return v.GetVec3();
			
 
				-}
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/maths.h
+++ b/3rdparty/libsquish/maths.h
@@ -1,233 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_MATHS_H
			
 
				-#define SQUISH_MATHS_H
			
 
				-
			
 
				-#include <cmath>
			
 
				-#include <algorithm>
			
 
				-#include "config.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-class Vec3
			
 
				-{
			
 
				-public:
			
 
				-	typedef Vec3 const& Arg;
			
 
				-
			
 
				-	Vec3()
			
 
				-	{
			
 
				-	}
			
 
				-
			
 
				-	explicit Vec3( float s )
			
 
				-	{
			
 
				-		m_x = s;
			
 
				-		m_y = s;
			
 
				-		m_z = s;
			
 
				-	}
			
 
				-
			
 
				-	Vec3( float x, float y, float z )
			
 
				-	{
			
 
				-		m_x = x;
			
 
				-		m_y = y;
			
 
				-		m_z = z;
			
 
				-	}
			
 
				-	
			
 
				-	float X() const { return m_x; }
			
 
				-	float Y() const { return m_y; }
			
 
				-	float Z() const { return m_z; }
			
 
				-	
			
 
				-	Vec3 operator-() const
			
 
				-	{
			
 
				-		return Vec3( -m_x, -m_y, -m_z );
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator+=( Arg v )
			
 
				-	{
			
 
				-		m_x += v.m_x;
			
 
				-		m_y += v.m_y;
			
 
				-		m_z += v.m_z;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator-=( Arg v )
			
 
				-	{
			
 
				-		m_x -= v.m_x;
			
 
				-		m_y -= v.m_y;
			
 
				-		m_z -= v.m_z;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator*=( Arg v )
			
 
				-	{
			
 
				-		m_x *= v.m_x;
			
 
				-		m_y *= v.m_y;
			
 
				-		m_z *= v.m_z;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator*=( float s )
			
 
				-	{
			
 
				-		m_x *= s;
			
 
				-		m_y *= s;
			
 
				-		m_z *= s;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator/=( Arg v )
			
 
				-	{
			
 
				-		m_x /= v.m_x;
			
 
				-		m_y /= v.m_y;
			
 
				-		m_z /= v.m_z;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec3& operator/=( float s )
			
 
				-	{
			
 
				-		float t = 1.0f/s;
			
 
				-		m_x *= t;
			
 
				-		m_y *= t;
			
 
				-		m_z *= t;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator+( Arg left, Arg right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy += right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator-( Arg left, Arg right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy -= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator*( Arg left, Arg right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy *= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator*( Arg left, float right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy *= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator*( float left, Arg right )
			
 
				-	{
			
 
				-		Vec3 copy( right );
			
 
				-		return copy *= left;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator/( Arg left, Arg right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy /= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 operator/( Arg left, float right )
			
 
				-	{
			
 
				-		Vec3 copy( left );
			
 
				-		return copy /= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend float Dot( Arg left, Arg right )
			
 
				-	{
			
 
				-		return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec3 Min( Arg left, Arg right )
			
 
				-	{
			
 
				-		return Vec3(
			
 
				-			std::min( left.m_x, right.m_x ), 
			
 
				-			std::min( left.m_y, right.m_y ), 
			
 
				-			std::min( left.m_z, right.m_z )
			
 
				-		);
			
 
				-	}
			
 
				-
			
 
				-	friend Vec3 Max( Arg left, Arg right )
			
 
				-	{
			
 
				-		return Vec3(
			
 
				-			std::max( left.m_x, right.m_x ), 
			
 
				-			std::max( left.m_y, right.m_y ), 
			
 
				-			std::max( left.m_z, right.m_z )
			
 
				-		);
			
 
				-	}
			
 
				-
			
 
				-	friend Vec3 Truncate( Arg v )
			
 
				-	{
			
 
				-		return Vec3(
			
 
				-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
			
 
				-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
			
 
				-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z )
			
 
				-		);
			
 
				-	}
			
 
				-
			
 
				-private:
			
 
				-	float m_x;
			
 
				-	float m_y;
			
 
				-	float m_z;
			
 
				-};
			
 
				-
			
 
				-inline float LengthSquared( Vec3::Arg v )
			
 
				-{
			
 
				-	return Dot( v, v );
			
 
				-}
			
 
				-
			
 
				-class Sym3x3
			
 
				-{
			
 
				-public:
			
 
				-	Sym3x3()
			
 
				-	{
			
 
				-	}
			
 
				-
			
 
				-	Sym3x3( float s )
			
 
				-	{
			
 
				-		for( int i = 0; i < 6; ++i )
			
 
				-			m_x[i] = s;
			
 
				-	}
			
 
				-
			
 
				-	float operator[]( int index ) const
			
 
				-	{
			
 
				-		return m_x[index];
			
 
				-	}
			
 
				-
			
 
				-	float& operator[]( int index )
			
 
				-	{
			
 
				-		return m_x[index];
			
 
				-	}
			
 
				-
			
 
				-private:
			
 
				-	float m_x[6];
			
 
				-};
			
 
				-
			
 
				-Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
			
 
				-Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_MATHS_H
			
--- a/3rdparty/libsquish/rangefit.cpp
+++ b/3rdparty/libsquish/rangefit.cpp
@@ -1,201 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "rangefit.h"
			
 
				-#include "colourset.h"
			
 
				-#include "colourblock.h"
			
 
				-#include <cfloat>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric ) 
			
 
				-  : ColourFit( colours, flags )
			
 
				-{
			
 
				-	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
			
 
				-	if( metric )
			
 
				-		m_metric = Vec3( metric[0], metric[1], metric[2] );
			
 
				-	else
			
 
				-		m_metric = Vec3( 1.0f );	
			
 
				-
			
 
				-	// initialise the best error
			
 
				-	m_besterror = FLT_MAX;
			
 
				-
			
 
				-	// cache some values
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-	float const* weights = m_colours->GetWeights();
			
 
				-	
			
 
				-	// get the covariance matrix
			
 
				-	Sym3x3 covariance = ComputeWeightedCovariance( count, values, weights );
			
 
				-	
			
 
				-	// compute the principle component
			
 
				-	Vec3 principle = ComputePrincipleComponent( covariance );
			
 
				-
			
 
				-	// get the min and max range as the codebook endpoints
			
 
				-	Vec3 start( 0.0f );
			
 
				-	Vec3 end( 0.0f );
			
 
				-	if( count > 0 )
			
 
				-	{
			
 
				-		float min, max;
			
 
				-		
			
 
				-		// compute the range
			
 
				-		start = end = values[0];
			
 
				-		min = max = Dot( values[0], principle );
			
 
				-		for( int i = 1; i < count; ++i )
			
 
				-		{
			
 
				-			float val = Dot( values[i], principle );
			
 
				-			if( val < min )
			
 
				-			{
			
 
				-				start = values[i];
			
 
				-				min = val;
			
 
				-			}
			
 
				-			else if( val > max )
			
 
				-			{
			
 
				-				end = values[i];
			
 
				-				max = val;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-			
			
 
				-	// clamp the output to [0, 1]
			
 
				-	Vec3 const one( 1.0f );
			
 
				-	Vec3 const zero( 0.0f );
			
 
				-	start = Min( one, Max( zero, start ) );
			
 
				-	end = Min( one, Max( zero, end ) );
			
 
				-
			
 
				-	// clamp to the grid and save
			
 
				-	Vec3 const grid( 31.0f, 63.0f, 31.0f );
			
 
				-	Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
			
 
				-	Vec3 const half( 0.5f );
			
 
				-	m_start = Truncate( grid*start + half )*gridrcp;
			
 
				-	m_end = Truncate( grid*end + half )*gridrcp;
			
 
				-}
			
 
				-
			
 
				-void RangeFit::Compress3( void* block )
			
 
				-{
			
 
				-	// cache some values
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-	
			
 
				-	// create a codebook
			
 
				-	Vec3 codes[3];
			
 
				-	codes[0] = m_start;
			
 
				-	codes[1] = m_end;
			
 
				-	codes[2] = 0.5f*m_start + 0.5f*m_end;
			
 
				-
			
 
				-	// match each point to the closest code
			
 
				-	u8 closest[16];
			
 
				-	float error = 0.0f;
			
 
				-	for( int i = 0; i < count; ++i )
			
 
				-	{
			
 
				-		// find the closest code
			
 
				-		float dist = FLT_MAX;
			
 
				-		int idx = 0;
			
 
				-		for( int j = 0; j < 3; ++j )
			
 
				-		{
			
 
				-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
			
 
				-			if( d < dist )
			
 
				-			{
			
 
				-				dist = d;
			
 
				-				idx = j;
			
 
				-			}
			
 
				-		}
			
 
				-		
			
 
				-		// save the index
			
 
				-		closest[i] = ( u8 )idx;
			
 
				-		
			
 
				-		// accumulate the error
			
 
				-		error += dist;
			
 
				-	}
			
 
				-	
			
 
				-	// save this scheme if it wins
			
 
				-	if( error < m_besterror )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 indices[16];
			
 
				-		m_colours->RemapIndices( closest, indices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock3( m_start, m_end, indices, block );
			
 
				-		
			
 
				-		// save the error
			
 
				-		m_besterror = error;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void RangeFit::Compress4( void* block )
			
 
				-{
			
 
				-	// cache some values
			
 
				-	int const count = m_colours->GetCount();
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-	
			
 
				-	// create a codebook
			
 
				-	Vec3 codes[4];
			
 
				-	codes[0] = m_start;
			
 
				-	codes[1] = m_end;
			
 
				-	codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
			
 
				-	codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
			
 
				-
			
 
				-	// match each point to the closest code
			
 
				-	u8 closest[16];
			
 
				-	float error = 0.0f;
			
 
				-	for( int i = 0; i < count; ++i )
			
 
				-	{
			
 
				-		// find the closest code
			
 
				-		float dist = FLT_MAX;
			
 
				-		int idx = 0;
			
 
				-		for( int j = 0; j < 4; ++j )
			
 
				-		{
			
 
				-			float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
			
 
				-			if( d < dist )
			
 
				-			{
			
 
				-				dist = d;
			
 
				-				idx = j;
			
 
				-			}
			
 
				-		}
			
 
				-		
			
 
				-		// save the index
			
 
				-		closest[i] = ( u8 )idx;
			
 
				-		
			
 
				-		// accumulate the error
			
 
				-		error += dist;
			
 
				-	}
			
 
				-	
			
 
				-	// save this scheme if it wins
			
 
				-	if( error < m_besterror )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 indices[16];
			
 
				-		m_colours->RemapIndices( closest, indices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock4( m_start, m_end, indices, block );
			
 
				-
			
 
				-		// save the error
			
 
				-		m_besterror = error;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/rangefit.h
+++ b/3rdparty/libsquish/rangefit.h
@@ -1,54 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_RANGEFIT_H
			
 
				-#define SQUISH_RANGEFIT_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "colourfit.h"
			
 
				-#include "maths.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-class ColourSet;
			
 
				-
			
 
				-class RangeFit : public ColourFit
			
 
				-{
			
 
				-public:
			
 
				-	RangeFit( ColourSet const* colours, int flags, float* metric );
			
 
				-	
			
 
				-private:
			
 
				-	virtual void Compress3( void* block );
			
 
				-	virtual void Compress4( void* block );
			
 
				-	
			
 
				-	Vec3 m_metric;
			
 
				-	Vec3 m_start;
			
 
				-	Vec3 m_end;
			
 
				-	float m_besterror;
			
 
				-};
			
 
				-
			
 
				-} // squish
			
 
				-
			
 
				-#endif // ndef SQUISH_RANGEFIT_H
			
--- a/3rdparty/libsquish/simd.h
+++ b/3rdparty/libsquish/simd.h
@@ -1,32 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_SIMD_H
			
 
				-#define SQUISH_SIMD_H
			
 
				-
			
 
				-#include "maths.h"
			
 
				-#include "simd_float.h"
			
 
				-
			
 
				-#endif // ndef SQUISH_SIMD_H
			
--- a/3rdparty/libsquish/simd_float.h
+++ b/3rdparty/libsquish/simd_float.h
@@ -1,183 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_SIMD_FLOAT_H
			
 
				-#define SQUISH_SIMD_FLOAT_H
			
 
				-
			
 
				-#include <algorithm>
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-#define VEC4_CONST( X ) Vec4( X )
			
 
				-
			
 
				-class Vec4
			
 
				-{
			
 
				-public:
			
 
				-	typedef Vec4 const& Arg;
			
 
				-
			
 
				-	Vec4() {}
			
 
				-		
			
 
				-	explicit Vec4( float s )
			
 
				-	  : m_x( s ),
			
 
				-		m_y( s ),
			
 
				-		m_z( s ),
			
 
				-		m_w( s )
			
 
				-	{
			
 
				-	}
			
 
				-	
			
 
				-	Vec4( float x, float y, float z, float w )
			
 
				-	  : m_x( x ),
			
 
				-		m_y( y ),
			
 
				-		m_z( z ),
			
 
				-		m_w( w )
			
 
				-	{
			
 
				-	}
			
 
				-	
			
 
				-	Vec3 GetVec3() const
			
 
				-	{
			
 
				-		return Vec3( m_x, m_y, m_z );
			
 
				-	}
			
 
				-	
			
 
				-	Vec4 SplatX() const { return Vec4( m_x ); }
			
 
				-	Vec4 SplatY() const { return Vec4( m_y ); }
			
 
				-	Vec4 SplatZ() const { return Vec4( m_z ); }
			
 
				-	Vec4 SplatW() const { return Vec4( m_w ); }
			
 
				-
			
 
				-	Vec4& operator+=( Arg v )
			
 
				-	{
			
 
				-		m_x += v.m_x;
			
 
				-		m_y += v.m_y;
			
 
				-		m_z += v.m_z;
			
 
				-		m_w += v.m_w;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec4& operator-=( Arg v )
			
 
				-	{
			
 
				-		m_x -= v.m_x;
			
 
				-		m_y -= v.m_y;
			
 
				-		m_z -= v.m_z;
			
 
				-		m_w -= v.m_w;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	Vec4& operator*=( Arg v )
			
 
				-	{
			
 
				-		m_x *= v.m_x;
			
 
				-		m_y *= v.m_y;
			
 
				-		m_z *= v.m_z;
			
 
				-		m_w *= v.m_w;
			
 
				-		return *this;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
			
 
				-	{
			
 
				-		Vec4 copy( left );
			
 
				-		return copy += right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
			
 
				-	{
			
 
				-		Vec4 copy( left );
			
 
				-		return copy -= right;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
			
 
				-	{
			
 
				-		Vec4 copy( left );
			
 
				-		return copy *= right;
			
 
				-	}
			
 
				-	
			
 
				-	//! Returns a*b + c
			
 
				-	friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
			
 
				-	{
			
 
				-		return a*b + c;
			
 
				-	}
			
 
				-	
			
 
				-	//! Returns -( a*b - c )
			
 
				-	friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
			
 
				-	{
			
 
				-		return c - a*b;
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 Reciprocal( Vec4::Arg v )
			
 
				-	{
			
 
				-		return Vec4( 
			
 
				-			1.0f/v.m_x, 
			
 
				-			1.0f/v.m_y, 
			
 
				-			1.0f/v.m_z, 
			
 
				-			1.0f/v.m_w 
			
 
				-		);
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
			
 
				-	{
			
 
				-		return Vec4( 
			
 
				-			std::min( left.m_x, right.m_x ), 
			
 
				-			std::min( left.m_y, right.m_y ), 
			
 
				-			std::min( left.m_z, right.m_z ), 
			
 
				-			std::min( left.m_w, right.m_w ) 
			
 
				-		);
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
			
 
				-	{
			
 
				-		return Vec4( 
			
 
				-			std::max( left.m_x, right.m_x ), 
			
 
				-			std::max( left.m_y, right.m_y ), 
			
 
				-			std::max( left.m_z, right.m_z ), 
			
 
				-			std::max( left.m_w, right.m_w ) 
			
 
				-		);
			
 
				-	}
			
 
				-	
			
 
				-	friend Vec4 Truncate( Vec4::Arg v )
			
 
				-	{
			
 
				-		return Vec4(
			
 
				-			v.m_x > 0.0f ? std::floor( v.m_x ) : std::ceil( v.m_x ), 
			
 
				-			v.m_y > 0.0f ? std::floor( v.m_y ) : std::ceil( v.m_y ), 
			
 
				-			v.m_z > 0.0f ? std::floor( v.m_z ) : std::ceil( v.m_z ),
			
 
				-			v.m_w > 0.0f ? std::floor( v.m_w ) : std::ceil( v.m_w )
			
 
				-		);
			
 
				-	}
			
 
				-	
			
 
				-	friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right ) 
			
 
				-	{
			
 
				-		return left.m_x < right.m_x
			
 
				-			|| left.m_y < right.m_y
			
 
				-			|| left.m_z < right.m_z
			
 
				-			|| left.m_w < right.m_w;
			
 
				-	}
			
 
				-	
			
 
				-private:
			
 
				-	float m_x;
			
 
				-	float m_y;
			
 
				-	float m_z;
			
 
				-	float m_w;
			
 
				-};
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_SIMD_FLOAT_H
			
 
				-
			
--- a/3rdparty/libsquish/singlecolourfit.cpp
+++ b/3rdparty/libsquish/singlecolourfit.cpp
@@ -1,172 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "singlecolourfit.h"
			
 
				-#include "colourset.h"
			
 
				-#include "colourblock.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-struct SourceBlock
			
 
				-{
			
 
				-	u8 start;
			
 
				-	u8 end;
			
 
				-	u8 error;
			
 
				-};
			
 
				-
			
 
				-struct SingleColourLookup
			
 
				-{
			
 
				-	SourceBlock sources[2];
			
 
				-};
			
 
				-
			
 
				-#include "singlecolourlookup.inl"
			
 
				-
			
 
				-static int FloatToInt( float a, int limit )
			
 
				-{
			
 
				-	// use ANSI round-to-zero behaviour to get round-to-nearest
			
 
				-	int i = ( int )( a + 0.5f );
			
 
				-
			
 
				-	// clamp to the limit
			
 
				-	if( i < 0 )
			
 
				-		i = 0;
			
 
				-	else if( i > limit )
			
 
				-		i = limit; 
			
 
				-
			
 
				-	// done
			
 
				-	return i;
			
 
				-}
			
 
				-
			
 
				-SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
			
 
				-  : ColourFit( colours, flags )
			
 
				-{
			
 
				-	// grab the single colour
			
 
				-	Vec3 const* values = m_colours->GetPoints();
			
 
				-	m_colour[0] = ( u8 )FloatToInt( 255.0f*values->X(), 255 );
			
 
				-	m_colour[1] = ( u8 )FloatToInt( 255.0f*values->Y(), 255 );
			
 
				-	m_colour[2] = ( u8 )FloatToInt( 255.0f*values->Z(), 255 );
			
 
				-		
			
 
				-	// initialise the best error
			
 
				-	m_besterror = INT_MAX;
			
 
				-}
			
 
				-
			
 
				-void SingleColourFit::Compress3( void* block )
			
 
				-{
			
 
				-	// build the table of lookups
			
 
				-	SingleColourLookup const* const lookups[] = 
			
 
				-	{
			
 
				-		lookup_5_3, 
			
 
				-		lookup_6_3, 
			
 
				-		lookup_5_3
			
 
				-	};
			
 
				-	
			
 
				-	// find the best end-points and index
			
 
				-	ComputeEndPoints( lookups );
			
 
				-	
			
 
				-	// build the block if we win
			
 
				-	if( m_error < m_besterror )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 indices[16];
			
 
				-		m_colours->RemapIndices( &m_index, indices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock3( m_start, m_end, indices, block );
			
 
				-
			
 
				-		// save the error
			
 
				-		m_besterror = m_error;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void SingleColourFit::Compress4( void* block )
			
 
				-{
			
 
				-	// build the table of lookups
			
 
				-	SingleColourLookup const* const lookups[] = 
			
 
				-	{
			
 
				-		lookup_5_4, 
			
 
				-		lookup_6_4, 
			
 
				-		lookup_5_4
			
 
				-	};
			
 
				-	
			
 
				-	// find the best end-points and index
			
 
				-	ComputeEndPoints( lookups );
			
 
				-	
			
 
				-	// build the block if we win
			
 
				-	if( m_error < m_besterror )
			
 
				-	{
			
 
				-		// remap the indices
			
 
				-		u8 indices[16];
			
 
				-		m_colours->RemapIndices( &m_index, indices );
			
 
				-		
			
 
				-		// save the block
			
 
				-		WriteColourBlock4( m_start, m_end, indices, block );
			
 
				-
			
 
				-		// save the error
			
 
				-		m_besterror = m_error;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
			
 
				-{
			
 
				-	// check each index combination (endpoint or intermediate)
			
 
				-	m_error = INT_MAX;
			
 
				-	for( int index = 0; index < 2; ++index )
			
 
				-	{
			
 
				-		// check the error for this codebook index
			
 
				-		SourceBlock const* sources[3];
			
 
				-		int error = 0;
			
 
				-		for( int channel = 0; channel < 3; ++channel )
			
 
				-		{
			
 
				-			// grab the lookup table and index for this channel
			
 
				-			SingleColourLookup const* lookup = lookups[channel];
			
 
				-			int target = m_colour[channel];
			
 
				-			
			
 
				-			// store a pointer to the source for this channel
			
 
				-			sources[channel] = lookup[target].sources + index;
			
 
				-			
			
 
				-			// accumulate the error
			
 
				-			int diff = sources[channel]->error;
			
 
				-			error += diff*diff;			
			
 
				-		}
			
 
				-		
			
 
				-		// keep it if the error is lower
			
 
				-		if( error < m_error )
			
 
				-		{
			
 
				-			m_start = Vec3(
			
 
				-				( float )sources[0]->start/31.0f, 
			
 
				-				( float )sources[1]->start/63.0f, 
			
 
				-				( float )sources[2]->start/31.0f
			
 
				-			);
			
 
				-			m_end = Vec3(
			
 
				-				( float )sources[0]->end/31.0f, 
			
 
				-				( float )sources[1]->end/63.0f, 
			
 
				-				( float )sources[2]->end/31.0f
			
 
				-			);
			
 
				-			m_index = ( u8 )( 2*index );
			
 
				-			m_error = error;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/singlecolourfit.h
+++ b/3rdparty/libsquish/singlecolourfit.h
@@ -1,58 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_SINGLECOLOURFIT_H
			
 
				-#define SQUISH_SINGLECOLOURFIT_H
			
 
				-
			
 
				-#include "squish.h"
			
 
				-#include "colourfit.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-class ColourSet;
			
 
				-struct SingleColourLookup;
			
 
				-
			
 
				-class SingleColourFit : public ColourFit
			
 
				-{
			
 
				-public:
			
 
				-	SingleColourFit( ColourSet const* colours, int flags );
			
 
				-	
			
 
				-private:
			
 
				-	virtual void Compress3( void* block );
			
 
				-	virtual void Compress4( void* block );
			
 
				-	
			
 
				-	void ComputeEndPoints( SingleColourLookup const* const* lookups );
			
 
				-	
			
 
				-	u8 m_colour[3];
			
 
				-	Vec3 m_start;
			
 
				-	Vec3 m_end;
			
 
				-	u8 m_index;
			
 
				-	int m_error;
			
 
				-	int m_besterror;
			
 
				-};
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_SINGLECOLOURFIT_H
			
--- a/3rdparty/libsquish/singlecolourlookup.inl
+++ b/3rdparty/libsquish/singlecolourlookup.inl
@@ -1,1064 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-
			
 
				-static SingleColourLookup const lookup_5_3[] = 
			
 
				-{
			
 
				-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
			
 
				-	{ { { 0, 0, 1 }, { 0, 0, 1 } } },
			
 
				-	{ { { 0, 0, 2 }, { 0, 0, 2 } } },
			
 
				-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
			
 
				-	{ { { 0, 0, 4 }, { 0, 1, 0 } } },
			
 
				-	{ { { 1, 0, 3 }, { 0, 1, 1 } } },
			
 
				-	{ { { 1, 0, 2 }, { 0, 1, 2 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
			
 
				-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
			
 
				-	{ { { 1, 0, 2 }, { 0, 2, 2 } } },
			
 
				-	{ { { 1, 0, 3 }, { 0, 3, 1 } } },
			
 
				-	{ { { 1, 0, 4 }, { 0, 3, 0 } } },
			
 
				-	{ { { 2, 0, 3 }, { 0, 3, 1 } } },
			
 
				-	{ { { 2, 0, 2 }, { 0, 3, 2 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
			
 
				-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
			
 
				-	{ { { 2, 0, 2 }, { 0, 4, 2 } } },
			
 
				-	{ { { 2, 0, 3 }, { 0, 5, 1 } } },
			
 
				-	{ { { 2, 0, 4 }, { 0, 5, 0 } } },
			
 
				-	{ { { 3, 0, 3 }, { 0, 5, 1 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 5, 2 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
			
 
				-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 6, 2 } } },
			
 
				-	{ { { 3, 0, 3 }, { 0, 7, 1 } } },
			
 
				-	{ { { 3, 0, 4 }, { 0, 7, 0 } } },
			
 
				-	{ { { 4, 0, 4 }, { 0, 7, 1 } } },
			
 
				-	{ { { 4, 0, 3 }, { 0, 7, 2 } } },
			
 
				-	{ { { 4, 0, 2 }, { 1, 7, 1 } } },
			
 
				-	{ { { 4, 0, 1 }, { 1, 7, 0 } } },
			
 
				-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
			
 
				-	{ { { 4, 0, 2 }, { 2, 7, 1 } } },
			
 
				-	{ { { 4, 0, 3 }, { 2, 7, 0 } } },
			
 
				-	{ { { 4, 0, 4 }, { 0, 9, 0 } } },
			
 
				-	{ { { 5, 0, 3 }, { 0, 9, 1 } } },
			
 
				-	{ { { 5, 0, 2 }, { 3, 7, 1 } } },
			
 
				-	{ { { 5, 0, 1 }, { 3, 7, 0 } } },
			
 
				-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
			
 
				-	{ { { 5, 0, 2 }, { 0, 10, 2 } } },
			
 
				-	{ { { 5, 0, 3 }, { 0, 11, 1 } } },
			
 
				-	{ { { 5, 0, 4 }, { 0, 11, 0 } } },
			
 
				-	{ { { 6, 0, 3 }, { 0, 11, 1 } } },
			
 
				-	{ { { 6, 0, 2 }, { 0, 11, 2 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
			
 
				-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
			
 
				-	{ { { 6, 0, 2 }, { 0, 12, 2 } } },
			
 
				-	{ { { 6, 0, 3 }, { 0, 13, 1 } } },
			
 
				-	{ { { 6, 0, 4 }, { 0, 13, 0 } } },
			
 
				-	{ { { 7, 0, 3 }, { 0, 13, 1 } } },
			
 
				-	{ { { 7, 0, 2 }, { 0, 13, 2 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
			
 
				-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
			
 
				-	{ { { 7, 0, 2 }, { 0, 14, 2 } } },
			
 
				-	{ { { 7, 0, 3 }, { 0, 15, 1 } } },
			
 
				-	{ { { 7, 0, 4 }, { 0, 15, 0 } } },
			
 
				-	{ { { 8, 0, 4 }, { 0, 15, 1 } } },
			
 
				-	{ { { 8, 0, 3 }, { 0, 15, 2 } } },
			
 
				-	{ { { 8, 0, 2 }, { 1, 15, 1 } } },
			
 
				-	{ { { 8, 0, 1 }, { 1, 15, 0 } } },
			
 
				-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
			
 
				-	{ { { 8, 0, 2 }, { 2, 15, 1 } } },
			
 
				-	{ { { 8, 0, 3 }, { 2, 15, 0 } } },
			
 
				-	{ { { 8, 0, 4 }, { 0, 17, 0 } } },
			
 
				-	{ { { 9, 0, 3 }, { 0, 17, 1 } } },
			
 
				-	{ { { 9, 0, 2 }, { 3, 15, 1 } } },
			
 
				-	{ { { 9, 0, 1 }, { 3, 15, 0 } } },
			
 
				-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
			
 
				-	{ { { 9, 0, 2 }, { 0, 18, 2 } } },
			
 
				-	{ { { 9, 0, 3 }, { 0, 19, 1 } } },
			
 
				-	{ { { 9, 0, 4 }, { 0, 19, 0 } } },
			
 
				-	{ { { 10, 0, 3 }, { 0, 19, 1 } } },
			
 
				-	{ { { 10, 0, 2 }, { 0, 19, 2 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
			
 
				-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
			
 
				-	{ { { 10, 0, 2 }, { 0, 20, 2 } } },
			
 
				-	{ { { 10, 0, 3 }, { 0, 21, 1 } } },
			
 
				-	{ { { 10, 0, 4 }, { 0, 21, 0 } } },
			
 
				-	{ { { 11, 0, 3 }, { 0, 21, 1 } } },
			
 
				-	{ { { 11, 0, 2 }, { 0, 21, 2 } } },
			
 
				-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
			
 
				-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
			
 
				-	{ { { 11, 0, 2 }, { 0, 22, 2 } } },
			
 
				-	{ { { 11, 0, 3 }, { 0, 23, 1 } } },
			
 
				-	{ { { 11, 0, 4 }, { 0, 23, 0 } } },
			
 
				-	{ { { 12, 0, 4 }, { 0, 23, 1 } } },
			
 
				-	{ { { 12, 0, 3 }, { 0, 23, 2 } } },
			
 
				-	{ { { 12, 0, 2 }, { 1, 23, 1 } } },
			
 
				-	{ { { 12, 0, 1 }, { 1, 23, 0 } } },
			
 
				-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
			
 
				-	{ { { 12, 0, 2 }, { 2, 23, 1 } } },
			
 
				-	{ { { 12, 0, 3 }, { 2, 23, 0 } } },
			
 
				-	{ { { 12, 0, 4 }, { 0, 25, 0 } } },
			
 
				-	{ { { 13, 0, 3 }, { 0, 25, 1 } } },
			
 
				-	{ { { 13, 0, 2 }, { 3, 23, 1 } } },
			
 
				-	{ { { 13, 0, 1 }, { 3, 23, 0 } } },
			
 
				-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
			
 
				-	{ { { 13, 0, 2 }, { 0, 26, 2 } } },
			
 
				-	{ { { 13, 0, 3 }, { 0, 27, 1 } } },
			
 
				-	{ { { 13, 0, 4 }, { 0, 27, 0 } } },
			
 
				-	{ { { 14, 0, 3 }, { 0, 27, 1 } } },
			
 
				-	{ { { 14, 0, 2 }, { 0, 27, 2 } } },
			
 
				-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
			
 
				-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
			
 
				-	{ { { 14, 0, 2 }, { 0, 28, 2 } } },
			
 
				-	{ { { 14, 0, 3 }, { 0, 29, 1 } } },
			
 
				-	{ { { 14, 0, 4 }, { 0, 29, 0 } } },
			
 
				-	{ { { 15, 0, 3 }, { 0, 29, 1 } } },
			
 
				-	{ { { 15, 0, 2 }, { 0, 29, 2 } } },
			
 
				-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
			
 
				-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
			
 
				-	{ { { 15, 0, 2 }, { 0, 30, 2 } } },
			
 
				-	{ { { 15, 0, 3 }, { 0, 31, 1 } } },
			
 
				-	{ { { 15, 0, 4 }, { 0, 31, 0 } } },
			
 
				-	{ { { 16, 0, 4 }, { 0, 31, 1 } } },
			
 
				-	{ { { 16, 0, 3 }, { 0, 31, 2 } } },
			
 
				-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
			
 
				-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
			
 
				-	{ { { 16, 0, 0 }, { 4, 28, 0 } } },
			
 
				-	{ { { 16, 0, 1 }, { 4, 28, 1 } } },
			
 
				-	{ { { 16, 0, 2 }, { 2, 31, 1 } } },
			
 
				-	{ { { 16, 0, 3 }, { 2, 31, 0 } } },
			
 
				-	{ { { 16, 0, 4 }, { 4, 29, 0 } } },
			
 
				-	{ { { 17, 0, 3 }, { 4, 29, 1 } } },
			
 
				-	{ { { 17, 0, 2 }, { 3, 31, 1 } } },
			
 
				-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
			
 
				-	{ { { 17, 0, 0 }, { 4, 30, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 4, 30, 1 } } },
			
 
				-	{ { { 17, 0, 2 }, { 4, 30, 2 } } },
			
 
				-	{ { { 17, 0, 3 }, { 4, 31, 1 } } },
			
 
				-	{ { { 17, 0, 4 }, { 4, 31, 0 } } },
			
 
				-	{ { { 18, 0, 3 }, { 4, 31, 1 } } },
			
 
				-	{ { { 18, 0, 2 }, { 4, 31, 2 } } },
			
 
				-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
			
 
				-	{ { { 18, 0, 0 }, { 5, 31, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 5, 31, 1 } } },
			
 
				-	{ { { 18, 0, 2 }, { 5, 31, 2 } } },
			
 
				-	{ { { 18, 0, 3 }, { 6, 31, 1 } } },
			
 
				-	{ { { 18, 0, 4 }, { 6, 31, 0 } } },
			
 
				-	{ { { 19, 0, 3 }, { 6, 31, 1 } } },
			
 
				-	{ { { 19, 0, 2 }, { 6, 31, 2 } } },
			
 
				-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
			
 
				-	{ { { 19, 0, 0 }, { 7, 31, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 7, 31, 1 } } },
			
 
				-	{ { { 19, 0, 2 }, { 7, 31, 2 } } },
			
 
				-	{ { { 19, 0, 3 }, { 8, 31, 1 } } },
			
 
				-	{ { { 19, 0, 4 }, { 8, 31, 0 } } },
			
 
				-	{ { { 20, 0, 4 }, { 8, 31, 1 } } },
			
 
				-	{ { { 20, 0, 3 }, { 8, 31, 2 } } },
			
 
				-	{ { { 20, 0, 2 }, { 9, 31, 1 } } },
			
 
				-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
			
 
				-	{ { { 20, 0, 0 }, { 12, 28, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 12, 28, 1 } } },
			
 
				-	{ { { 20, 0, 2 }, { 10, 31, 1 } } },
			
 
				-	{ { { 20, 0, 3 }, { 10, 31, 0 } } },
			
 
				-	{ { { 20, 0, 4 }, { 12, 29, 0 } } },
			
 
				-	{ { { 21, 0, 3 }, { 12, 29, 1 } } },
			
 
				-	{ { { 21, 0, 2 }, { 11, 31, 1 } } },
			
 
				-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
			
 
				-	{ { { 21, 0, 0 }, { 12, 30, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 12, 30, 1 } } },
			
 
				-	{ { { 21, 0, 2 }, { 12, 30, 2 } } },
			
 
				-	{ { { 21, 0, 3 }, { 12, 31, 1 } } },
			
 
				-	{ { { 21, 0, 4 }, { 12, 31, 0 } } },
			
 
				-	{ { { 22, 0, 3 }, { 12, 31, 1 } } },
			
 
				-	{ { { 22, 0, 2 }, { 12, 31, 2 } } },
			
 
				-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
			
 
				-	{ { { 22, 0, 0 }, { 13, 31, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 13, 31, 1 } } },
			
 
				-	{ { { 22, 0, 2 }, { 13, 31, 2 } } },
			
 
				-	{ { { 22, 0, 3 }, { 14, 31, 1 } } },
			
 
				-	{ { { 22, 0, 4 }, { 14, 31, 0 } } },
			
 
				-	{ { { 23, 0, 3 }, { 14, 31, 1 } } },
			
 
				-	{ { { 23, 0, 2 }, { 14, 31, 2 } } },
			
 
				-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
			
 
				-	{ { { 23, 0, 0 }, { 15, 31, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 15, 31, 1 } } },
			
 
				-	{ { { 23, 0, 2 }, { 15, 31, 2 } } },
			
 
				-	{ { { 23, 0, 3 }, { 16, 31, 1 } } },
			
 
				-	{ { { 23, 0, 4 }, { 16, 31, 0 } } },
			
 
				-	{ { { 24, 0, 4 }, { 16, 31, 1 } } },
			
 
				-	{ { { 24, 0, 3 }, { 16, 31, 2 } } },
			
 
				-	{ { { 24, 0, 2 }, { 17, 31, 1 } } },
			
 
				-	{ { { 24, 0, 1 }, { 17, 31, 0 } } },
			
 
				-	{ { { 24, 0, 0 }, { 20, 28, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 20, 28, 1 } } },
			
 
				-	{ { { 24, 0, 2 }, { 18, 31, 1 } } },
			
 
				-	{ { { 24, 0, 3 }, { 18, 31, 0 } } },
			
 
				-	{ { { 24, 0, 4 }, { 20, 29, 0 } } },
			
 
				-	{ { { 25, 0, 3 }, { 20, 29, 1 } } },
			
 
				-	{ { { 25, 0, 2 }, { 19, 31, 1 } } },
			
 
				-	{ { { 25, 0, 1 }, { 19, 31, 0 } } },
			
 
				-	{ { { 25, 0, 0 }, { 20, 30, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 20, 30, 1 } } },
			
 
				-	{ { { 25, 0, 2 }, { 20, 30, 2 } } },
			
 
				-	{ { { 25, 0, 3 }, { 20, 31, 1 } } },
			
 
				-	{ { { 25, 0, 4 }, { 20, 31, 0 } } },
			
 
				-	{ { { 26, 0, 3 }, { 20, 31, 1 } } },
			
 
				-	{ { { 26, 0, 2 }, { 20, 31, 2 } } },
			
 
				-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
			
 
				-	{ { { 26, 0, 0 }, { 21, 31, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 21, 31, 1 } } },
			
 
				-	{ { { 26, 0, 2 }, { 21, 31, 2 } } },
			
 
				-	{ { { 26, 0, 3 }, { 22, 31, 1 } } },
			
 
				-	{ { { 26, 0, 4 }, { 22, 31, 0 } } },
			
 
				-	{ { { 27, 0, 3 }, { 22, 31, 1 } } },
			
 
				-	{ { { 27, 0, 2 }, { 22, 31, 2 } } },
			
 
				-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
			
 
				-	{ { { 27, 0, 0 }, { 23, 31, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 23, 31, 1 } } },
			
 
				-	{ { { 27, 0, 2 }, { 23, 31, 2 } } },
			
 
				-	{ { { 27, 0, 3 }, { 24, 31, 1 } } },
			
 
				-	{ { { 27, 0, 4 }, { 24, 31, 0 } } },
			
 
				-	{ { { 28, 0, 4 }, { 24, 31, 1 } } },
			
 
				-	{ { { 28, 0, 3 }, { 24, 31, 2 } } },
			
 
				-	{ { { 28, 0, 2 }, { 25, 31, 1 } } },
			
 
				-	{ { { 28, 0, 1 }, { 25, 31, 0 } } },
			
 
				-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 28, 28, 1 } } },
			
 
				-	{ { { 28, 0, 2 }, { 26, 31, 1 } } },
			
 
				-	{ { { 28, 0, 3 }, { 26, 31, 0 } } },
			
 
				-	{ { { 28, 0, 4 }, { 28, 29, 0 } } },
			
 
				-	{ { { 29, 0, 3 }, { 28, 29, 1 } } },
			
 
				-	{ { { 29, 0, 2 }, { 27, 31, 1 } } },
			
 
				-	{ { { 29, 0, 1 }, { 27, 31, 0 } } },
			
 
				-	{ { { 29, 0, 0 }, { 28, 30, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 28, 30, 1 } } },
			
 
				-	{ { { 29, 0, 2 }, { 28, 30, 2 } } },
			
 
				-	{ { { 29, 0, 3 }, { 28, 31, 1 } } },
			
 
				-	{ { { 29, 0, 4 }, { 28, 31, 0 } } },
			
 
				-	{ { { 30, 0, 3 }, { 28, 31, 1 } } },
			
 
				-	{ { { 30, 0, 2 }, { 28, 31, 2 } } },
			
 
				-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
			
 
				-	{ { { 30, 0, 0 }, { 29, 31, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 29, 31, 1 } } },
			
 
				-	{ { { 30, 0, 2 }, { 29, 31, 2 } } },
			
 
				-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
			
 
				-	{ { { 30, 0, 4 }, { 30, 31, 0 } } },
			
 
				-	{ { { 31, 0, 3 }, { 30, 31, 1 } } },
			
 
				-	{ { { 31, 0, 2 }, { 30, 31, 2 } } },
			
 
				-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
			
 
				-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
			
 
				-};
			
 
				-
			
 
				-static SingleColourLookup const lookup_6_3[] = 
			
 
				-{
			
 
				-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
			
 
				-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
			
 
				-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 2, 1 } } },
			
 
				-	{ { { 1, 0, 0 }, { 0, 2, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
			
 
				-	{ { { 1, 0, 2 }, { 0, 3, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 4, 1 } } },
			
 
				-	{ { { 2, 0, 0 }, { 0, 4, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 5, 1 } } },
			
 
				-	{ { { 2, 0, 2 }, { 0, 5, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 6, 1 } } },
			
 
				-	{ { { 3, 0, 0 }, { 0, 6, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 7, 1 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 7, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 8, 1 } } },
			
 
				-	{ { { 4, 0, 0 }, { 0, 8, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 9, 1 } } },
			
 
				-	{ { { 4, 0, 2 }, { 0, 9, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 0, 10, 1 } } },
			
 
				-	{ { { 5, 0, 0 }, { 0, 10, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 0, 11, 1 } } },
			
 
				-	{ { { 5, 0, 2 }, { 0, 11, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 12, 1 } } },
			
 
				-	{ { { 6, 0, 0 }, { 0, 12, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 13, 1 } } },
			
 
				-	{ { { 6, 0, 2 }, { 0, 13, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 14, 1 } } },
			
 
				-	{ { { 7, 0, 0 }, { 0, 14, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 15, 1 } } },
			
 
				-	{ { { 7, 0, 2 }, { 0, 15, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 16, 1 } } },
			
 
				-	{ { { 8, 0, 0 }, { 0, 16, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 17, 1 } } },
			
 
				-	{ { { 8, 0, 2 }, { 0, 17, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 0, 18, 1 } } },
			
 
				-	{ { { 9, 0, 0 }, { 0, 18, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 0, 19, 1 } } },
			
 
				-	{ { { 9, 0, 2 }, { 0, 19, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 20, 1 } } },
			
 
				-	{ { { 10, 0, 0 }, { 0, 20, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 21, 1 } } },
			
 
				-	{ { { 10, 0, 2 }, { 0, 21, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 0, 22, 1 } } },
			
 
				-	{ { { 11, 0, 0 }, { 0, 22, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 0, 23, 1 } } },
			
 
				-	{ { { 11, 0, 2 }, { 0, 23, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 0, 24, 1 } } },
			
 
				-	{ { { 12, 0, 0 }, { 0, 24, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 0, 25, 1 } } },
			
 
				-	{ { { 12, 0, 2 }, { 0, 25, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 0, 26, 1 } } },
			
 
				-	{ { { 13, 0, 0 }, { 0, 26, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 0, 27, 1 } } },
			
 
				-	{ { { 13, 0, 2 }, { 0, 27, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 0, 28, 1 } } },
			
 
				-	{ { { 14, 0, 0 }, { 0, 28, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 0, 29, 1 } } },
			
 
				-	{ { { 14, 0, 2 }, { 0, 29, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 0, 30, 1 } } },
			
 
				-	{ { { 15, 0, 0 }, { 0, 30, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 0, 31, 1 } } },
			
 
				-	{ { { 15, 0, 2 }, { 0, 31, 0 } } },
			
 
				-	{ { { 16, 0, 2 }, { 1, 31, 1 } } },
			
 
				-	{ { { 16, 0, 1 }, { 1, 31, 0 } } },
			
 
				-	{ { { 16, 0, 0 }, { 0, 32, 0 } } },
			
 
				-	{ { { 16, 0, 1 }, { 2, 31, 0 } } },
			
 
				-	{ { { 16, 0, 2 }, { 0, 33, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 3, 31, 0 } } },
			
 
				-	{ { { 17, 0, 0 }, { 0, 34, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 4, 31, 0 } } },
			
 
				-	{ { { 17, 0, 2 }, { 0, 35, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 5, 31, 0 } } },
			
 
				-	{ { { 18, 0, 0 }, { 0, 36, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 6, 31, 0 } } },
			
 
				-	{ { { 18, 0, 2 }, { 0, 37, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 7, 31, 0 } } },
			
 
				-	{ { { 19, 0, 0 }, { 0, 38, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 8, 31, 0 } } },
			
 
				-	{ { { 19, 0, 2 }, { 0, 39, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 9, 31, 0 } } },
			
 
				-	{ { { 20, 0, 0 }, { 0, 40, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 10, 31, 0 } } },
			
 
				-	{ { { 20, 0, 2 }, { 0, 41, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 11, 31, 0 } } },
			
 
				-	{ { { 21, 0, 0 }, { 0, 42, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 12, 31, 0 } } },
			
 
				-	{ { { 21, 0, 2 }, { 0, 43, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 13, 31, 0 } } },
			
 
				-	{ { { 22, 0, 0 }, { 0, 44, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 14, 31, 0 } } },
			
 
				-	{ { { 22, 0, 2 }, { 0, 45, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 15, 31, 0 } } },
			
 
				-	{ { { 23, 0, 0 }, { 0, 46, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 0, 47, 1 } } },
			
 
				-	{ { { 23, 0, 2 }, { 0, 47, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 0, 48, 1 } } },
			
 
				-	{ { { 24, 0, 0 }, { 0, 48, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 0, 49, 1 } } },
			
 
				-	{ { { 24, 0, 2 }, { 0, 49, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 0, 50, 1 } } },
			
 
				-	{ { { 25, 0, 0 }, { 0, 50, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 0, 51, 1 } } },
			
 
				-	{ { { 25, 0, 2 }, { 0, 51, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 0, 52, 1 } } },
			
 
				-	{ { { 26, 0, 0 }, { 0, 52, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 0, 53, 1 } } },
			
 
				-	{ { { 26, 0, 2 }, { 0, 53, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 0, 54, 1 } } },
			
 
				-	{ { { 27, 0, 0 }, { 0, 54, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 0, 55, 1 } } },
			
 
				-	{ { { 27, 0, 2 }, { 0, 55, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 0, 56, 1 } } },
			
 
				-	{ { { 28, 0, 0 }, { 0, 56, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 0, 57, 1 } } },
			
 
				-	{ { { 28, 0, 2 }, { 0, 57, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 0, 58, 1 } } },
			
 
				-	{ { { 29, 0, 0 }, { 0, 58, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 0, 59, 1 } } },
			
 
				-	{ { { 29, 0, 2 }, { 0, 59, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 0, 60, 1 } } },
			
 
				-	{ { { 30, 0, 0 }, { 0, 60, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 0, 61, 1 } } },
			
 
				-	{ { { 30, 0, 2 }, { 0, 61, 0 } } },
			
 
				-	{ { { 31, 0, 1 }, { 0, 62, 1 } } },
			
 
				-	{ { { 31, 0, 0 }, { 0, 62, 0 } } },
			
 
				-	{ { { 31, 0, 1 }, { 0, 63, 1 } } },
			
 
				-	{ { { 31, 0, 2 }, { 0, 63, 0 } } },
			
 
				-	{ { { 32, 0, 2 }, { 1, 63, 1 } } },
			
 
				-	{ { { 32, 0, 1 }, { 1, 63, 0 } } },
			
 
				-	{ { { 32, 0, 0 }, { 16, 48, 0 } } },
			
 
				-	{ { { 32, 0, 1 }, { 2, 63, 0 } } },
			
 
				-	{ { { 32, 0, 2 }, { 16, 49, 0 } } },
			
 
				-	{ { { 33, 0, 1 }, { 3, 63, 0 } } },
			
 
				-	{ { { 33, 0, 0 }, { 16, 50, 0 } } },
			
 
				-	{ { { 33, 0, 1 }, { 4, 63, 0 } } },
			
 
				-	{ { { 33, 0, 2 }, { 16, 51, 0 } } },
			
 
				-	{ { { 34, 0, 1 }, { 5, 63, 0 } } },
			
 
				-	{ { { 34, 0, 0 }, { 16, 52, 0 } } },
			
 
				-	{ { { 34, 0, 1 }, { 6, 63, 0 } } },
			
 
				-	{ { { 34, 0, 2 }, { 16, 53, 0 } } },
			
 
				-	{ { { 35, 0, 1 }, { 7, 63, 0 } } },
			
 
				-	{ { { 35, 0, 0 }, { 16, 54, 0 } } },
			
 
				-	{ { { 35, 0, 1 }, { 8, 63, 0 } } },
			
 
				-	{ { { 35, 0, 2 }, { 16, 55, 0 } } },
			
 
				-	{ { { 36, 0, 1 }, { 9, 63, 0 } } },
			
 
				-	{ { { 36, 0, 0 }, { 16, 56, 0 } } },
			
 
				-	{ { { 36, 0, 1 }, { 10, 63, 0 } } },
			
 
				-	{ { { 36, 0, 2 }, { 16, 57, 0 } } },
			
 
				-	{ { { 37, 0, 1 }, { 11, 63, 0 } } },
			
 
				-	{ { { 37, 0, 0 }, { 16, 58, 0 } } },
			
 
				-	{ { { 37, 0, 1 }, { 12, 63, 0 } } },
			
 
				-	{ { { 37, 0, 2 }, { 16, 59, 0 } } },
			
 
				-	{ { { 38, 0, 1 }, { 13, 63, 0 } } },
			
 
				-	{ { { 38, 0, 0 }, { 16, 60, 0 } } },
			
 
				-	{ { { 38, 0, 1 }, { 14, 63, 0 } } },
			
 
				-	{ { { 38, 0, 2 }, { 16, 61, 0 } } },
			
 
				-	{ { { 39, 0, 1 }, { 15, 63, 0 } } },
			
 
				-	{ { { 39, 0, 0 }, { 16, 62, 0 } } },
			
 
				-	{ { { 39, 0, 1 }, { 16, 63, 1 } } },
			
 
				-	{ { { 39, 0, 2 }, { 16, 63, 0 } } },
			
 
				-	{ { { 40, 0, 1 }, { 17, 63, 1 } } },
			
 
				-	{ { { 40, 0, 0 }, { 17, 63, 0 } } },
			
 
				-	{ { { 40, 0, 1 }, { 18, 63, 1 } } },
			
 
				-	{ { { 40, 0, 2 }, { 18, 63, 0 } } },
			
 
				-	{ { { 41, 0, 1 }, { 19, 63, 1 } } },
			
 
				-	{ { { 41, 0, 0 }, { 19, 63, 0 } } },
			
 
				-	{ { { 41, 0, 1 }, { 20, 63, 1 } } },
			
 
				-	{ { { 41, 0, 2 }, { 20, 63, 0 } } },
			
 
				-	{ { { 42, 0, 1 }, { 21, 63, 1 } } },
			
 
				-	{ { { 42, 0, 0 }, { 21, 63, 0 } } },
			
 
				-	{ { { 42, 0, 1 }, { 22, 63, 1 } } },
			
 
				-	{ { { 42, 0, 2 }, { 22, 63, 0 } } },
			
 
				-	{ { { 43, 0, 1 }, { 23, 63, 1 } } },
			
 
				-	{ { { 43, 0, 0 }, { 23, 63, 0 } } },
			
 
				-	{ { { 43, 0, 1 }, { 24, 63, 1 } } },
			
 
				-	{ { { 43, 0, 2 }, { 24, 63, 0 } } },
			
 
				-	{ { { 44, 0, 1 }, { 25, 63, 1 } } },
			
 
				-	{ { { 44, 0, 0 }, { 25, 63, 0 } } },
			
 
				-	{ { { 44, 0, 1 }, { 26, 63, 1 } } },
			
 
				-	{ { { 44, 0, 2 }, { 26, 63, 0 } } },
			
 
				-	{ { { 45, 0, 1 }, { 27, 63, 1 } } },
			
 
				-	{ { { 45, 0, 0 }, { 27, 63, 0 } } },
			
 
				-	{ { { 45, 0, 1 }, { 28, 63, 1 } } },
			
 
				-	{ { { 45, 0, 2 }, { 28, 63, 0 } } },
			
 
				-	{ { { 46, 0, 1 }, { 29, 63, 1 } } },
			
 
				-	{ { { 46, 0, 0 }, { 29, 63, 0 } } },
			
 
				-	{ { { 46, 0, 1 }, { 30, 63, 1 } } },
			
 
				-	{ { { 46, 0, 2 }, { 30, 63, 0 } } },
			
 
				-	{ { { 47, 0, 1 }, { 31, 63, 1 } } },
			
 
				-	{ { { 47, 0, 0 }, { 31, 63, 0 } } },
			
 
				-	{ { { 47, 0, 1 }, { 32, 63, 1 } } },
			
 
				-	{ { { 47, 0, 2 }, { 32, 63, 0 } } },
			
 
				-	{ { { 48, 0, 2 }, { 33, 63, 1 } } },
			
 
				-	{ { { 48, 0, 1 }, { 33, 63, 0 } } },
			
 
				-	{ { { 48, 0, 0 }, { 48, 48, 0 } } },
			
 
				-	{ { { 48, 0, 1 }, { 34, 63, 0 } } },
			
 
				-	{ { { 48, 0, 2 }, { 48, 49, 0 } } },
			
 
				-	{ { { 49, 0, 1 }, { 35, 63, 0 } } },
			
 
				-	{ { { 49, 0, 0 }, { 48, 50, 0 } } },
			
 
				-	{ { { 49, 0, 1 }, { 36, 63, 0 } } },
			
 
				-	{ { { 49, 0, 2 }, { 48, 51, 0 } } },
			
 
				-	{ { { 50, 0, 1 }, { 37, 63, 0 } } },
			
 
				-	{ { { 50, 0, 0 }, { 48, 52, 0 } } },
			
 
				-	{ { { 50, 0, 1 }, { 38, 63, 0 } } },
			
 
				-	{ { { 50, 0, 2 }, { 48, 53, 0 } } },
			
 
				-	{ { { 51, 0, 1 }, { 39, 63, 0 } } },
			
 
				-	{ { { 51, 0, 0 }, { 48, 54, 0 } } },
			
 
				-	{ { { 51, 0, 1 }, { 40, 63, 0 } } },
			
 
				-	{ { { 51, 0, 2 }, { 48, 55, 0 } } },
			
 
				-	{ { { 52, 0, 1 }, { 41, 63, 0 } } },
			
 
				-	{ { { 52, 0, 0 }, { 48, 56, 0 } } },
			
 
				-	{ { { 52, 0, 1 }, { 42, 63, 0 } } },
			
 
				-	{ { { 52, 0, 2 }, { 48, 57, 0 } } },
			
 
				-	{ { { 53, 0, 1 }, { 43, 63, 0 } } },
			
 
				-	{ { { 53, 0, 0 }, { 48, 58, 0 } } },
			
 
				-	{ { { 53, 0, 1 }, { 44, 63, 0 } } },
			
 
				-	{ { { 53, 0, 2 }, { 48, 59, 0 } } },
			
 
				-	{ { { 54, 0, 1 }, { 45, 63, 0 } } },
			
 
				-	{ { { 54, 0, 0 }, { 48, 60, 0 } } },
			
 
				-	{ { { 54, 0, 1 }, { 46, 63, 0 } } },
			
 
				-	{ { { 54, 0, 2 }, { 48, 61, 0 } } },
			
 
				-	{ { { 55, 0, 1 }, { 47, 63, 0 } } },
			
 
				-	{ { { 55, 0, 0 }, { 48, 62, 0 } } },
			
 
				-	{ { { 55, 0, 1 }, { 48, 63, 1 } } },
			
 
				-	{ { { 55, 0, 2 }, { 48, 63, 0 } } },
			
 
				-	{ { { 56, 0, 1 }, { 49, 63, 1 } } },
			
 
				-	{ { { 56, 0, 0 }, { 49, 63, 0 } } },
			
 
				-	{ { { 56, 0, 1 }, { 50, 63, 1 } } },
			
 
				-	{ { { 56, 0, 2 }, { 50, 63, 0 } } },
			
 
				-	{ { { 57, 0, 1 }, { 51, 63, 1 } } },
			
 
				-	{ { { 57, 0, 0 }, { 51, 63, 0 } } },
			
 
				-	{ { { 57, 0, 1 }, { 52, 63, 1 } } },
			
 
				-	{ { { 57, 0, 2 }, { 52, 63, 0 } } },
			
 
				-	{ { { 58, 0, 1 }, { 53, 63, 1 } } },
			
 
				-	{ { { 58, 0, 0 }, { 53, 63, 0 } } },
			
 
				-	{ { { 58, 0, 1 }, { 54, 63, 1 } } },
			
 
				-	{ { { 58, 0, 2 }, { 54, 63, 0 } } },
			
 
				-	{ { { 59, 0, 1 }, { 55, 63, 1 } } },
			
 
				-	{ { { 59, 0, 0 }, { 55, 63, 0 } } },
			
 
				-	{ { { 59, 0, 1 }, { 56, 63, 1 } } },
			
 
				-	{ { { 59, 0, 2 }, { 56, 63, 0 } } },
			
 
				-	{ { { 60, 0, 1 }, { 57, 63, 1 } } },
			
 
				-	{ { { 60, 0, 0 }, { 57, 63, 0 } } },
			
 
				-	{ { { 60, 0, 1 }, { 58, 63, 1 } } },
			
 
				-	{ { { 60, 0, 2 }, { 58, 63, 0 } } },
			
 
				-	{ { { 61, 0, 1 }, { 59, 63, 1 } } },
			
 
				-	{ { { 61, 0, 0 }, { 59, 63, 0 } } },
			
 
				-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
			
 
				-	{ { { 61, 0, 2 }, { 60, 63, 0 } } },
			
 
				-	{ { { 62, 0, 1 }, { 61, 63, 1 } } },
			
 
				-	{ { { 62, 0, 0 }, { 61, 63, 0 } } },
			
 
				-	{ { { 62, 0, 1 }, { 62, 63, 1 } } },
			
 
				-	{ { { 62, 0, 2 }, { 62, 63, 0 } } },
			
 
				-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
			
 
				-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
			
 
				-};
			
 
				-
			
 
				-static SingleColourLookup const lookup_5_4[] = 
			
 
				-{
			
 
				-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
			
 
				-	{ { { 0, 0, 1 }, { 0, 1, 1 } } },
			
 
				-	{ { { 0, 0, 2 }, { 0, 1, 0 } } },
			
 
				-	{ { { 0, 0, 3 }, { 0, 1, 1 } } },
			
 
				-	{ { { 0, 0, 4 }, { 0, 2, 1 } } },
			
 
				-	{ { { 1, 0, 3 }, { 0, 2, 0 } } },
			
 
				-	{ { { 1, 0, 2 }, { 0, 2, 1 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
			
 
				-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 1, 2, 1 } } },
			
 
				-	{ { { 1, 0, 2 }, { 1, 2, 0 } } },
			
 
				-	{ { { 1, 0, 3 }, { 0, 4, 0 } } },
			
 
				-	{ { { 1, 0, 4 }, { 0, 5, 1 } } },
			
 
				-	{ { { 2, 0, 3 }, { 0, 5, 0 } } },
			
 
				-	{ { { 2, 0, 2 }, { 0, 5, 1 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
			
 
				-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 2, 3, 1 } } },
			
 
				-	{ { { 2, 0, 2 }, { 2, 3, 0 } } },
			
 
				-	{ { { 2, 0, 3 }, { 0, 7, 0 } } },
			
 
				-	{ { { 2, 0, 4 }, { 1, 6, 1 } } },
			
 
				-	{ { { 3, 0, 3 }, { 1, 6, 0 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 8, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
			
 
				-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 10, 1 } } },
			
 
				-	{ { { 3, 0, 3 }, { 0, 10, 0 } } },
			
 
				-	{ { { 3, 0, 4 }, { 2, 7, 1 } } },
			
 
				-	{ { { 4, 0, 4 }, { 2, 7, 0 } } },
			
 
				-	{ { { 4, 0, 3 }, { 0, 11, 0 } } },
			
 
				-	{ { { 4, 0, 2 }, { 1, 10, 1 } } },
			
 
				-	{ { { 4, 0, 1 }, { 1, 10, 0 } } },
			
 
				-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 13, 1 } } },
			
 
				-	{ { { 4, 0, 2 }, { 0, 13, 0 } } },
			
 
				-	{ { { 4, 0, 3 }, { 0, 13, 1 } } },
			
 
				-	{ { { 4, 0, 4 }, { 0, 14, 1 } } },
			
 
				-	{ { { 5, 0, 3 }, { 0, 14, 0 } } },
			
 
				-	{ { { 5, 0, 2 }, { 2, 11, 1 } } },
			
 
				-	{ { { 5, 0, 1 }, { 2, 11, 0 } } },
			
 
				-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 1, 14, 1 } } },
			
 
				-	{ { { 5, 0, 2 }, { 1, 14, 0 } } },
			
 
				-	{ { { 5, 0, 3 }, { 0, 16, 0 } } },
			
 
				-	{ { { 5, 0, 4 }, { 0, 17, 1 } } },
			
 
				-	{ { { 6, 0, 3 }, { 0, 17, 0 } } },
			
 
				-	{ { { 6, 0, 2 }, { 0, 17, 1 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 18, 1 } } },
			
 
				-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 2, 15, 1 } } },
			
 
				-	{ { { 6, 0, 2 }, { 2, 15, 0 } } },
			
 
				-	{ { { 6, 0, 3 }, { 0, 19, 0 } } },
			
 
				-	{ { { 6, 0, 4 }, { 1, 18, 1 } } },
			
 
				-	{ { { 7, 0, 3 }, { 1, 18, 0 } } },
			
 
				-	{ { { 7, 0, 2 }, { 0, 20, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
			
 
				-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 21, 1 } } },
			
 
				-	{ { { 7, 0, 2 }, { 0, 22, 1 } } },
			
 
				-	{ { { 7, 0, 3 }, { 0, 22, 0 } } },
			
 
				-	{ { { 7, 0, 4 }, { 2, 19, 1 } } },
			
 
				-	{ { { 8, 0, 4 }, { 2, 19, 0 } } },
			
 
				-	{ { { 8, 0, 3 }, { 0, 23, 0 } } },
			
 
				-	{ { { 8, 0, 2 }, { 1, 22, 1 } } },
			
 
				-	{ { { 8, 0, 1 }, { 1, 22, 0 } } },
			
 
				-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 25, 1 } } },
			
 
				-	{ { { 8, 0, 2 }, { 0, 25, 0 } } },
			
 
				-	{ { { 8, 0, 3 }, { 0, 25, 1 } } },
			
 
				-	{ { { 8, 0, 4 }, { 0, 26, 1 } } },
			
 
				-	{ { { 9, 0, 3 }, { 0, 26, 0 } } },
			
 
				-	{ { { 9, 0, 2 }, { 2, 23, 1 } } },
			
 
				-	{ { { 9, 0, 1 }, { 2, 23, 0 } } },
			
 
				-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 1, 26, 1 } } },
			
 
				-	{ { { 9, 0, 2 }, { 1, 26, 0 } } },
			
 
				-	{ { { 9, 0, 3 }, { 0, 28, 0 } } },
			
 
				-	{ { { 9, 0, 4 }, { 0, 29, 1 } } },
			
 
				-	{ { { 10, 0, 3 }, { 0, 29, 0 } } },
			
 
				-	{ { { 10, 0, 2 }, { 0, 29, 1 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 30, 1 } } },
			
 
				-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 2, 27, 1 } } },
			
 
				-	{ { { 10, 0, 2 }, { 2, 27, 0 } } },
			
 
				-	{ { { 10, 0, 3 }, { 0, 31, 0 } } },
			
 
				-	{ { { 10, 0, 4 }, { 1, 30, 1 } } },
			
 
				-	{ { { 11, 0, 3 }, { 1, 30, 0 } } },
			
 
				-	{ { { 11, 0, 2 }, { 4, 24, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
			
 
				-	{ { { 11, 0, 0 }, { 1, 31, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 1, 31, 1 } } },
			
 
				-	{ { { 11, 0, 2 }, { 2, 30, 1 } } },
			
 
				-	{ { { 11, 0, 3 }, { 2, 30, 0 } } },
			
 
				-	{ { { 11, 0, 4 }, { 2, 31, 1 } } },
			
 
				-	{ { { 12, 0, 4 }, { 2, 31, 0 } } },
			
 
				-	{ { { 12, 0, 3 }, { 4, 27, 0 } } },
			
 
				-	{ { { 12, 0, 2 }, { 3, 30, 1 } } },
			
 
				-	{ { { 12, 0, 1 }, { 3, 30, 0 } } },
			
 
				-	{ { { 12, 0, 0 }, { 4, 28, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 3, 31, 1 } } },
			
 
				-	{ { { 12, 0, 2 }, { 3, 31, 0 } } },
			
 
				-	{ { { 12, 0, 3 }, { 3, 31, 1 } } },
			
 
				-	{ { { 12, 0, 4 }, { 4, 30, 1 } } },
			
 
				-	{ { { 13, 0, 3 }, { 4, 30, 0 } } },
			
 
				-	{ { { 13, 0, 2 }, { 6, 27, 1 } } },
			
 
				-	{ { { 13, 0, 1 }, { 6, 27, 0 } } },
			
 
				-	{ { { 13, 0, 0 }, { 4, 31, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 5, 30, 1 } } },
			
 
				-	{ { { 13, 0, 2 }, { 5, 30, 0 } } },
			
 
				-	{ { { 13, 0, 3 }, { 8, 24, 0 } } },
			
 
				-	{ { { 13, 0, 4 }, { 5, 31, 1 } } },
			
 
				-	{ { { 14, 0, 3 }, { 5, 31, 0 } } },
			
 
				-	{ { { 14, 0, 2 }, { 5, 31, 1 } } },
			
 
				-	{ { { 14, 0, 1 }, { 6, 30, 1 } } },
			
 
				-	{ { { 14, 0, 0 }, { 6, 30, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 6, 31, 1 } } },
			
 
				-	{ { { 14, 0, 2 }, { 6, 31, 0 } } },
			
 
				-	{ { { 14, 0, 3 }, { 8, 27, 0 } } },
			
 
				-	{ { { 14, 0, 4 }, { 7, 30, 1 } } },
			
 
				-	{ { { 15, 0, 3 }, { 7, 30, 0 } } },
			
 
				-	{ { { 15, 0, 2 }, { 8, 28, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
			
 
				-	{ { { 15, 0, 0 }, { 7, 31, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 7, 31, 1 } } },
			
 
				-	{ { { 15, 0, 2 }, { 8, 30, 1 } } },
			
 
				-	{ { { 15, 0, 3 }, { 8, 30, 0 } } },
			
 
				-	{ { { 15, 0, 4 }, { 10, 27, 1 } } },
			
 
				-	{ { { 16, 0, 4 }, { 10, 27, 0 } } },
			
 
				-	{ { { 16, 0, 3 }, { 8, 31, 0 } } },
			
 
				-	{ { { 16, 0, 2 }, { 9, 30, 1 } } },
			
 
				-	{ { { 16, 0, 1 }, { 9, 30, 0 } } },
			
 
				-	{ { { 16, 0, 0 }, { 12, 24, 0 } } },
			
 
				-	{ { { 16, 0, 1 }, { 9, 31, 1 } } },
			
 
				-	{ { { 16, 0, 2 }, { 9, 31, 0 } } },
			
 
				-	{ { { 16, 0, 3 }, { 9, 31, 1 } } },
			
 
				-	{ { { 16, 0, 4 }, { 10, 30, 1 } } },
			
 
				-	{ { { 17, 0, 3 }, { 10, 30, 0 } } },
			
 
				-	{ { { 17, 0, 2 }, { 10, 31, 1 } } },
			
 
				-	{ { { 17, 0, 1 }, { 10, 31, 0 } } },
			
 
				-	{ { { 17, 0, 0 }, { 12, 27, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 11, 30, 1 } } },
			
 
				-	{ { { 17, 0, 2 }, { 11, 30, 0 } } },
			
 
				-	{ { { 17, 0, 3 }, { 12, 28, 0 } } },
			
 
				-	{ { { 17, 0, 4 }, { 11, 31, 1 } } },
			
 
				-	{ { { 18, 0, 3 }, { 11, 31, 0 } } },
			
 
				-	{ { { 18, 0, 2 }, { 11, 31, 1 } } },
			
 
				-	{ { { 18, 0, 1 }, { 12, 30, 1 } } },
			
 
				-	{ { { 18, 0, 0 }, { 12, 30, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 14, 27, 1 } } },
			
 
				-	{ { { 18, 0, 2 }, { 14, 27, 0 } } },
			
 
				-	{ { { 18, 0, 3 }, { 12, 31, 0 } } },
			
 
				-	{ { { 18, 0, 4 }, { 13, 30, 1 } } },
			
 
				-	{ { { 19, 0, 3 }, { 13, 30, 0 } } },
			
 
				-	{ { { 19, 0, 2 }, { 16, 24, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
			
 
				-	{ { { 19, 0, 0 }, { 13, 31, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 13, 31, 1 } } },
			
 
				-	{ { { 19, 0, 2 }, { 14, 30, 1 } } },
			
 
				-	{ { { 19, 0, 3 }, { 14, 30, 0 } } },
			
 
				-	{ { { 19, 0, 4 }, { 14, 31, 1 } } },
			
 
				-	{ { { 20, 0, 4 }, { 14, 31, 0 } } },
			
 
				-	{ { { 20, 0, 3 }, { 16, 27, 0 } } },
			
 
				-	{ { { 20, 0, 2 }, { 15, 30, 1 } } },
			
 
				-	{ { { 20, 0, 1 }, { 15, 30, 0 } } },
			
 
				-	{ { { 20, 0, 0 }, { 16, 28, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 15, 31, 1 } } },
			
 
				-	{ { { 20, 0, 2 }, { 15, 31, 0 } } },
			
 
				-	{ { { 20, 0, 3 }, { 15, 31, 1 } } },
			
 
				-	{ { { 20, 0, 4 }, { 16, 30, 1 } } },
			
 
				-	{ { { 21, 0, 3 }, { 16, 30, 0 } } },
			
 
				-	{ { { 21, 0, 2 }, { 18, 27, 1 } } },
			
 
				-	{ { { 21, 0, 1 }, { 18, 27, 0 } } },
			
 
				-	{ { { 21, 0, 0 }, { 16, 31, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 17, 30, 1 } } },
			
 
				-	{ { { 21, 0, 2 }, { 17, 30, 0 } } },
			
 
				-	{ { { 21, 0, 3 }, { 20, 24, 0 } } },
			
 
				-	{ { { 21, 0, 4 }, { 17, 31, 1 } } },
			
 
				-	{ { { 22, 0, 3 }, { 17, 31, 0 } } },
			
 
				-	{ { { 22, 0, 2 }, { 17, 31, 1 } } },
			
 
				-	{ { { 22, 0, 1 }, { 18, 30, 1 } } },
			
 
				-	{ { { 22, 0, 0 }, { 18, 30, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 18, 31, 1 } } },
			
 
				-	{ { { 22, 0, 2 }, { 18, 31, 0 } } },
			
 
				-	{ { { 22, 0, 3 }, { 20, 27, 0 } } },
			
 
				-	{ { { 22, 0, 4 }, { 19, 30, 1 } } },
			
 
				-	{ { { 23, 0, 3 }, { 19, 30, 0 } } },
			
 
				-	{ { { 23, 0, 2 }, { 20, 28, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
			
 
				-	{ { { 23, 0, 0 }, { 19, 31, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 19, 31, 1 } } },
			
 
				-	{ { { 23, 0, 2 }, { 20, 30, 1 } } },
			
 
				-	{ { { 23, 0, 3 }, { 20, 30, 0 } } },
			
 
				-	{ { { 23, 0, 4 }, { 22, 27, 1 } } },
			
 
				-	{ { { 24, 0, 4 }, { 22, 27, 0 } } },
			
 
				-	{ { { 24, 0, 3 }, { 20, 31, 0 } } },
			
 
				-	{ { { 24, 0, 2 }, { 21, 30, 1 } } },
			
 
				-	{ { { 24, 0, 1 }, { 21, 30, 0 } } },
			
 
				-	{ { { 24, 0, 0 }, { 24, 24, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 21, 31, 1 } } },
			
 
				-	{ { { 24, 0, 2 }, { 21, 31, 0 } } },
			
 
				-	{ { { 24, 0, 3 }, { 21, 31, 1 } } },
			
 
				-	{ { { 24, 0, 4 }, { 22, 30, 1 } } },
			
 
				-	{ { { 25, 0, 3 }, { 22, 30, 0 } } },
			
 
				-	{ { { 25, 0, 2 }, { 22, 31, 1 } } },
			
 
				-	{ { { 25, 0, 1 }, { 22, 31, 0 } } },
			
 
				-	{ { { 25, 0, 0 }, { 24, 27, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 23, 30, 1 } } },
			
 
				-	{ { { 25, 0, 2 }, { 23, 30, 0 } } },
			
 
				-	{ { { 25, 0, 3 }, { 24, 28, 0 } } },
			
 
				-	{ { { 25, 0, 4 }, { 23, 31, 1 } } },
			
 
				-	{ { { 26, 0, 3 }, { 23, 31, 0 } } },
			
 
				-	{ { { 26, 0, 2 }, { 23, 31, 1 } } },
			
 
				-	{ { { 26, 0, 1 }, { 24, 30, 1 } } },
			
 
				-	{ { { 26, 0, 0 }, { 24, 30, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 26, 27, 1 } } },
			
 
				-	{ { { 26, 0, 2 }, { 26, 27, 0 } } },
			
 
				-	{ { { 26, 0, 3 }, { 24, 31, 0 } } },
			
 
				-	{ { { 26, 0, 4 }, { 25, 30, 1 } } },
			
 
				-	{ { { 27, 0, 3 }, { 25, 30, 0 } } },
			
 
				-	{ { { 27, 0, 2 }, { 28, 24, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
			
 
				-	{ { { 27, 0, 0 }, { 25, 31, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 25, 31, 1 } } },
			
 
				-	{ { { 27, 0, 2 }, { 26, 30, 1 } } },
			
 
				-	{ { { 27, 0, 3 }, { 26, 30, 0 } } },
			
 
				-	{ { { 27, 0, 4 }, { 26, 31, 1 } } },
			
 
				-	{ { { 28, 0, 4 }, { 26, 31, 0 } } },
			
 
				-	{ { { 28, 0, 3 }, { 28, 27, 0 } } },
			
 
				-	{ { { 28, 0, 2 }, { 27, 30, 1 } } },
			
 
				-	{ { { 28, 0, 1 }, { 27, 30, 0 } } },
			
 
				-	{ { { 28, 0, 0 }, { 28, 28, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 27, 31, 1 } } },
			
 
				-	{ { { 28, 0, 2 }, { 27, 31, 0 } } },
			
 
				-	{ { { 28, 0, 3 }, { 27, 31, 1 } } },
			
 
				-	{ { { 28, 0, 4 }, { 28, 30, 1 } } },
			
 
				-	{ { { 29, 0, 3 }, { 28, 30, 0 } } },
			
 
				-	{ { { 29, 0, 2 }, { 30, 27, 1 } } },
			
 
				-	{ { { 29, 0, 1 }, { 30, 27, 0 } } },
			
 
				-	{ { { 29, 0, 0 }, { 28, 31, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 29, 30, 1 } } },
			
 
				-	{ { { 29, 0, 2 }, { 29, 30, 0 } } },
			
 
				-	{ { { 29, 0, 3 }, { 29, 30, 1 } } },
			
 
				-	{ { { 29, 0, 4 }, { 29, 31, 1 } } },
			
 
				-	{ { { 30, 0, 3 }, { 29, 31, 0 } } },
			
 
				-	{ { { 30, 0, 2 }, { 29, 31, 1 } } },
			
 
				-	{ { { 30, 0, 1 }, { 30, 30, 1 } } },
			
 
				-	{ { { 30, 0, 0 }, { 30, 30, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 30, 31, 1 } } },
			
 
				-	{ { { 30, 0, 2 }, { 30, 31, 0 } } },
			
 
				-	{ { { 30, 0, 3 }, { 30, 31, 1 } } },
			
 
				-	{ { { 30, 0, 4 }, { 31, 30, 1 } } },
			
 
				-	{ { { 31, 0, 3 }, { 31, 30, 0 } } },
			
 
				-	{ { { 31, 0, 2 }, { 31, 30, 1 } } },
			
 
				-	{ { { 31, 0, 1 }, { 31, 31, 1 } } },
			
 
				-	{ { { 31, 0, 0 }, { 31, 31, 0 } } }
			
 
				-};
			
 
				-
			
 
				-static SingleColourLookup const lookup_6_4[] = 
			
 
				-{
			
 
				-	{ { { 0, 0, 0 }, { 0, 0, 0 } } },
			
 
				-	{ { { 0, 0, 1 }, { 0, 1, 0 } } },
			
 
				-	{ { { 0, 0, 2 }, { 0, 2, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 3, 1 } } },
			
 
				-	{ { { 1, 0, 0 }, { 0, 3, 0 } } },
			
 
				-	{ { { 1, 0, 1 }, { 0, 4, 0 } } },
			
 
				-	{ { { 1, 0, 2 }, { 0, 5, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 6, 1 } } },
			
 
				-	{ { { 2, 0, 0 }, { 0, 6, 0 } } },
			
 
				-	{ { { 2, 0, 1 }, { 0, 7, 0 } } },
			
 
				-	{ { { 2, 0, 2 }, { 0, 8, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 9, 1 } } },
			
 
				-	{ { { 3, 0, 0 }, { 0, 9, 0 } } },
			
 
				-	{ { { 3, 0, 1 }, { 0, 10, 0 } } },
			
 
				-	{ { { 3, 0, 2 }, { 0, 11, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 12, 1 } } },
			
 
				-	{ { { 4, 0, 0 }, { 0, 12, 0 } } },
			
 
				-	{ { { 4, 0, 1 }, { 0, 13, 0 } } },
			
 
				-	{ { { 4, 0, 2 }, { 0, 14, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 0, 15, 1 } } },
			
 
				-	{ { { 5, 0, 0 }, { 0, 15, 0 } } },
			
 
				-	{ { { 5, 0, 1 }, { 0, 16, 0 } } },
			
 
				-	{ { { 5, 0, 2 }, { 1, 15, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 17, 0 } } },
			
 
				-	{ { { 6, 0, 0 }, { 0, 18, 0 } } },
			
 
				-	{ { { 6, 0, 1 }, { 0, 19, 0 } } },
			
 
				-	{ { { 6, 0, 2 }, { 3, 14, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 20, 0 } } },
			
 
				-	{ { { 7, 0, 0 }, { 0, 21, 0 } } },
			
 
				-	{ { { 7, 0, 1 }, { 0, 22, 0 } } },
			
 
				-	{ { { 7, 0, 2 }, { 4, 15, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 23, 0 } } },
			
 
				-	{ { { 8, 0, 0 }, { 0, 24, 0 } } },
			
 
				-	{ { { 8, 0, 1 }, { 0, 25, 0 } } },
			
 
				-	{ { { 8, 0, 2 }, { 6, 14, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 0, 26, 0 } } },
			
 
				-	{ { { 9, 0, 0 }, { 0, 27, 0 } } },
			
 
				-	{ { { 9, 0, 1 }, { 0, 28, 0 } } },
			
 
				-	{ { { 9, 0, 2 }, { 7, 15, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 29, 0 } } },
			
 
				-	{ { { 10, 0, 0 }, { 0, 30, 0 } } },
			
 
				-	{ { { 10, 0, 1 }, { 0, 31, 0 } } },
			
 
				-	{ { { 10, 0, 2 }, { 9, 14, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 0, 32, 0 } } },
			
 
				-	{ { { 11, 0, 0 }, { 0, 33, 0 } } },
			
 
				-	{ { { 11, 0, 1 }, { 2, 30, 0 } } },
			
 
				-	{ { { 11, 0, 2 }, { 0, 34, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 0, 35, 0 } } },
			
 
				-	{ { { 12, 0, 0 }, { 0, 36, 0 } } },
			
 
				-	{ { { 12, 0, 1 }, { 3, 31, 0 } } },
			
 
				-	{ { { 12, 0, 2 }, { 0, 37, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 0, 38, 0 } } },
			
 
				-	{ { { 13, 0, 0 }, { 0, 39, 0 } } },
			
 
				-	{ { { 13, 0, 1 }, { 5, 30, 0 } } },
			
 
				-	{ { { 13, 0, 2 }, { 0, 40, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 0, 41, 0 } } },
			
 
				-	{ { { 14, 0, 0 }, { 0, 42, 0 } } },
			
 
				-	{ { { 14, 0, 1 }, { 6, 31, 0 } } },
			
 
				-	{ { { 14, 0, 2 }, { 0, 43, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 0, 44, 0 } } },
			
 
				-	{ { { 15, 0, 0 }, { 0, 45, 0 } } },
			
 
				-	{ { { 15, 0, 1 }, { 8, 30, 0 } } },
			
 
				-	{ { { 15, 0, 2 }, { 0, 46, 0 } } },
			
 
				-	{ { { 16, 0, 2 }, { 0, 47, 0 } } },
			
 
				-	{ { { 16, 0, 1 }, { 1, 46, 0 } } },
			
 
				-	{ { { 16, 0, 0 }, { 0, 48, 0 } } },
			
 
				-	{ { { 16, 0, 1 }, { 0, 49, 0 } } },
			
 
				-	{ { { 16, 0, 2 }, { 0, 50, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 2, 47, 0 } } },
			
 
				-	{ { { 17, 0, 0 }, { 0, 51, 0 } } },
			
 
				-	{ { { 17, 0, 1 }, { 0, 52, 0 } } },
			
 
				-	{ { { 17, 0, 2 }, { 0, 53, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 4, 46, 0 } } },
			
 
				-	{ { { 18, 0, 0 }, { 0, 54, 0 } } },
			
 
				-	{ { { 18, 0, 1 }, { 0, 55, 0 } } },
			
 
				-	{ { { 18, 0, 2 }, { 0, 56, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 5, 47, 0 } } },
			
 
				-	{ { { 19, 0, 0 }, { 0, 57, 0 } } },
			
 
				-	{ { { 19, 0, 1 }, { 0, 58, 0 } } },
			
 
				-	{ { { 19, 0, 2 }, { 0, 59, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 7, 46, 0 } } },
			
 
				-	{ { { 20, 0, 0 }, { 0, 60, 0 } } },
			
 
				-	{ { { 20, 0, 1 }, { 0, 61, 0 } } },
			
 
				-	{ { { 20, 0, 2 }, { 0, 62, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 8, 47, 0 } } },
			
 
				-	{ { { 21, 0, 0 }, { 0, 63, 0 } } },
			
 
				-	{ { { 21, 0, 1 }, { 1, 62, 0 } } },
			
 
				-	{ { { 21, 0, 2 }, { 1, 63, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 10, 46, 0 } } },
			
 
				-	{ { { 22, 0, 0 }, { 2, 62, 0 } } },
			
 
				-	{ { { 22, 0, 1 }, { 2, 63, 0 } } },
			
 
				-	{ { { 22, 0, 2 }, { 3, 62, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 11, 47, 0 } } },
			
 
				-	{ { { 23, 0, 0 }, { 3, 63, 0 } } },
			
 
				-	{ { { 23, 0, 1 }, { 4, 62, 0 } } },
			
 
				-	{ { { 23, 0, 2 }, { 4, 63, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 13, 46, 0 } } },
			
 
				-	{ { { 24, 0, 0 }, { 5, 62, 0 } } },
			
 
				-	{ { { 24, 0, 1 }, { 5, 63, 0 } } },
			
 
				-	{ { { 24, 0, 2 }, { 6, 62, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 14, 47, 0 } } },
			
 
				-	{ { { 25, 0, 0 }, { 6, 63, 0 } } },
			
 
				-	{ { { 25, 0, 1 }, { 7, 62, 0 } } },
			
 
				-	{ { { 25, 0, 2 }, { 7, 63, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 16, 45, 0 } } },
			
 
				-	{ { { 26, 0, 0 }, { 8, 62, 0 } } },
			
 
				-	{ { { 26, 0, 1 }, { 8, 63, 0 } } },
			
 
				-	{ { { 26, 0, 2 }, { 9, 62, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 16, 48, 0 } } },
			
 
				-	{ { { 27, 0, 0 }, { 9, 63, 0 } } },
			
 
				-	{ { { 27, 0, 1 }, { 10, 62, 0 } } },
			
 
				-	{ { { 27, 0, 2 }, { 10, 63, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 16, 51, 0 } } },
			
 
				-	{ { { 28, 0, 0 }, { 11, 62, 0 } } },
			
 
				-	{ { { 28, 0, 1 }, { 11, 63, 0 } } },
			
 
				-	{ { { 28, 0, 2 }, { 12, 62, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 16, 54, 0 } } },
			
 
				-	{ { { 29, 0, 0 }, { 12, 63, 0 } } },
			
 
				-	{ { { 29, 0, 1 }, { 13, 62, 0 } } },
			
 
				-	{ { { 29, 0, 2 }, { 13, 63, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 16, 57, 0 } } },
			
 
				-	{ { { 30, 0, 0 }, { 14, 62, 0 } } },
			
 
				-	{ { { 30, 0, 1 }, { 14, 63, 0 } } },
			
 
				-	{ { { 30, 0, 2 }, { 15, 62, 0 } } },
			
 
				-	{ { { 31, 0, 1 }, { 16, 60, 0 } } },
			
 
				-	{ { { 31, 0, 0 }, { 15, 63, 0 } } },
			
 
				-	{ { { 31, 0, 1 }, { 24, 46, 0 } } },
			
 
				-	{ { { 31, 0, 2 }, { 16, 62, 0 } } },
			
 
				-	{ { { 32, 0, 2 }, { 16, 63, 0 } } },
			
 
				-	{ { { 32, 0, 1 }, { 17, 62, 0 } } },
			
 
				-	{ { { 32, 0, 0 }, { 25, 47, 0 } } },
			
 
				-	{ { { 32, 0, 1 }, { 17, 63, 0 } } },
			
 
				-	{ { { 32, 0, 2 }, { 18, 62, 0 } } },
			
 
				-	{ { { 33, 0, 1 }, { 18, 63, 0 } } },
			
 
				-	{ { { 33, 0, 0 }, { 27, 46, 0 } } },
			
 
				-	{ { { 33, 0, 1 }, { 19, 62, 0 } } },
			
 
				-	{ { { 33, 0, 2 }, { 19, 63, 0 } } },
			
 
				-	{ { { 34, 0, 1 }, { 20, 62, 0 } } },
			
 
				-	{ { { 34, 0, 0 }, { 28, 47, 0 } } },
			
 
				-	{ { { 34, 0, 1 }, { 20, 63, 0 } } },
			
 
				-	{ { { 34, 0, 2 }, { 21, 62, 0 } } },
			
 
				-	{ { { 35, 0, 1 }, { 21, 63, 0 } } },
			
 
				-	{ { { 35, 0, 0 }, { 30, 46, 0 } } },
			
 
				-	{ { { 35, 0, 1 }, { 22, 62, 0 } } },
			
 
				-	{ { { 35, 0, 2 }, { 22, 63, 0 } } },
			
 
				-	{ { { 36, 0, 1 }, { 23, 62, 0 } } },
			
 
				-	{ { { 36, 0, 0 }, { 31, 47, 0 } } },
			
 
				-	{ { { 36, 0, 1 }, { 23, 63, 0 } } },
			
 
				-	{ { { 36, 0, 2 }, { 24, 62, 0 } } },
			
 
				-	{ { { 37, 0, 1 }, { 24, 63, 0 } } },
			
 
				-	{ { { 37, 0, 0 }, { 32, 47, 0 } } },
			
 
				-	{ { { 37, 0, 1 }, { 25, 62, 0 } } },
			
 
				-	{ { { 37, 0, 2 }, { 25, 63, 0 } } },
			
 
				-	{ { { 38, 0, 1 }, { 26, 62, 0 } } },
			
 
				-	{ { { 38, 0, 0 }, { 32, 50, 0 } } },
			
 
				-	{ { { 38, 0, 1 }, { 26, 63, 0 } } },
			
 
				-	{ { { 38, 0, 2 }, { 27, 62, 0 } } },
			
 
				-	{ { { 39, 0, 1 }, { 27, 63, 0 } } },
			
 
				-	{ { { 39, 0, 0 }, { 32, 53, 0 } } },
			
 
				-	{ { { 39, 0, 1 }, { 28, 62, 0 } } },
			
 
				-	{ { { 39, 0, 2 }, { 28, 63, 0 } } },
			
 
				-	{ { { 40, 0, 1 }, { 29, 62, 0 } } },
			
 
				-	{ { { 40, 0, 0 }, { 32, 56, 0 } } },
			
 
				-	{ { { 40, 0, 1 }, { 29, 63, 0 } } },
			
 
				-	{ { { 40, 0, 2 }, { 30, 62, 0 } } },
			
 
				-	{ { { 41, 0, 1 }, { 30, 63, 0 } } },
			
 
				-	{ { { 41, 0, 0 }, { 32, 59, 0 } } },
			
 
				-	{ { { 41, 0, 1 }, { 31, 62, 0 } } },
			
 
				-	{ { { 41, 0, 2 }, { 31, 63, 0 } } },
			
 
				-	{ { { 42, 0, 1 }, { 32, 61, 0 } } },
			
 
				-	{ { { 42, 0, 0 }, { 32, 62, 0 } } },
			
 
				-	{ { { 42, 0, 1 }, { 32, 63, 0 } } },
			
 
				-	{ { { 42, 0, 2 }, { 41, 46, 0 } } },
			
 
				-	{ { { 43, 0, 1 }, { 33, 62, 0 } } },
			
 
				-	{ { { 43, 0, 0 }, { 33, 63, 0 } } },
			
 
				-	{ { { 43, 0, 1 }, { 34, 62, 0 } } },
			
 
				-	{ { { 43, 0, 2 }, { 42, 47, 0 } } },
			
 
				-	{ { { 44, 0, 1 }, { 34, 63, 0 } } },
			
 
				-	{ { { 44, 0, 0 }, { 35, 62, 0 } } },
			
 
				-	{ { { 44, 0, 1 }, { 35, 63, 0 } } },
			
 
				-	{ { { 44, 0, 2 }, { 44, 46, 0 } } },
			
 
				-	{ { { 45, 0, 1 }, { 36, 62, 0 } } },
			
 
				-	{ { { 45, 0, 0 }, { 36, 63, 0 } } },
			
 
				-	{ { { 45, 0, 1 }, { 37, 62, 0 } } },
			
 
				-	{ { { 45, 0, 2 }, { 45, 47, 0 } } },
			
 
				-	{ { { 46, 0, 1 }, { 37, 63, 0 } } },
			
 
				-	{ { { 46, 0, 0 }, { 38, 62, 0 } } },
			
 
				-	{ { { 46, 0, 1 }, { 38, 63, 0 } } },
			
 
				-	{ { { 46, 0, 2 }, { 47, 46, 0 } } },
			
 
				-	{ { { 47, 0, 1 }, { 39, 62, 0 } } },
			
 
				-	{ { { 47, 0, 0 }, { 39, 63, 0 } } },
			
 
				-	{ { { 47, 0, 1 }, { 40, 62, 0 } } },
			
 
				-	{ { { 47, 0, 2 }, { 48, 46, 0 } } },
			
 
				-	{ { { 48, 0, 2 }, { 40, 63, 0 } } },
			
 
				-	{ { { 48, 0, 1 }, { 41, 62, 0 } } },
			
 
				-	{ { { 48, 0, 0 }, { 41, 63, 0 } } },
			
 
				-	{ { { 48, 0, 1 }, { 48, 49, 0 } } },
			
 
				-	{ { { 48, 0, 2 }, { 42, 62, 0 } } },
			
 
				-	{ { { 49, 0, 1 }, { 42, 63, 0 } } },
			
 
				-	{ { { 49, 0, 0 }, { 43, 62, 0 } } },
			
 
				-	{ { { 49, 0, 1 }, { 48, 52, 0 } } },
			
 
				-	{ { { 49, 0, 2 }, { 43, 63, 0 } } },
			
 
				-	{ { { 50, 0, 1 }, { 44, 62, 0 } } },
			
 
				-	{ { { 50, 0, 0 }, { 44, 63, 0 } } },
			
 
				-	{ { { 50, 0, 1 }, { 48, 55, 0 } } },
			
 
				-	{ { { 50, 0, 2 }, { 45, 62, 0 } } },
			
 
				-	{ { { 51, 0, 1 }, { 45, 63, 0 } } },
			
 
				-	{ { { 51, 0, 0 }, { 46, 62, 0 } } },
			
 
				-	{ { { 51, 0, 1 }, { 48, 58, 0 } } },
			
 
				-	{ { { 51, 0, 2 }, { 46, 63, 0 } } },
			
 
				-	{ { { 52, 0, 1 }, { 47, 62, 0 } } },
			
 
				-	{ { { 52, 0, 0 }, { 47, 63, 0 } } },
			
 
				-	{ { { 52, 0, 1 }, { 48, 61, 0 } } },
			
 
				-	{ { { 52, 0, 2 }, { 48, 62, 0 } } },
			
 
				-	{ { { 53, 0, 1 }, { 56, 47, 0 } } },
			
 
				-	{ { { 53, 0, 0 }, { 48, 63, 0 } } },
			
 
				-	{ { { 53, 0, 1 }, { 49, 62, 0 } } },
			
 
				-	{ { { 53, 0, 2 }, { 49, 63, 0 } } },
			
 
				-	{ { { 54, 0, 1 }, { 58, 46, 0 } } },
			
 
				-	{ { { 54, 0, 0 }, { 50, 62, 0 } } },
			
 
				-	{ { { 54, 0, 1 }, { 50, 63, 0 } } },
			
 
				-	{ { { 54, 0, 2 }, { 51, 62, 0 } } },
			
 
				-	{ { { 55, 0, 1 }, { 59, 47, 0 } } },
			
 
				-	{ { { 55, 0, 0 }, { 51, 63, 0 } } },
			
 
				-	{ { { 55, 0, 1 }, { 52, 62, 0 } } },
			
 
				-	{ { { 55, 0, 2 }, { 52, 63, 0 } } },
			
 
				-	{ { { 56, 0, 1 }, { 61, 46, 0 } } },
			
 
				-	{ { { 56, 0, 0 }, { 53, 62, 0 } } },
			
 
				-	{ { { 56, 0, 1 }, { 53, 63, 0 } } },
			
 
				-	{ { { 56, 0, 2 }, { 54, 62, 0 } } },
			
 
				-	{ { { 57, 0, 1 }, { 62, 47, 0 } } },
			
 
				-	{ { { 57, 0, 0 }, { 54, 63, 0 } } },
			
 
				-	{ { { 57, 0, 1 }, { 55, 62, 0 } } },
			
 
				-	{ { { 57, 0, 2 }, { 55, 63, 0 } } },
			
 
				-	{ { { 58, 0, 1 }, { 56, 62, 1 } } },
			
 
				-	{ { { 58, 0, 0 }, { 56, 62, 0 } } },
			
 
				-	{ { { 58, 0, 1 }, { 56, 63, 0 } } },
			
 
				-	{ { { 58, 0, 2 }, { 57, 62, 0 } } },
			
 
				-	{ { { 59, 0, 1 }, { 57, 63, 1 } } },
			
 
				-	{ { { 59, 0, 0 }, { 57, 63, 0 } } },
			
 
				-	{ { { 59, 0, 1 }, { 58, 62, 0 } } },
			
 
				-	{ { { 59, 0, 2 }, { 58, 63, 0 } } },
			
 
				-	{ { { 60, 0, 1 }, { 59, 62, 1 } } },
			
 
				-	{ { { 60, 0, 0 }, { 59, 62, 0 } } },
			
 
				-	{ { { 60, 0, 1 }, { 59, 63, 0 } } },
			
 
				-	{ { { 60, 0, 2 }, { 60, 62, 0 } } },
			
 
				-	{ { { 61, 0, 1 }, { 60, 63, 1 } } },
			
 
				-	{ { { 61, 0, 0 }, { 60, 63, 0 } } },
			
 
				-	{ { { 61, 0, 1 }, { 61, 62, 0 } } },
			
 
				-	{ { { 61, 0, 2 }, { 61, 63, 0 } } },
			
 
				-	{ { { 62, 0, 1 }, { 62, 62, 1 } } },
			
 
				-	{ { { 62, 0, 0 }, { 62, 62, 0 } } },
			
 
				-	{ { { 62, 0, 1 }, { 62, 63, 0 } } },
			
 
				-	{ { { 62, 0, 2 }, { 63, 62, 0 } } },
			
 
				-	{ { { 63, 0, 1 }, { 63, 63, 1 } } },
			
 
				-	{ { { 63, 0, 0 }, { 63, 63, 0 } } }
			
 
				-};
			
--- a/3rdparty/libsquish/squish.cpp
+++ b/3rdparty/libsquish/squish.cpp
@@ -1,260 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#include "squish.h"
			
 
				-#include "colourset.h"
			
 
				-#include "maths.h"
			
 
				-#include "rangefit.h"
			
 
				-#include "clusterfit.h"
			
 
				-#include "colourblock.h"
			
 
				-#include "alpha.h"
			
 
				-#include "singlecolourfit.h"
			
 
				-
			
 
				-namespace squish {
			
 
				-
			
 
				-static int FixFlags( int flags )
			
 
				-{
			
 
				-	// grab the flag bits
			
 
				-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
			
 
				-	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
			
 
				-	int extra = flags & kWeightColourByAlpha;
			
 
				-	
			
 
				-	// set defaults
			
 
				-	if ( method != kDxt3
			
 
				-	&&   method != kDxt5
			
 
				-	&&   method != kBc4
			
 
				-	&&   method != kBc5 )
			
 
				-	{
			
 
				-		method = kDxt1;
			
 
				-	}
			
 
				-	if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
			
 
				-		fit = kColourClusterFit;
			
 
				-		
			
 
				-	// done
			
 
				-	return method | fit | extra;
			
 
				-}
			
 
				-
			
 
				-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
			
 
				-{
			
 
				-	// fix any bad flags
			
 
				-	flags = FixFlags( flags );
			
 
				-
			
 
				-	if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
			
 
				-	{
			
 
				-		u8 alpha[16*4];
			
 
				-		for( int i = 0; i < 16; ++i )
			
 
				-		{
			
 
				-			alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
			
 
				-		}
			
 
				-
			
 
				-		u8* rBlock = reinterpret_cast< u8* >( block );
			
 
				-		CompressAlphaDxt5( alpha, mask, rBlock );
			
 
				-
			
 
				-		if ( ( flags & ( kBc5 ) ) != 0 )
			
 
				-		{
			
 
				-			for( int i = 0; i < 16; ++i )
			
 
				-			{
			
 
				-				alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
			
 
				-			}
			
 
				-
			
 
				-			u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
			
 
				-			CompressAlphaDxt5( alpha, mask, gBlock );
			
 
				-		}
			
 
				-
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	// get the block locations
			
 
				-	void* colourBlock = block;
			
 
				-	void* alphaBlock = block;
			
 
				-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
			
 
				-		colourBlock = reinterpret_cast< u8* >( block ) + 8;
			
 
				-
			
 
				-	// create the minimal point set
			
 
				-	ColourSet colours( rgba, mask, flags );
			
 
				-	
			
 
				-	// check the compression type and compress colour
			
 
				-	if( colours.GetCount() == 1 )
			
 
				-	{
			
 
				-		// always do a single colour fit
			
 
				-		SingleColourFit fit( &colours, flags );
			
 
				-		fit.Compress( colourBlock );
			
 
				-	}
			
 
				-	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
			
 
				-	{
			
 
				-		// do a range fit
			
 
				-		RangeFit fit( &colours, flags, metric );
			
 
				-		fit.Compress( colourBlock );
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// default to a cluster fit (could be iterative or not)
			
 
				-		ClusterFit fit( &colours, flags, metric );
			
 
				-		fit.Compress( colourBlock );
			
 
				-	}
			
 
				-	
			
 
				-	// compress alpha separately if necessary
			
 
				-	if( ( flags & kDxt3 ) != 0 )
			
 
				-		CompressAlphaDxt3( rgba, mask, alphaBlock );
			
 
				-	else if( ( flags & kDxt5 ) != 0 )
			
 
				-		CompressAlphaDxt5( rgba, mask, alphaBlock );
			
 
				-}
			
 
				-
			
 
				-void Decompress( u8* rgba, void const* block, int flags )
			
 
				-{
			
 
				-	// fix any bad flags
			
 
				-	flags = FixFlags( flags );
			
 
				-
			
 
				-	// get the block locations
			
 
				-	void const* colourBlock = block;
			
 
				-	void const* alphaBock = block;
			
 
				-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
			
 
				-		colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
			
 
				-
			
 
				-	// decompress colour
			
 
				-	DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
			
 
				-
			
 
				-	// decompress alpha separately if necessary
			
 
				-	if( ( flags & kDxt3 ) != 0 )
			
 
				-		DecompressAlphaDxt3( rgba, alphaBock );
			
 
				-	else if( ( flags & kDxt5 ) != 0 )
			
 
				-		DecompressAlphaDxt5( rgba, alphaBock );
			
 
				-}
			
 
				-
			
 
				-int GetStorageRequirements( int width, int height, int flags )
			
 
				-{
			
 
				-	// fix any bad flags
			
 
				-	flags = FixFlags( flags );
			
 
				-	
			
 
				-	// compute the storage requirements
			
 
				-	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
			
 
				-	int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
			
 
				-	return blockcount*blocksize;
			
 
				-}
			
 
				-
			
 
				-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
			
 
				-{
			
 
				-	// fix any bad flags
			
 
				-	flags = FixFlags( flags );
			
 
				-
			
 
				-	// initialise the block output
			
 
				-	u8* targetBlock = reinterpret_cast< u8* >( blocks );
			
 
				-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
			
 
				-
			
 
				-	// loop over blocks
			
 
				-	for( int y = 0; y < height; y += 4 )
			
 
				-	{
			
 
				-		for( int x = 0; x < width; x += 4 )
			
 
				-		{
			
 
				-			// build the 4x4 block of pixels
			
 
				-			u8 sourceRgba[16*4];
			
 
				-			u8* targetPixel = sourceRgba;
			
 
				-			int mask = 0;
			
 
				-			for( int py = 0; py < 4; ++py )
			
 
				-			{
			
 
				-				for( int px = 0; px < 4; ++px )
			
 
				-				{
			
 
				-					// get the source pixel in the image
			
 
				-					int sx = x + px;
			
 
				-					int sy = y + py;
			
 
				-					
			
 
				-					// enable if we're in the image
			
 
				-					if( sx < width && sy < height )
			
 
				-					{
			
 
				-						// copy the rgba value
			
 
				-						u8 const* sourcePixel = rgba + 4*( width*sy + sx );
			
 
				-						for( int i = 0; i < 4; ++i )
			
 
				-							*targetPixel++ = *sourcePixel++;
			
 
				-							
			
 
				-						// enable this pixel
			
 
				-						mask |= ( 1 << ( 4*py + px ) );
			
 
				-					}
			
 
				-					else
			
 
				-					{
			
 
				-						// skip this pixel as its outside the image
			
 
				-						targetPixel += 4;
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-			
			
 
				-			// compress it into the output
			
 
				-			CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
			
 
				-			
			
 
				-			// advance
			
 
				-			targetBlock += bytesPerBlock;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
			
 
				-{
			
 
				-	// fix any bad flags
			
 
				-	flags = FixFlags( flags );
			
 
				-
			
 
				-	// initialise the block input
			
 
				-	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
			
 
				-	int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
			
 
				-
			
 
				-	// loop over blocks
			
 
				-	for( int y = 0; y < height; y += 4 )
			
 
				-	{
			
 
				-		for( int x = 0; x < width; x += 4 )
			
 
				-		{
			
 
				-			// decompress the block
			
 
				-			u8 targetRgba[4*16];
			
 
				-			Decompress( targetRgba, sourceBlock, flags );
			
 
				-			
			
 
				-			// write the decompressed pixels to the correct image locations
			
 
				-			u8 const* sourcePixel = targetRgba;
			
 
				-			for( int py = 0; py < 4; ++py )
			
 
				-			{
			
 
				-				for( int px = 0; px < 4; ++px )
			
 
				-				{
			
 
				-					// get the target location
			
 
				-					int sx = x + px;
			
 
				-					int sy = y + py;
			
 
				-					if( sx < width && sy < height )
			
 
				-					{
			
 
				-						u8* targetPixel = rgba + 4*( width*sy + sx );
			
 
				-						
			
 
				-						// copy the rgba value
			
 
				-						for( int i = 0; i < 4; ++i )
			
 
				-							*targetPixel++ = *sourcePixel++;
			
 
				-					}
			
 
				-					else
			
 
				-					{
			
 
				-						// skip this pixel as its outside the image
			
 
				-						sourcePixel += 4;
			
 
				-					}
			
 
				-				}
			
 
				-			}
			
 
				-			
			
 
				-			// advance
			
 
				-			sourceBlock += bytesPerBlock;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-} // namespace squish
			
--- a/3rdparty/libsquish/squish.h
+++ b/3rdparty/libsquish/squish.h
@@ -1,269 +0,0 @@
 
				-/* -----------------------------------------------------------------------------
			
 
				-
			
 
				-	Copyright (c) 2006 Simon Brown                          [email protected]
			
 
				-
			
 
				-	Permission is hereby granted, free of charge, to any person obtaining
			
 
				-	a copy of this software and associated documentation files (the 
			
 
				-	"Software"), to	deal in the Software without restriction, including
			
 
				-	without limitation the rights to use, copy, modify, merge, publish,
			
 
				-	distribute, sublicense, and/or sell copies of the Software, and to 
			
 
				-	permit persons to whom the Software is furnished to do so, subject to 
			
 
				-	the following conditions:
			
 
				-
			
 
				-	The above copyright notice and this permission notice shall be included
			
 
				-	in all copies or substantial portions of the Software.
			
 
				-
			
 
				-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
			
 
				-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
			
 
				-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
			
 
				-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
			
 
				-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
			
 
				-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
			
 
				-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
			
 
				-	
			
 
				-   -------------------------------------------------------------------------- */
			
 
				-   
			
 
				-#ifndef SQUISH_H
			
 
				-#define SQUISH_H
			
 
				-
			
 
				-//! All squish API functions live in this namespace.
			
 
				-namespace squish {
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-//! Typedef a quantity that is a single unsigned byte.
			
 
				-typedef unsigned char u8;
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-enum
			
 
				-{
			
 
				-	//! Use DXT1 compression.
			
 
				-	kDxt1 = ( 1 << 0 ),
			
 
				-
			
 
				-	//! Use DXT3 compression.
			
 
				-	kDxt3 = ( 1 << 1 ),
			
 
				-
			
 
				-	//! Use DXT5 compression.
			
 
				-	kDxt5 = ( 1 << 2 ),
			
 
				-
			
 
				-	//! Use BC4 compression.
			
 
				-	kBc4 = ( 1 << 3 ),
			
 
				-
			
 
				-	//! Use BC5 compression.
			
 
				-	kBc5 = ( 1 << 4 ),
			
 
				-
			
 
				-	//! Use a slow but high quality colour compressor (the default).
			
 
				-	kColourClusterFit = ( 1 << 5 ),
			
 
				-
			
 
				-	//! Use a fast but low quality colour compressor.
			
 
				-	kColourRangeFit	= ( 1 << 6 ),
			
 
				-
			
 
				-	//! Weight the colour by alpha during cluster fit (disabled by default).
			
 
				-	kWeightColourByAlpha = ( 1 << 7 ),
			
 
				-
			
 
				-	//! Use a very slow but very high quality colour compressor.
			
 
				-	kColourIterativeClusterFit = ( 1 << 8 ),
			
 
				-};
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Compresses a 4x4 block of pixels.
			
 
				-
			
 
				-	@param rgba		The rgba values of the 16 source pixels.
			
 
				-	@param mask		The valid pixel mask.
			
 
				-	@param block	Storage for the compressed DXT block.
			
 
				-	@param flags	Compression flags.
			
 
				-	@param metric	An optional perceptual metric.
			
 
				-	
			
 
				-	The source pixels should be presented as a contiguous array of 16 rgba
			
 
				-	values, with each component as 1 byte each. In memory this should be:
			
 
				-	
			
 
				-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
			
 
				-		
			
 
				-	The mask parameter enables only certain pixels within the block. The lowest
			
 
				-	bit enables the first pixel and so on up to the 16th bit. Bits beyond the
			
 
				-	16th bit are ignored. Pixels that are not enabled are allowed to take
			
 
				-	arbitrary colours in the output block. An example of how this can be used
			
 
				-	is in the CompressImage function to disable pixels outside the bounds of
			
 
				-	the image when the width or height is not divisible by 4.
			
 
				-	
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. When using DXT1 
			
 
				-	compression, 8 bytes of storage are required for the compressed DXT block. 
			
 
				-	DXT3 and DXT5 compression require 16 bytes of storage per block.
			
 
				-	
			
 
				-	The flags parameter can also specify a preferred colour compressor to use 
			
 
				-	when fitting the RGB components of the data. Possible colour compressors 
			
 
				-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
			
 
				-	quality) or kColourIterativeClusterFit (slowest, best quality).
			
 
				-		
			
 
				-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
			
 
				-	flag can be specified to weight the importance of each pixel by its alpha 
			
 
				-	value. For images that are rendered using alpha blending, this can 
			
 
				-	significantly increase the perceived quality.
			
 
				-	
			
 
				-	The metric parameter can be used to weight the relative importance of each
			
 
				-	colour channel, or pass NULL to use the default uniform weight of 
			
 
				-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
			
 
				-	allowed either uniform or "perceptual" weights with the fixed values
			
 
				-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
			
 
				-	contiguous array of 3 floats.
			
 
				-*/
			
 
				-void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Compresses a 4x4 block of pixels.
			
 
				-
			
 
				-	@param rgba		The rgba values of the 16 source pixels.
			
 
				-	@param block	Storage for the compressed DXT block.
			
 
				-	@param flags	Compression flags.
			
 
				-	@param metric	An optional perceptual metric.
			
 
				-	
			
 
				-	The source pixels should be presented as a contiguous array of 16 rgba
			
 
				-	values, with each component as 1 byte each. In memory this should be:
			
 
				-	
			
 
				-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
			
 
				-	
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. When using DXT1 
			
 
				-	compression, 8 bytes of storage are required for the compressed DXT block. 
			
 
				-	DXT3 and DXT5 compression require 16 bytes of storage per block.
			
 
				-	
			
 
				-	The flags parameter can also specify a preferred colour compressor to use 
			
 
				-	when fitting the RGB components of the data. Possible colour compressors 
			
 
				-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
			
 
				-	quality) or kColourIterativeClusterFit (slowest, best quality).
			
 
				-		
			
 
				-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
			
 
				-	flag can be specified to weight the importance of each pixel by its alpha 
			
 
				-	value. For images that are rendered using alpha blending, this can 
			
 
				-	significantly increase the perceived quality.
			
 
				-	
			
 
				-	The metric parameter can be used to weight the relative importance of each
			
 
				-	colour channel, or pass NULL to use the default uniform weight of 
			
 
				-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
			
 
				-	allowed either uniform or "perceptual" weights with the fixed values
			
 
				-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
			
 
				-	contiguous array of 3 floats.
			
 
				-	
			
 
				-	This method is an inline that calls CompressMasked with a mask of 0xffff, 
			
 
				-	provided for compatibility with older versions of squish.
			
 
				-*/
			
 
				-inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
			
 
				-{
			
 
				-	CompressMasked( rgba, 0xffff, block, flags, metric );
			
 
				-}
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Decompresses a 4x4 block of pixels.
			
 
				-
			
 
				-	@param rgba		Storage for the 16 decompressed pixels.
			
 
				-	@param block	The compressed DXT block.
			
 
				-	@param flags	Compression flags.
			
 
				-
			
 
				-	The decompressed pixels will be written as a contiguous array of 16 rgba
			
 
				-	values, with each component as 1 byte each. In memory this is:
			
 
				-	
			
 
				-		{ r1, g1, b1, a1, .... , r16, g16, b16, a16 }
			
 
				-	
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. All other flags 
			
 
				-	are ignored.
			
 
				-*/
			
 
				-void Decompress( u8* rgba, void const* block, int flags );
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Computes the amount of compressed storage required.
			
 
				-
			
 
				-	@param width	The width of the image.
			
 
				-	@param height	The height of the image.
			
 
				-	@param flags	Compression flags.
			
 
				-	
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. All other flags 
			
 
				-	are ignored.
			
 
				-	
			
 
				-	Most DXT images will be a multiple of 4 in each dimension, but this 
			
 
				-	function supports arbitrary size images by allowing the outer blocks to
			
 
				-	be only partially used.
			
 
				-*/
			
 
				-int GetStorageRequirements( int width, int height, int flags );
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Compresses an image in memory.
			
 
				-
			
 
				-	@param rgba		The pixels of the source.
			
 
				-	@param width	The width of the source image.
			
 
				-	@param height	The height of the source image.
			
 
				-	@param blocks	Storage for the compressed output.
			
 
				-	@param flags	Compression flags.
			
 
				-	@param metric	An optional perceptual metric.
			
 
				-	
			
 
				-	The source pixels should be presented as a contiguous array of width*height
			
 
				-	rgba values, with each component as 1 byte each. In memory this should be:
			
 
				-	
			
 
				-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
			
 
				-		
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. When using DXT1 
			
 
				-	compression, 8 bytes of storage are required for each compressed DXT block. 
			
 
				-	DXT3 and DXT5 compression require 16 bytes of storage per block.
			
 
				-	
			
 
				-	The flags parameter can also specify a preferred colour compressor to use 
			
 
				-	when fitting the RGB components of the data. Possible colour compressors 
			
 
				-	are: kColourClusterFit (the default), kColourRangeFit (very fast, low 
			
 
				-	quality) or kColourIterativeClusterFit (slowest, best quality).
			
 
				-		
			
 
				-	When using kColourClusterFit or kColourIterativeClusterFit, an additional 
			
 
				-	flag can be specified to weight the importance of each pixel by its alpha 
			
 
				-	value. For images that are rendered using alpha blending, this can 
			
 
				-	significantly increase the perceived quality.
			
 
				-	
			
 
				-	The metric parameter can be used to weight the relative importance of each
			
 
				-	colour channel, or pass NULL to use the default uniform weight of 
			
 
				-	{ 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that 
			
 
				-	allowed either uniform or "perceptual" weights with the fixed values
			
 
				-	{ 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a 
			
 
				-	contiguous array of 3 floats.
			
 
				-	
			
 
				-	Internally this function calls squish::CompressMasked for each block, which 
			
 
				-	allows for pixels outside the image to take arbitrary values. The function 
			
 
				-	squish::GetStorageRequirements can be called to compute the amount of memory
			
 
				-	to allocate for the compressed output.
			
 
				-*/
			
 
				-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-/*! @brief Decompresses an image in memory.
			
 
				-
			
 
				-	@param rgba		Storage for the decompressed pixels.
			
 
				-	@param width	The width of the source image.
			
 
				-	@param height	The height of the source image.
			
 
				-	@param blocks	The compressed DXT blocks.
			
 
				-	@param flags	Compression flags.
			
 
				-	
			
 
				-	The decompressed pixels will be written as a contiguous array of width*height
			
 
				-	16 rgba values, with each component as 1 byte each. In memory this is:
			
 
				-	
			
 
				-		{ r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
			
 
				-		
			
 
				-	The flags parameter should specify either kDxt1, kDxt3 or kDxt5 compression, 
			
 
				-	however, DXT1 will be used by default if none is specified. All other flags 
			
 
				-	are ignored.
			
 
				-
			
 
				-	Internally this function calls squish::Decompress for each block.
			
 
				-*/
			
 
				-void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
			
 
				-
			
 
				-// -----------------------------------------------------------------------------
			
 
				-
			
 
				-} // namespace squish
			
 
				-
			
 
				-#endif // ndef SQUISH_H
			
 
				-
			
--- a/3rdparty/lodepng/README.md
+++ b/3rdparty/lodepng/README.md
@@ -1,10 +0,0 @@
 
				-LodePNG
			
 
				--------
			
 
				-
			
 
				-PNG encoder and decoder in C and C++.
			
 
				-
			
 
				-Home page: http://lodev.org/lodepng/
			
 
				-
			
 
				-Only two files are needed to allow your program to read and write PNG files: lodepng.cpp and lodepng.h.
			
 
				-
			
 
				-The other files in the project are just examples, unit tests, etc...
			
--- a/3rdparty/lodepng/lodepng.cpp
+++ b/3rdparty/lodepng/lodepng.cpp
@@ -1,6224 +0,0 @@
 
				-/*
			
 
				-LodePNG version 20160501
			
 
				-
			
 
				-Copyright (c) 2005-2016 Lode Vandevenne
			
 
				-
			
 
				-This software is provided 'as-is', without any express or implied
			
 
				-warranty. In no event will the authors be held liable for any damages
			
 
				-arising from the use of this software.
			
 
				-
			
 
				-Permission is granted to anyone to use this software for any purpose,
			
 
				-including commercial applications, and to alter it and redistribute it
			
 
				-freely, subject to the following restrictions:
			
 
				-
			
 
				-    1. The origin of this software must not be misrepresented; you must not
			
 
				-    claim that you wrote the original software. If you use this software
			
 
				-    in a product, an acknowledgment in the product documentation would be
			
 
				-    appreciated but is not required.
			
 
				-
			
 
				-    2. Altered source versions must be plainly marked as such, and must not be
			
 
				-    misrepresented as being the original software.
			
 
				-
			
 
				-    3. This notice may not be removed or altered from any source
			
 
				-    distribution.
			
 
				-*/
			
 
				-
			
 
				-/*
			
 
				-The manual and changelog are in the header file "lodepng.h"
			
 
				-Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
			
 
				-*/
			
 
				-
			
 
				-#include "lodepng.h"
			
 
				-
			
 
				-#include <limits.h>
			
 
				-#include <stdio.h>
			
 
				-#include <stdlib.h>
			
 
				-
			
 
				-#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
			
 
				-#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
			
 
				-#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
			
 
				-#endif /*_MSC_VER */
			
 
				-
			
 
				-const char* LODEPNG_VERSION_STRING = "20160501";
			
 
				-
			
 
				-/*
			
 
				-This source file is built up in the following large parts. The code sections
			
 
				-with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
			
 
				--Tools for C and common code for PNG and Zlib
			
 
				--C Code for Zlib (huffman, deflate, ...)
			
 
				--C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
			
 
				--The C++ wrapper around all of the above
			
 
				-*/
			
 
				-
			
 
				-/*The malloc, realloc and free functions defined here with "lodepng_" in front
			
 
				-of the name, so that you can easily change them to others related to your
			
 
				-platform if needed. Everything else in the code calls these. Pass
			
 
				--DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
			
 
				-#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
			
 
				-define them in your own project's source files without needing to change
			
 
				-lodepng source code. Don't forget to remove "static" if you copypaste them
			
 
				-from here.*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ALLOCATORS
			
 
				-static void* lodepng_malloc(size_t size)
			
 
				-{
			
 
				-  return malloc(size);
			
 
				-}
			
 
				-
			
 
				-static void* lodepng_realloc(void* ptr, size_t new_size)
			
 
				-{
			
 
				-  return realloc(ptr, new_size);
			
 
				-}
			
 
				-
			
 
				-static void lodepng_free(void* ptr)
			
 
				-{
			
 
				-  free(ptr);
			
 
				-}
			
 
				-#else /*LODEPNG_COMPILE_ALLOCATORS*/
			
 
				-void* lodepng_malloc(size_t size);
			
 
				-void* lodepng_realloc(void* ptr, size_t new_size);
			
 
				-void lodepng_free(void* ptr);
			
 
				-#endif /*LODEPNG_COMPILE_ALLOCATORS*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* // Tools for C, and common code for PNG and Zlib.                       // */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*
			
 
				-Often in case of an error a value is assigned to a variable and then it breaks
			
 
				-out of a loop (to go to the cleanup phase of a function). This macro does that.
			
 
				-It makes the error handling code shorter and more readable.
			
 
				-
			
 
				-Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
			
 
				-*/
			
 
				-#define CERROR_BREAK(errorvar, code)\
			
 
				-{\
			
 
				-  errorvar = code;\
			
 
				-  break;\
			
 
				-}
			
 
				-
			
 
				-/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
			
 
				-#define ERROR_BREAK(code) CERROR_BREAK(error, code)
			
 
				-
			
 
				-/*Set error var to the error code, and return it.*/
			
 
				-#define CERROR_RETURN_ERROR(errorvar, code)\
			
 
				-{\
			
 
				-  errorvar = code;\
			
 
				-  return code;\
			
 
				-}
			
 
				-
			
 
				-/*Try the code, if it returns error, also return the error.*/
			
 
				-#define CERROR_TRY_RETURN(call)\
			
 
				-{\
			
 
				-  unsigned error = call;\
			
 
				-  if(error) return error;\
			
 
				-}
			
 
				-
			
 
				-/*Set error var to the error code, and return from the void function.*/
			
 
				-#define CERROR_RETURN(errorvar, code)\
			
 
				-{\
			
 
				-  errorvar = code;\
			
 
				-  return;\
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-About uivector, ucvector and string:
			
 
				--All of them wrap dynamic arrays or text strings in a similar way.
			
 
				--LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
			
 
				--The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
			
 
				--They're not used in the interface, only internally in this file as static functions.
			
 
				--As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
			
 
				-*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-/*dynamic vector of unsigned ints*/
			
 
				-typedef struct uivector
			
 
				-{
			
 
				-  unsigned* data;
			
 
				-  size_t size; /*size in number of unsigned longs*/
			
 
				-  size_t allocsize; /*allocated size in bytes*/
			
 
				-} uivector;
			
 
				-
			
 
				-static void uivector_cleanup(void* p)
			
 
				-{
			
 
				-  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
			
 
				-  lodepng_free(((uivector*)p)->data);
			
 
				-  ((uivector*)p)->data = NULL;
			
 
				-}
			
 
				-
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned uivector_reserve(uivector* p, size_t allocsize)
			
 
				-{
			
 
				-  if(allocsize > p->allocsize)
			
 
				-  {
			
 
				-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
			
 
				-    void* data = lodepng_realloc(p->data, newsize);
			
 
				-    if(data)
			
 
				-    {
			
 
				-      p->allocsize = newsize;
			
 
				-      p->data = (unsigned*)data;
			
 
				-    }
			
 
				-    else return 0; /*error: not enough memory*/
			
 
				-  }
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned uivector_resize(uivector* p, size_t size)
			
 
				-{
			
 
				-  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0;
			
 
				-  p->size = size;
			
 
				-  return 1; /*success*/
			
 
				-}
			
 
				-
			
 
				-/*resize and give all new elements the value*/
			
 
				-static unsigned uivector_resizev(uivector* p, size_t size, unsigned value)
			
 
				-{
			
 
				-  size_t oldsize = p->size, i;
			
 
				-  if(!uivector_resize(p, size)) return 0;
			
 
				-  for(i = oldsize; i < size; ++i) p->data[i] = value;
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-static void uivector_init(uivector* p)
			
 
				-{
			
 
				-  p->data = NULL;
			
 
				-  p->size = p->allocsize = 0;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned uivector_push_back(uivector* p, unsigned c)
			
 
				-{
			
 
				-  if(!uivector_resize(p, p->size + 1)) return 0;
			
 
				-  p->data[p->size - 1] = c;
			
 
				-  return 1;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-#endif /*LODEPNG_COMPILE_ZLIB*/
			
 
				-
			
 
				-/* /////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*dynamic vector of unsigned chars*/
			
 
				-typedef struct ucvector
			
 
				-{
			
 
				-  unsigned char* data;
			
 
				-  size_t size; /*used size*/
			
 
				-  size_t allocsize; /*allocated size*/
			
 
				-} ucvector;
			
 
				-
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned ucvector_reserve(ucvector* p, size_t allocsize)
			
 
				-{
			
 
				-  if(allocsize > p->allocsize)
			
 
				-  {
			
 
				-    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2);
			
 
				-    void* data = lodepng_realloc(p->data, newsize);
			
 
				-    if(data)
			
 
				-    {
			
 
				-      p->allocsize = newsize;
			
 
				-      p->data = (unsigned char*)data;
			
 
				-    }
			
 
				-    else return 0; /*error: not enough memory*/
			
 
				-  }
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned ucvector_resize(ucvector* p, size_t size)
			
 
				-{
			
 
				-  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
			
 
				-  p->size = size;
			
 
				-  return 1; /*success*/
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-
			
 
				-static void ucvector_cleanup(void* p)
			
 
				-{
			
 
				-  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
			
 
				-  lodepng_free(((ucvector*)p)->data);
			
 
				-  ((ucvector*)p)->data = NULL;
			
 
				-}
			
 
				-
			
 
				-static void ucvector_init(ucvector* p)
			
 
				-{
			
 
				-  p->data = NULL;
			
 
				-  p->size = p->allocsize = 0;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-/*you can both convert from vector to buffer&size and vica versa. If you use
			
 
				-init_buffer to take over a buffer and size, it is not needed to use cleanup*/
			
 
				-static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size)
			
 
				-{
			
 
				-  p->data = buffer;
			
 
				-  p->allocsize = p->size = size;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ZLIB*/
			
 
				-
			
 
				-#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned ucvector_push_back(ucvector* p, unsigned char c)
			
 
				-{
			
 
				-  if(!ucvector_resize(p, p->size + 1)) return 0;
			
 
				-  p->data[p->size - 1] = c;
			
 
				-  return 1;
			
 
				-}
			
 
				-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
			
 
				-
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-/*returns 1 if success, 0 if failure ==> nothing done*/
			
 
				-static unsigned string_resize(char** out, size_t size)
			
 
				-{
			
 
				-  char* data = (char*)lodepng_realloc(*out, size + 1);
			
 
				-  if(data)
			
 
				-  {
			
 
				-    data[size] = 0; /*null termination char*/
			
 
				-    *out = data;
			
 
				-  }
			
 
				-  return data != 0;
			
 
				-}
			
 
				-
			
 
				-/*init a {char*, size_t} pair for use as string*/
			
 
				-static void string_init(char** out)
			
 
				-{
			
 
				-  *out = NULL;
			
 
				-  string_resize(out, 0);
			
 
				-}
			
 
				-
			
 
				-/*free the above pair again*/
			
 
				-static void string_cleanup(char** out)
			
 
				-{
			
 
				-  lodepng_free(*out);
			
 
				-  *out = NULL;
			
 
				-}
			
 
				-
			
 
				-static void string_set(char** out, const char* in)
			
 
				-{
			
 
				-  size_t insize = strlen(in), i;
			
 
				-  if(string_resize(out, insize))
			
 
				-  {
			
 
				-    for(i = 0; i != insize; ++i)
			
 
				-    {
			
 
				-      (*out)[i] = in[i];
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-unsigned lodepng_read32bitInt(const unsigned char* buffer)
			
 
				-{
			
 
				-  return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]);
			
 
				-}
			
 
				-
			
 
				-#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
			
 
				-/*buffer must have at least 4 allocated bytes available*/
			
 
				-static void lodepng_set32bitInt(unsigned char* buffer, unsigned value)
			
 
				-{
			
 
				-  buffer[0] = (unsigned char)((value >> 24) & 0xff);
			
 
				-  buffer[1] = (unsigned char)((value >> 16) & 0xff);
			
 
				-  buffer[2] = (unsigned char)((value >>  8) & 0xff);
			
 
				-  buffer[3] = (unsigned char)((value      ) & 0xff);
			
 
				-}
			
 
				-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-static void lodepng_add32bitInt(ucvector* buffer, unsigned value)
			
 
				-{
			
 
				-  ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/
			
 
				-  lodepng_set32bitInt(&buffer->data[buffer->size - 4], value);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / File IO                                                                / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-
			
 
				-/* returns negative value on error. This should be pure C compatible, so no fstat. */
			
 
				-static long lodepng_filesize(const char* filename)
			
 
				-{
			
 
				-  FILE* file;
			
 
				-  long size;
			
 
				-  file = fopen(filename, "rb");
			
 
				-  if(!file) return -1;
			
 
				-
			
 
				-  if(fseek(file, 0, SEEK_END) != 0)
			
 
				-  {
			
 
				-    fclose(file);
			
 
				-    return -1;
			
 
				-  }
			
 
				-
			
 
				-  size = ftell(file);
			
 
				-  /* It may give LONG_MAX as directory size, this is invalid for us. */
			
 
				-  if(size == LONG_MAX) size = -1;
			
 
				-
			
 
				-  fclose(file);
			
 
				-  return size;
			
 
				-}
			
 
				-
			
 
				-/* load file into buffer that already has the correct allocated size. Returns error code.*/
			
 
				-static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename)
			
 
				-{
			
 
				-  FILE* file;
			
 
				-  size_t readsize;
			
 
				-  file = fopen(filename, "rb");
			
 
				-  if(!file) return 78;
			
 
				-
			
 
				-  readsize = fread(out, 1, size, file);
			
 
				-  fclose(file);
			
 
				-
			
 
				-  if (readsize != size) return 78;
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename)
			
 
				-{
			
 
				-  long size = lodepng_filesize(filename);
			
 
				-  if (size < 0) return 78;
			
 
				-  *outsize = (size_t)size;
			
 
				-
			
 
				-  *out = (unsigned char*)lodepng_malloc((size_t)size);
			
 
				-  if(!(*out) && size > 0) return 83; /*the above malloc failed*/
			
 
				-
			
 
				-  return lodepng_buffer_file(*out, (size_t)size, filename);
			
 
				-}
			
 
				-
			
 
				-/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
			
 
				-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename)
			
 
				-{
			
 
				-  FILE* file;
			
 
				-  file = fopen(filename, "wb" );
			
 
				-  if(!file) return 79;
			
 
				-  fwrite((char*)buffer , 1 , buffersize, file);
			
 
				-  fclose(file);
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* // End of common code and tools. Begin of Zlib related code.            // */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*TODO: this ignores potential out of memory errors*/
			
 
				-#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\
			
 
				-{\
			
 
				-  /*add a new byte at the end*/\
			
 
				-  if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\
			
 
				-  /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\
			
 
				-  (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\
			
 
				-  ++(*bitpointer);\
			
 
				-}
			
 
				-
			
 
				-static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1));
			
 
				-}
			
 
				-
			
 
				-static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1));
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1)
			
 
				-
			
 
				-static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream)
			
 
				-{
			
 
				-  unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream));
			
 
				-  ++(*bitpointer);
			
 
				-  return result;
			
 
				-}
			
 
				-
			
 
				-static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
			
 
				-{
			
 
				-  unsigned result = 0, i;
			
 
				-  for(i = 0; i != nbits; ++i)
			
 
				-  {
			
 
				-    result += ((unsigned)READBIT(*bitpointer, bitstream)) << i;
			
 
				-    ++(*bitpointer);
			
 
				-  }
			
 
				-  return result;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Deflate - Huffman                                                      / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#define FIRST_LENGTH_CODE_INDEX 257
			
 
				-#define LAST_LENGTH_CODE_INDEX 285
			
 
				-/*256 literals, the end code, some length codes, and 2 unused codes*/
			
 
				-#define NUM_DEFLATE_CODE_SYMBOLS 288
			
 
				-/*the distance codes have their own symbols, 30 used, 2 unused*/
			
 
				-#define NUM_DISTANCE_SYMBOLS 32
			
 
				-/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
			
 
				-#define NUM_CODE_LENGTH_CODES 19
			
 
				-
			
 
				-/*the base lengths represented by codes 257-285*/
			
 
				-static const unsigned LENGTHBASE[29]
			
 
				-  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
			
 
				-     67, 83, 99, 115, 131, 163, 195, 227, 258};
			
 
				-
			
 
				-/*the extra bits used by codes 257-285 (added to base length)*/
			
 
				-static const unsigned LENGTHEXTRA[29]
			
 
				-  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
			
 
				-      4,  4,  4,   4,   5,   5,   5,   5,   0};
			
 
				-
			
 
				-/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
			
 
				-static const unsigned DISTANCEBASE[30]
			
 
				-  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
			
 
				-     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
			
 
				-
			
 
				-/*the extra bits of backwards distances (added to base)*/
			
 
				-static const unsigned DISTANCEEXTRA[30]
			
 
				-  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
			
 
				-       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
			
 
				-
			
 
				-/*the order in which "code length alphabet code lengths" are stored, out of this
			
 
				-the huffman tree of the dynamic huffman tree lengths is generated*/
			
 
				-static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
			
 
				-  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*
			
 
				-Huffman tree struct, containing multiple representations of the tree
			
 
				-*/
			
 
				-typedef struct HuffmanTree
			
 
				-{
			
 
				-  unsigned* tree2d;
			
 
				-  unsigned* tree1d;
			
 
				-  unsigned* lengths; /*the lengths of the codes of the 1d-tree*/
			
 
				-  unsigned maxbitlen; /*maximum number of bits a single code can get*/
			
 
				-  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
			
 
				-} HuffmanTree;
			
 
				-
			
 
				-/*function used for debug purposes to draw the tree in ascii art with C++*/
			
 
				-/*
			
 
				-static void HuffmanTree_draw(HuffmanTree* tree)
			
 
				-{
			
 
				-  std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl;
			
 
				-  for(size_t i = 0; i != tree->tree1d.size; ++i)
			
 
				-  {
			
 
				-    if(tree->lengths.data[i])
			
 
				-      std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl;
			
 
				-  }
			
 
				-  std::cout << std::endl;
			
 
				-}*/
			
 
				-
			
 
				-static void HuffmanTree_init(HuffmanTree* tree)
			
 
				-{
			
 
				-  tree->tree2d = 0;
			
 
				-  tree->tree1d = 0;
			
 
				-  tree->lengths = 0;
			
 
				-}
			
 
				-
			
 
				-static void HuffmanTree_cleanup(HuffmanTree* tree)
			
 
				-{
			
 
				-  lodepng_free(tree->tree2d);
			
 
				-  lodepng_free(tree->tree1d);
			
 
				-  lodepng_free(tree->lengths);
			
 
				-}
			
 
				-
			
 
				-/*the tree representation used by the decoder. return value is error*/
			
 
				-static unsigned HuffmanTree_make2DTree(HuffmanTree* tree)
			
 
				-{
			
 
				-  unsigned nodefilled = 0; /*up to which node it is filled*/
			
 
				-  unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/
			
 
				-  unsigned n, i;
			
 
				-
			
 
				-  tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned));
			
 
				-  if(!tree->tree2d) return 83; /*alloc fail*/
			
 
				-
			
 
				-  /*
			
 
				-  convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means
			
 
				-  uninited, a value >= numcodes is an address to another bit, a value < numcodes
			
 
				-  is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as
			
 
				-  many columns as codes - 1.
			
 
				-  A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
			
 
				-  Here, the internal nodes are stored (what their 0 and 1 option point to).
			
 
				-  There is only memory for such good tree currently, if there are more nodes
			
 
				-  (due to too long length codes), error 55 will happen
			
 
				-  */
			
 
				-  for(n = 0; n < tree->numcodes * 2; ++n)
			
 
				-  {
			
 
				-    tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/
			
 
				-  }
			
 
				-
			
 
				-  for(n = 0; n < tree->numcodes; ++n) /*the codes*/
			
 
				-  {
			
 
				-    for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/
			
 
				-    {
			
 
				-      unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1);
			
 
				-      /*oversubscribed, see comment in lodepng_error_text*/
			
 
				-      if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55;
			
 
				-      if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/
			
 
				-      {
			
 
				-        if(i + 1 == tree->lengths[n]) /*last bit*/
			
 
				-        {
			
 
				-          tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/
			
 
				-          treepos = 0;
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-          /*put address of the next step in here, first that address has to be found of course
			
 
				-          (it's just nodefilled + 1)...*/
			
 
				-          ++nodefilled;
			
 
				-          /*addresses encoded with numcodes added to it*/
			
 
				-          tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes;
			
 
				-          treepos = nodefilled;
			
 
				-        }
			
 
				-      }
			
 
				-      else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  for(n = 0; n < tree->numcodes * 2; ++n)
			
 
				-  {
			
 
				-    if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/
			
 
				-  }
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
			
 
				-numcodes, lengths and maxbitlen must already be filled in correctly. return
			
 
				-value is error.
			
 
				-*/
			
 
				-static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree)
			
 
				-{
			
 
				-  uivector blcount;
			
 
				-  uivector nextcode;
			
 
				-  unsigned error = 0;
			
 
				-  unsigned bits, n;
			
 
				-
			
 
				-  uivector_init(&blcount);
			
 
				-  uivector_init(&nextcode);
			
 
				-
			
 
				-  tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
			
 
				-  if(!tree->tree1d) error = 83; /*alloc fail*/
			
 
				-
			
 
				-  if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0)
			
 
				-  || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0))
			
 
				-    error = 83; /*alloc fail*/
			
 
				-
			
 
				-  if(!error)
			
 
				-  {
			
 
				-    /*step 1: count number of instances of each code length*/
			
 
				-    for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]];
			
 
				-    /*step 2: generate the nextcode values*/
			
 
				-    for(bits = 1; bits <= tree->maxbitlen; ++bits)
			
 
				-    {
			
 
				-      nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1;
			
 
				-    }
			
 
				-    /*step 3: generate all the codes*/
			
 
				-    for(n = 0; n != tree->numcodes; ++n)
			
 
				-    {
			
 
				-      if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  uivector_cleanup(&blcount);
			
 
				-  uivector_cleanup(&nextcode);
			
 
				-
			
 
				-  if(!error) return HuffmanTree_make2DTree(tree);
			
 
				-  else return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-given the code lengths (as stored in the PNG file), generate the tree as defined
			
 
				-by Deflate. maxbitlen is the maximum bits that a code in the tree can have.
			
 
				-return value is error.
			
 
				-*/
			
 
				-static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
			
 
				-                                            size_t numcodes, unsigned maxbitlen)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
			
 
				-  if(!tree->lengths) return 83; /*alloc fail*/
			
 
				-  for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i];
			
 
				-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
			
 
				-  tree->maxbitlen = maxbitlen;
			
 
				-  return HuffmanTree_makeFromLengths2(tree);
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
			
 
				-Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
			
 
				-
			
 
				-/*chain node for boundary package merge*/
			
 
				-typedef struct BPMNode
			
 
				-{
			
 
				-  int weight; /*the sum of all weights in this chain*/
			
 
				-  unsigned index; /*index of this leaf node (called "count" in the paper)*/
			
 
				-  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
			
 
				-  int in_use;
			
 
				-} BPMNode;
			
 
				-
			
 
				-/*lists of chains*/
			
 
				-typedef struct BPMLists
			
 
				-{
			
 
				-  /*memory pool*/
			
 
				-  unsigned memsize;
			
 
				-  BPMNode* memory;
			
 
				-  unsigned numfree;
			
 
				-  unsigned nextfree;
			
 
				-  BPMNode** freelist;
			
 
				-  /*two heads of lookahead chains per list*/
			
 
				-  unsigned listsize;
			
 
				-  BPMNode** chains0;
			
 
				-  BPMNode** chains1;
			
 
				-} BPMLists;
			
 
				-
			
 
				-/*creates a new chain node with the given parameters, from the memory in the lists */
			
 
				-static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  BPMNode* result;
			
 
				-
			
 
				-  /*memory full, so garbage collect*/
			
 
				-  if(lists->nextfree >= lists->numfree)
			
 
				-  {
			
 
				-    /*mark only those that are in use*/
			
 
				-    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
			
 
				-    for(i = 0; i != lists->listsize; ++i)
			
 
				-    {
			
 
				-      BPMNode* node;
			
 
				-      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
			
 
				-      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
			
 
				-    }
			
 
				-    /*collect those that are free*/
			
 
				-    lists->numfree = 0;
			
 
				-    for(i = 0; i != lists->memsize; ++i)
			
 
				-    {
			
 
				-      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
			
 
				-    }
			
 
				-    lists->nextfree = 0;
			
 
				-  }
			
 
				-
			
 
				-  result = lists->freelist[lists->nextfree++];
			
 
				-  result->weight = weight;
			
 
				-  result->index = index;
			
 
				-  result->tail = tail;
			
 
				-  return result;
			
 
				-}
			
 
				-
			
 
				-/*sort the leaves with stable mergesort*/
			
 
				-static void bpmnode_sort(BPMNode* leaves, size_t num)
			
 
				-{
			
 
				-  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
			
 
				-  size_t width, counter = 0;
			
 
				-  for(width = 1; width < num; width *= 2)
			
 
				-  {
			
 
				-    BPMNode* a = (counter & 1) ? mem : leaves;
			
 
				-    BPMNode* b = (counter & 1) ? leaves : mem;
			
 
				-    size_t p;
			
 
				-    for(p = 0; p < num; p += 2 * width)
			
 
				-    {
			
 
				-      size_t q = (p + width > num) ? num : (p + width);
			
 
				-      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
			
 
				-      size_t i = p, j = q, k;
			
 
				-      for(k = p; k < r; k++)
			
 
				-      {
			
 
				-        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
			
 
				-        else b[k] = a[j++];
			
 
				-      }
			
 
				-    }
			
 
				-    counter++;
			
 
				-  }
			
 
				-  if(counter & 1) memcpy(leaves, mem, sizeof(*leaves) * num);
			
 
				-  lodepng_free(mem);
			
 
				-}
			
 
				-
			
 
				-/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/
			
 
				-static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num)
			
 
				-{
			
 
				-  unsigned lastindex = lists->chains1[c]->index;
			
 
				-
			
 
				-  if(c == 0)
			
 
				-  {
			
 
				-    if(lastindex >= numpresent) return;
			
 
				-    lists->chains0[c] = lists->chains1[c];
			
 
				-    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    /*sum of the weights of the head nodes of the previous lookahead chains.*/
			
 
				-    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
			
 
				-    lists->chains0[c] = lists->chains1[c];
			
 
				-    if(lastindex < numpresent && sum > leaves[lastindex].weight)
			
 
				-    {
			
 
				-      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
			
 
				-      return;
			
 
				-    }
			
 
				-    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
			
 
				-    /*in the end we are only interested in the chain of the last list, so no
			
 
				-    need to recurse if we're at the last one (this gives measurable speedup)*/
			
 
				-    if(num + 1 < (int)(2 * numpresent - 2))
			
 
				-    {
			
 
				-      boundaryPM(lists, leaves, numpresent, c - 1, num);
			
 
				-      boundaryPM(lists, leaves, numpresent, c - 1, num);
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
			
 
				-                                      size_t numcodes, unsigned maxbitlen)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  unsigned i;
			
 
				-  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
			
 
				-  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
			
 
				-
			
 
				-  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
			
 
				-  if((1u << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/
			
 
				-
			
 
				-  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
			
 
				-  if(!leaves) return 83; /*alloc fail*/
			
 
				-
			
 
				-  for(i = 0; i != numcodes; ++i)
			
 
				-  {
			
 
				-    if(frequencies[i] > 0)
			
 
				-    {
			
 
				-      leaves[numpresent].weight = (int)frequencies[i];
			
 
				-      leaves[numpresent].index = i;
			
 
				-      ++numpresent;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
			
 
				-
			
 
				-  /*ensure at least two present symbols. There should be at least one symbol
			
 
				-  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
			
 
				-  make these work as well ensure there are at least two symbols. The
			
 
				-  Package-Merge code below also doesn't work correctly if there's only one
			
 
				-  symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/
			
 
				-  if(numpresent == 0)
			
 
				-  {
			
 
				-    lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
			
 
				-  }
			
 
				-  else if(numpresent == 1)
			
 
				-  {
			
 
				-    lengths[leaves[0].index] = 1;
			
 
				-    lengths[leaves[0].index == 0 ? 1 : 0] = 1;
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    BPMLists lists;
			
 
				-    BPMNode* node;
			
 
				-
			
 
				-    bpmnode_sort(leaves, numpresent);
			
 
				-
			
 
				-    lists.listsize = maxbitlen;
			
 
				-    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
			
 
				-    lists.nextfree = 0;
			
 
				-    lists.numfree = lists.memsize;
			
 
				-    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
			
 
				-    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
			
 
				-    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
			
 
				-    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
			
 
				-    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
			
 
				-
			
 
				-    if(!error)
			
 
				-    {
			
 
				-      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
			
 
				-
			
 
				-      bpmnode_create(&lists, leaves[0].weight, 1, 0);
			
 
				-      bpmnode_create(&lists, leaves[1].weight, 2, 0);
			
 
				-
			
 
				-      for(i = 0; i != lists.listsize; ++i)
			
 
				-      {
			
 
				-        lists.chains0[i] = &lists.memory[0];
			
 
				-        lists.chains1[i] = &lists.memory[1];
			
 
				-      }
			
 
				-
			
 
				-      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
			
 
				-      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
			
 
				-
			
 
				-      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail)
			
 
				-      {
			
 
				-        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    lodepng_free(lists.memory);
			
 
				-    lodepng_free(lists.freelist);
			
 
				-    lodepng_free(lists.chains0);
			
 
				-    lodepng_free(lists.chains1);
			
 
				-  }
			
 
				-
			
 
				-  lodepng_free(leaves);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*Create the Huffman tree given the symbol frequencies*/
			
 
				-static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
			
 
				-                                                size_t mincodes, size_t numcodes, unsigned maxbitlen)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/
			
 
				-  tree->maxbitlen = maxbitlen;
			
 
				-  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
			
 
				-  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
			
 
				-  if(!tree->lengths) return 83; /*alloc fail*/
			
 
				-  /*initialize all lengths to 0*/
			
 
				-  memset(tree->lengths, 0, numcodes * sizeof(unsigned));
			
 
				-
			
 
				-  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
			
 
				-  if(!error) error = HuffmanTree_makeFromLengths2(tree);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index)
			
 
				-{
			
 
				-  return tree->tree1d[index];
			
 
				-}
			
 
				-
			
 
				-static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index)
			
 
				-{
			
 
				-  return tree->lengths[index];
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/
			
 
				-static unsigned generateFixedLitLenTree(HuffmanTree* tree)
			
 
				-{
			
 
				-  unsigned i, error = 0;
			
 
				-  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
			
 
				-  if(!bitlen) return 83; /*alloc fail*/
			
 
				-
			
 
				-  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
			
 
				-  for(i =   0; i <= 143; ++i) bitlen[i] = 8;
			
 
				-  for(i = 144; i <= 255; ++i) bitlen[i] = 9;
			
 
				-  for(i = 256; i <= 279; ++i) bitlen[i] = 7;
			
 
				-  for(i = 280; i <= 287; ++i) bitlen[i] = 8;
			
 
				-
			
 
				-  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15);
			
 
				-
			
 
				-  lodepng_free(bitlen);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/
			
 
				-static unsigned generateFixedDistanceTree(HuffmanTree* tree)
			
 
				-{
			
 
				-  unsigned i, error = 0;
			
 
				-  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
			
 
				-  if(!bitlen) return 83; /*alloc fail*/
			
 
				-
			
 
				-  /*there are 32 distance codes, but 30-31 are unused*/
			
 
				-  for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5;
			
 
				-  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15);
			
 
				-
			
 
				-  lodepng_free(bitlen);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-/*
			
 
				-returns the code, or (unsigned)(-1) if error happened
			
 
				-inbitlength is the length of the complete buffer, in bits (so its byte length times 8)
			
 
				-*/
			
 
				-static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp,
			
 
				-                                    const HuffmanTree* codetree, size_t inbitlength)
			
 
				-{
			
 
				-  unsigned treepos = 0, ct;
			
 
				-  for(;;)
			
 
				-  {
			
 
				-    if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/
			
 
				-    /*
			
 
				-    decode the symbol from the tree. The "readBitFromStream" code is inlined in
			
 
				-    the expression below because this is the biggest bottleneck while decoding
			
 
				-    */
			
 
				-    ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)];
			
 
				-    ++(*bp);
			
 
				-    if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/
			
 
				-    else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/
			
 
				-
			
 
				-    if(treepos >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/
			
 
				-  }
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Inflator (Decompressor)                                                / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/
			
 
				-static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d)
			
 
				-{
			
 
				-  /*TODO: check for out of memory errors*/
			
 
				-  generateFixedLitLenTree(tree_ll);
			
 
				-  generateFixedDistanceTree(tree_d);
			
 
				-}
			
 
				-
			
 
				-/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/
			
 
				-static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
			
 
				-                                      const unsigned char* in, size_t* bp, size_t inlength)
			
 
				-{
			
 
				-  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
			
 
				-  unsigned error = 0;
			
 
				-  unsigned n, HLIT, HDIST, HCLEN, i;
			
 
				-  size_t inbitlength = inlength * 8;
			
 
				-
			
 
				-  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
			
 
				-  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
			
 
				-  unsigned* bitlen_d = 0; /*dist code lengths*/
			
 
				-  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
			
 
				-  unsigned* bitlen_cl = 0;
			
 
				-  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
			
 
				-
			
 
				-  if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/
			
 
				-
			
 
				-  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
			
 
				-  HLIT =  readBitsFromStream(bp, in, 5) + 257;
			
 
				-  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
			
 
				-  HDIST = readBitsFromStream(bp, in, 5) + 1;
			
 
				-  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
			
 
				-  HCLEN = readBitsFromStream(bp, in, 4) + 4;
			
 
				-
			
 
				-  if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/
			
 
				-
			
 
				-  HuffmanTree_init(&tree_cl);
			
 
				-
			
 
				-  while(!error)
			
 
				-  {
			
 
				-    /*read the code length codes out of 3 * (amount of code length codes) bits*/
			
 
				-
			
 
				-    bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
			
 
				-    if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-
			
 
				-    for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i)
			
 
				-    {
			
 
				-      if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3);
			
 
				-      else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/
			
 
				-    }
			
 
				-
			
 
				-    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
			
 
				-    if(error) break;
			
 
				-
			
 
				-    /*now we can use this tree to read the lengths for the tree that this function will return*/
			
 
				-    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
			
 
				-    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
			
 
				-    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
			
 
				-    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
			
 
				-
			
 
				-    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
			
 
				-    i = 0;
			
 
				-    while(i < HLIT + HDIST)
			
 
				-    {
			
 
				-      unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength);
			
 
				-      if(code <= 15) /*a length code*/
			
 
				-      {
			
 
				-        if(i < HLIT) bitlen_ll[i] = code;
			
 
				-        else bitlen_d[i - HLIT] = code;
			
 
				-        ++i;
			
 
				-      }
			
 
				-      else if(code == 16) /*repeat previous*/
			
 
				-      {
			
 
				-        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
			
 
				-        unsigned value; /*set value to the previous code*/
			
 
				-
			
 
				-        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
			
 
				-
			
 
				-        if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
			
 
				-        replength += readBitsFromStream(bp, in, 2);
			
 
				-
			
 
				-        if(i < HLIT + 1) value = bitlen_ll[i - 1];
			
 
				-        else value = bitlen_d[i - HLIT - 1];
			
 
				-        /*repeat this value in the next lengths*/
			
 
				-        for(n = 0; n < replength; ++n)
			
 
				-        {
			
 
				-          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
			
 
				-          if(i < HLIT) bitlen_ll[i] = value;
			
 
				-          else bitlen_d[i - HLIT] = value;
			
 
				-          ++i;
			
 
				-        }
			
 
				-      }
			
 
				-      else if(code == 17) /*repeat "0" 3-10 times*/
			
 
				-      {
			
 
				-        unsigned replength = 3; /*read in the bits that indicate repeat length*/
			
 
				-        if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
			
 
				-        replength += readBitsFromStream(bp, in, 3);
			
 
				-
			
 
				-        /*repeat this value in the next lengths*/
			
 
				-        for(n = 0; n < replength; ++n)
			
 
				-        {
			
 
				-          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
			
 
				-
			
 
				-          if(i < HLIT) bitlen_ll[i] = 0;
			
 
				-          else bitlen_d[i - HLIT] = 0;
			
 
				-          ++i;
			
 
				-        }
			
 
				-      }
			
 
				-      else if(code == 18) /*repeat "0" 11-138 times*/
			
 
				-      {
			
 
				-        unsigned replength = 11; /*read in the bits that indicate repeat length*/
			
 
				-        if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
			
 
				-        replength += readBitsFromStream(bp, in, 7);
			
 
				-
			
 
				-        /*repeat this value in the next lengths*/
			
 
				-        for(n = 0; n < replength; ++n)
			
 
				-        {
			
 
				-          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
			
 
				-
			
 
				-          if(i < HLIT) bitlen_ll[i] = 0;
			
 
				-          else bitlen_d[i - HLIT] = 0;
			
 
				-          ++i;
			
 
				-        }
			
 
				-      }
			
 
				-      else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
			
 
				-      {
			
 
				-        if(code == (unsigned)(-1))
			
 
				-        {
			
 
				-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
			
 
				-          (10=no endcode, 11=wrong jump outside of tree)*/
			
 
				-          error = (*bp) > inbitlength ? 10 : 11;
			
 
				-        }
			
 
				-        else error = 16; /*unexisting code, this can never happen*/
			
 
				-        break;
			
 
				-      }
			
 
				-    }
			
 
				-    if(error) break;
			
 
				-
			
 
				-    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
			
 
				-
			
 
				-    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
			
 
				-    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
			
 
				-    if(error) break;
			
 
				-    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
			
 
				-
			
 
				-    break; /*end of error-while*/
			
 
				-  }
			
 
				-
			
 
				-  lodepng_free(bitlen_cl);
			
 
				-  lodepng_free(bitlen_ll);
			
 
				-  lodepng_free(bitlen_d);
			
 
				-  HuffmanTree_cleanup(&tree_cl);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*inflate a block with dynamic of fixed Huffman tree*/
			
 
				-static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp,
			
 
				-                                    size_t* pos, size_t inlength, unsigned btype)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
			
 
				-  HuffmanTree tree_d; /*the huffman tree for distance codes*/
			
 
				-  size_t inbitlength = inlength * 8;
			
 
				-
			
 
				-  HuffmanTree_init(&tree_ll);
			
 
				-  HuffmanTree_init(&tree_d);
			
 
				-
			
 
				-  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
			
 
				-  else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength);
			
 
				-
			
 
				-  while(!error) /*decode all symbols until end reached, breaks at end code*/
			
 
				-  {
			
 
				-    /*code_ll is literal, length or end code*/
			
 
				-    unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength);
			
 
				-    if(code_ll <= 255) /*literal symbol*/
			
 
				-    {
			
 
				-      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
			
 
				-      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-      out->data[*pos] = (unsigned char)code_ll;
			
 
				-      ++(*pos);
			
 
				-    }
			
 
				-    else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/
			
 
				-    {
			
 
				-      unsigned code_d, distance;
			
 
				-      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
			
 
				-      size_t start, forward, backward, length;
			
 
				-
			
 
				-      /*part 1: get length base*/
			
 
				-      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
			
 
				-
			
 
				-      /*part 2: get extra bits and add the value of that to length*/
			
 
				-      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
			
 
				-      if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
			
 
				-      length += readBitsFromStream(bp, in, numextrabits_l);
			
 
				-
			
 
				-      /*part 3: get distance code*/
			
 
				-      code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength);
			
 
				-      if(code_d > 29)
			
 
				-      {
			
 
				-        if(code_ll == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
			
 
				-        {
			
 
				-          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
			
 
				-          (10=no endcode, 11=wrong jump outside of tree)*/
			
 
				-          error = (*bp) > inlength * 8 ? 10 : 11;
			
 
				-        }
			
 
				-        else error = 18; /*error: invalid distance code (30-31 are never used)*/
			
 
				-        break;
			
 
				-      }
			
 
				-      distance = DISTANCEBASE[code_d];
			
 
				-
			
 
				-      /*part 4: get extra bits from distance*/
			
 
				-      numextrabits_d = DISTANCEEXTRA[code_d];
			
 
				-      if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
			
 
				-      distance += readBitsFromStream(bp, in, numextrabits_d);
			
 
				-
			
 
				-      /*part 5: fill in all the out[n] values based on the length and dist*/
			
 
				-      start = (*pos);
			
 
				-      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
			
 
				-      backward = start - distance;
			
 
				-
			
 
				-      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-      if (distance < length) {
			
 
				-        for(forward = 0; forward < length; ++forward)
			
 
				-        {
			
 
				-          out->data[(*pos)++] = out->data[backward++];
			
 
				-        }
			
 
				-      } else {
			
 
				-        memcpy(out->data + *pos, out->data + backward, length);
			
 
				-        *pos += length;
			
 
				-      }
			
 
				-    }
			
 
				-    else if(code_ll == 256)
			
 
				-    {
			
 
				-      break; /*end code, break the loop*/
			
 
				-    }
			
 
				-    else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
			
 
				-    {
			
 
				-      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
			
 
				-      (10=no endcode, 11=wrong jump outside of tree)*/
			
 
				-      error = ((*bp) > inlength * 8) ? 10 : 11;
			
 
				-      break;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  HuffmanTree_cleanup(&tree_ll);
			
 
				-  HuffmanTree_cleanup(&tree_d);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength)
			
 
				-{
			
 
				-  size_t p;
			
 
				-  unsigned LEN, NLEN, n, error = 0;
			
 
				-
			
 
				-  /*go to first boundary of byte*/
			
 
				-  while(((*bp) & 0x7) != 0) ++(*bp);
			
 
				-  p = (*bp) / 8; /*byte position*/
			
 
				-
			
 
				-  /*read LEN (2 bytes) and NLEN (2 bytes)*/
			
 
				-  if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/
			
 
				-  LEN = in[p] + 256u * in[p + 1]; p += 2;
			
 
				-  NLEN = in[p] + 256u * in[p + 1]; p += 2;
			
 
				-
			
 
				-  /*check if 16-bit NLEN is really the one's complement of LEN*/
			
 
				-  if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/
			
 
				-
			
 
				-  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
			
 
				-
			
 
				-  /*read the literal data: LEN bytes are now stored in the out buffer*/
			
 
				-  if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/
			
 
				-  for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++];
			
 
				-
			
 
				-  (*bp) = p * 8;
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned lodepng_inflatev(ucvector* out,
			
 
				-                                 const unsigned char* in, size_t insize,
			
 
				-                                 const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/
			
 
				-  size_t bp = 0;
			
 
				-  unsigned BFINAL = 0;
			
 
				-  size_t pos = 0; /*byte position in the out buffer*/
			
 
				-  unsigned error = 0;
			
 
				-
			
 
				-  (void)settings;
			
 
				-
			
 
				-  while(!BFINAL)
			
 
				-  {
			
 
				-    unsigned BTYPE;
			
 
				-    if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/
			
 
				-    BFINAL = readBitFromStream(&bp, in);
			
 
				-    BTYPE = 1u * readBitFromStream(&bp, in);
			
 
				-    BTYPE += 2u * readBitFromStream(&bp, in);
			
 
				-
			
 
				-    if(BTYPE == 3) return 20; /*error: invalid BTYPE*/
			
 
				-    else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/
			
 
				-    else error = inflateHuffmanBlock(out, in, &bp, &pos, insize, BTYPE); /*compression, BTYPE 01 or 10*/
			
 
				-
			
 
				-    if(error) return error;
			
 
				-  }
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
			
 
				-                         const unsigned char* in, size_t insize,
			
 
				-                         const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  unsigned error;
			
 
				-  ucvector v;
			
 
				-  ucvector_init_buffer(&v, *out, *outsize);
			
 
				-  error = lodepng_inflatev(&v, in, insize, settings);
			
 
				-  *out = v.data;
			
 
				-  *outsize = v.size;
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned inflate(unsigned char** out, size_t* outsize,
			
 
				-                        const unsigned char* in, size_t insize,
			
 
				-                        const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  if(settings->custom_inflate)
			
 
				-  {
			
 
				-    return settings->custom_inflate(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    return lodepng_inflate(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Deflator (Compressor)                                                  / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
			
 
				-
			
 
				-/*bitlen is the size in bits of the code*/
			
 
				-static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen)
			
 
				-{
			
 
				-  addBitsToStreamReversed(bp, compressed, code, bitlen);
			
 
				-}
			
 
				-
			
 
				-/*search the index in the array, that has the largest value smaller than or equal to the given value,
			
 
				-given array must be sorted (if no value is smaller, it returns the size of the given array)*/
			
 
				-static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value)
			
 
				-{
			
 
				-  /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
			
 
				-  size_t left = 1;
			
 
				-  size_t right = array_size - 1;
			
 
				-
			
 
				-  while(left <= right) {
			
 
				-    size_t mid = (left + right) >> 1;
			
 
				-    if (array[mid] >= value) right = mid - 1;
			
 
				-    else left = mid + 1;
			
 
				-  }
			
 
				-  if(left >= array_size || array[left] > value) left--;
			
 
				-  return left;
			
 
				-}
			
 
				-
			
 
				-static void addLengthDistance(uivector* values, size_t length, size_t distance)
			
 
				-{
			
 
				-  /*values in encoded vector are those used by deflate:
			
 
				-  0-255: literal bytes
			
 
				-  256: end
			
 
				-  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
			
 
				-  286-287: invalid*/
			
 
				-
			
 
				-  unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
			
 
				-  unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]);
			
 
				-  unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
			
 
				-  unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]);
			
 
				-
			
 
				-  uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX);
			
 
				-  uivector_push_back(values, extra_length);
			
 
				-  uivector_push_back(values, dist_code);
			
 
				-  uivector_push_back(values, extra_distance);
			
 
				-}
			
 
				-
			
 
				-/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
			
 
				-bytes as input because 3 is the minimum match length for deflate*/
			
 
				-static const unsigned HASH_NUM_VALUES = 65536;
			
 
				-static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
			
 
				-
			
 
				-typedef struct Hash
			
 
				-{
			
 
				-  int* head; /*hash value to head circular pos - can be outdated if went around window*/
			
 
				-  /*circular pos to prev circular pos*/
			
 
				-  unsigned short* chain;
			
 
				-  int* val; /*circular pos to hash value*/
			
 
				-
			
 
				-  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
			
 
				-  it's always going to be the zeros that dominate, so not important for PNG*/
			
 
				-  int* headz; /*similar to head, but for chainz*/
			
 
				-  unsigned short* chainz; /*those with same amount of zeros*/
			
 
				-  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
			
 
				-} Hash;
			
 
				-
			
 
				-static unsigned hash_init(Hash* hash, unsigned windowsize)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
			
 
				-  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
			
 
				-  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
			
 
				-
			
 
				-  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
			
 
				-  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
			
 
				-  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
			
 
				-
			
 
				-  if(!hash->head || !hash->chain || !hash->val  || !hash->headz|| !hash->chainz || !hash->zeros)
			
 
				-  {
			
 
				-    return 83; /*alloc fail*/
			
 
				-  }
			
 
				-
			
 
				-  /*initialize hash table*/
			
 
				-  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
			
 
				-  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
			
 
				-  for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/
			
 
				-
			
 
				-  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
			
 
				-  for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-static void hash_cleanup(Hash* hash)
			
 
				-{
			
 
				-  lodepng_free(hash->head);
			
 
				-  lodepng_free(hash->val);
			
 
				-  lodepng_free(hash->chain);
			
 
				-
			
 
				-  lodepng_free(hash->zeros);
			
 
				-  lodepng_free(hash->headz);
			
 
				-  lodepng_free(hash->chainz);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-
			
 
				-static unsigned getHash(const unsigned char* data, size_t size, size_t pos)
			
 
				-{
			
 
				-  unsigned result = 0;
			
 
				-  if(pos + 2 < size)
			
 
				-  {
			
 
				-    /*A simple shift and xor hash is used. Since the data of PNGs is dominated
			
 
				-    by zeroes due to the filters, a better hash does not have a significant
			
 
				-    effect on speed in traversing the chain, and causes more time spend on
			
 
				-    calculating the hash.*/
			
 
				-    result ^= (unsigned)(data[pos + 0] << 0u);
			
 
				-    result ^= (unsigned)(data[pos + 1] << 4u);
			
 
				-    result ^= (unsigned)(data[pos + 2] << 8u);
			
 
				-  } else {
			
 
				-    size_t amount, i;
			
 
				-    if(pos >= size) return 0;
			
 
				-    amount = size - pos;
			
 
				-    for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u));
			
 
				-  }
			
 
				-  return result & HASH_BIT_MASK;
			
 
				-}
			
 
				-
			
 
				-static unsigned countZeros(const unsigned char* data, size_t size, size_t pos)
			
 
				-{
			
 
				-  const unsigned char* start = data + pos;
			
 
				-  const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH;
			
 
				-  if(end > data + size) end = data + size;
			
 
				-  data = start;
			
 
				-  while(data != end && *data == 0) ++data;
			
 
				-  /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/
			
 
				-  return (unsigned)(data - start);
			
 
				-}
			
 
				-
			
 
				-/*wpos = pos & (windowsize - 1)*/
			
 
				-static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros)
			
 
				-{
			
 
				-  hash->val[wpos] = (int)hashval;
			
 
				-  if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval];
			
 
				-  hash->head[hashval] = wpos;
			
 
				-
			
 
				-  hash->zeros[wpos] = numzeros;
			
 
				-  if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros];
			
 
				-  hash->headz[numzeros] = wpos;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-LZ77-encode the data. Return value is error code. The input are raw bytes, the output
			
 
				-is in the form of unsigned integers with codes representing for example literal bytes, or
			
 
				-length/distance pairs.
			
 
				-It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
			
 
				-sliding window (of windowsize) is used, and all past bytes in that window can be used as
			
 
				-the "dictionary". A brute force search through all possible distances would be slow, and
			
 
				-this hash technique is one out of several ways to speed this up.
			
 
				-*/
			
 
				-static unsigned encodeLZ77(uivector* out, Hash* hash,
			
 
				-                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
			
 
				-                           unsigned minmatch, unsigned nicematch, unsigned lazymatching)
			
 
				-{
			
 
				-  size_t pos;
			
 
				-  unsigned i, error = 0;
			
 
				-  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
			
 
				-  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8;
			
 
				-  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
			
 
				-
			
 
				-  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
			
 
				-  unsigned numzeros = 0;
			
 
				-
			
 
				-  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
			
 
				-  unsigned length;
			
 
				-  unsigned lazy = 0;
			
 
				-  unsigned lazylength = 0, lazyoffset = 0;
			
 
				-  unsigned hashval;
			
 
				-  unsigned current_offset, current_length;
			
 
				-  unsigned prev_offset;
			
 
				-  const unsigned char *lastptr, *foreptr, *backptr;
			
 
				-  unsigned hashpos;
			
 
				-
			
 
				-  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
			
 
				-  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
			
 
				-
			
 
				-  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
			
 
				-
			
 
				-  for(pos = inpos; pos < insize; ++pos)
			
 
				-  {
			
 
				-    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
			
 
				-    unsigned chainlength = 0;
			
 
				-
			
 
				-    hashval = getHash(in, insize, pos);
			
 
				-
			
 
				-    if(usezeros && hashval == 0)
			
 
				-    {
			
 
				-      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
			
 
				-      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      numzeros = 0;
			
 
				-    }
			
 
				-
			
 
				-    updateHashChain(hash, wpos, hashval, numzeros);
			
 
				-
			
 
				-    /*the length and offset found for the current position*/
			
 
				-    length = 0;
			
 
				-    offset = 0;
			
 
				-
			
 
				-    hashpos = hash->chain[wpos];
			
 
				-
			
 
				-    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
			
 
				-
			
 
				-    /*search for the longest string*/
			
 
				-    prev_offset = 0;
			
 
				-    for(;;)
			
 
				-    {
			
 
				-      if(chainlength++ >= maxchainlength) break;
			
 
				-      current_offset = hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize;
			
 
				-
			
 
				-      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
			
 
				-      prev_offset = current_offset;
			
 
				-      if(current_offset > 0)
			
 
				-      {
			
 
				-        /*test the next characters*/
			
 
				-        foreptr = &in[pos];
			
 
				-        backptr = &in[pos - current_offset];
			
 
				-
			
 
				-        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
			
 
				-        if(numzeros >= 3)
			
 
				-        {
			
 
				-          unsigned skip = hash->zeros[hashpos];
			
 
				-          if(skip > numzeros) skip = numzeros;
			
 
				-          backptr += skip;
			
 
				-          foreptr += skip;
			
 
				-        }
			
 
				-
			
 
				-        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/
			
 
				-        {
			
 
				-          ++backptr;
			
 
				-          ++foreptr;
			
 
				-        }
			
 
				-        current_length = (unsigned)(foreptr - &in[pos]);
			
 
				-
			
 
				-        if(current_length > length)
			
 
				-        {
			
 
				-          length = current_length; /*the longest length*/
			
 
				-          offset = current_offset; /*the offset that is related to this longest length*/
			
 
				-          /*jump out once a length of max length is found (speed gain). This also jumps
			
 
				-          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
			
 
				-          if(current_length >= nicematch) break;
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      if(hashpos == hash->chain[hashpos]) break;
			
 
				-
			
 
				-      if(numzeros >= 3 && length > numzeros)
			
 
				-      {
			
 
				-        hashpos = hash->chainz[hashpos];
			
 
				-        if(hash->zeros[hashpos] != numzeros) break;
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        hashpos = hash->chain[hashpos];
			
 
				-        /*outdated hash value, happens if particular value was not encountered in whole last window*/
			
 
				-        if(hash->val[hashpos] != (int)hashval) break;
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    if(lazymatching)
			
 
				-    {
			
 
				-      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH)
			
 
				-      {
			
 
				-        lazy = 1;
			
 
				-        lazylength = length;
			
 
				-        lazyoffset = offset;
			
 
				-        continue; /*try the next byte*/
			
 
				-      }
			
 
				-      if(lazy)
			
 
				-      {
			
 
				-        lazy = 0;
			
 
				-        if(pos == 0) ERROR_BREAK(81);
			
 
				-        if(length > lazylength + 1)
			
 
				-        {
			
 
				-          /*push the previous character as literal*/
			
 
				-          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-          length = lazylength;
			
 
				-          offset = lazyoffset;
			
 
				-          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
			
 
				-          hash->headz[numzeros] = -1; /*idem*/
			
 
				-          --pos;
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
			
 
				-
			
 
				-    /*encode it as length/distance pair or literal value*/
			
 
				-    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/
			
 
				-    {
			
 
				-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    }
			
 
				-    else if(length < minmatch || (length == 3 && offset > 4096))
			
 
				-    {
			
 
				-      /*compensate for the fact that longer offsets have more extra bits, a
			
 
				-      length of only 3 may be not worth it then*/
			
 
				-      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      addLengthDistance(out, length, offset);
			
 
				-      for(i = 1; i < length; ++i)
			
 
				-      {
			
 
				-        ++pos;
			
 
				-        wpos = pos & (windowsize - 1);
			
 
				-        hashval = getHash(in, insize, pos);
			
 
				-        if(usezeros && hashval == 0)
			
 
				-        {
			
 
				-          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
			
 
				-          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-          numzeros = 0;
			
 
				-        }
			
 
				-        updateHashChain(hash, wpos, hashval, numzeros);
			
 
				-      }
			
 
				-    }
			
 
				-  } /*end of the loop through each character of input*/
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/* /////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize)
			
 
				-{
			
 
				-  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
			
 
				-  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
			
 
				-
			
 
				-  size_t i, j, numdeflateblocks = (datasize + 65534) / 65535;
			
 
				-  unsigned datapos = 0;
			
 
				-  for(i = 0; i != numdeflateblocks; ++i)
			
 
				-  {
			
 
				-    unsigned BFINAL, BTYPE, LEN, NLEN;
			
 
				-    unsigned char firstbyte;
			
 
				-
			
 
				-    BFINAL = (i == numdeflateblocks - 1);
			
 
				-    BTYPE = 0;
			
 
				-
			
 
				-    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1));
			
 
				-    ucvector_push_back(out, firstbyte);
			
 
				-
			
 
				-    LEN = 65535;
			
 
				-    if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos;
			
 
				-    NLEN = 65535 - LEN;
			
 
				-
			
 
				-    ucvector_push_back(out, (unsigned char)(LEN & 255));
			
 
				-    ucvector_push_back(out, (unsigned char)(LEN >> 8));
			
 
				-    ucvector_push_back(out, (unsigned char)(NLEN & 255));
			
 
				-    ucvector_push_back(out, (unsigned char)(NLEN >> 8));
			
 
				-
			
 
				-    /*Decompressed data*/
			
 
				-    for(j = 0; j < 65535 && datapos < datasize; ++j)
			
 
				-    {
			
 
				-      ucvector_push_back(out, data[datapos++]);
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman trees.
			
 
				-tree_ll: the tree for lit and len codes.
			
 
				-tree_d: the tree for distance codes.
			
 
				-*/
			
 
				-static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded,
			
 
				-                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d)
			
 
				-{
			
 
				-  size_t i = 0;
			
 
				-  for(i = 0; i != lz77_encoded->size; ++i)
			
 
				-  {
			
 
				-    unsigned val = lz77_encoded->data[i];
			
 
				-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val));
			
 
				-    if(val > 256) /*for a length code, 3 more things have to be added*/
			
 
				-    {
			
 
				-      unsigned length_index = val - FIRST_LENGTH_CODE_INDEX;
			
 
				-      unsigned n_length_extra_bits = LENGTHEXTRA[length_index];
			
 
				-      unsigned length_extra_bits = lz77_encoded->data[++i];
			
 
				-
			
 
				-      unsigned distance_code = lz77_encoded->data[++i];
			
 
				-
			
 
				-      unsigned distance_index = distance_code;
			
 
				-      unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index];
			
 
				-      unsigned distance_extra_bits = lz77_encoded->data[++i];
			
 
				-
			
 
				-      addBitsToStream(bp, out, length_extra_bits, n_length_extra_bits);
			
 
				-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distance_code),
			
 
				-                       HuffmanTree_getLength(tree_d, distance_code));
			
 
				-      addBitsToStream(bp, out, distance_extra_bits, n_distance_extra_bits);
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/
			
 
				-static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash,
			
 
				-                               const unsigned char* data, size_t datapos, size_t dataend,
			
 
				-                               const LodePNGCompressSettings* settings, unsigned final)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-
			
 
				-  /*
			
 
				-  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
			
 
				-  literal bytes and length/distance pairs. This is then huffman compressed with
			
 
				-  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
			
 
				-  another huffman tree is used for the dist values ("d"). These two trees are
			
 
				-  stored using their code lengths, and to compress even more these code lengths
			
 
				-  are also run-length encoded and huffman compressed. This gives a huffman tree
			
 
				-  of code lengths "cl". The code lenghts used to describe this third tree are
			
 
				-  the code length code lengths ("clcl").
			
 
				-  */
			
 
				-
			
 
				-  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
			
 
				-  uivector lz77_encoded;
			
 
				-  HuffmanTree tree_ll; /*tree for lit,len values*/
			
 
				-  HuffmanTree tree_d; /*tree for distance codes*/
			
 
				-  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
			
 
				-  uivector frequencies_ll; /*frequency of lit,len codes*/
			
 
				-  uivector frequencies_d; /*frequency of dist codes*/
			
 
				-  uivector frequencies_cl; /*frequency of code length codes*/
			
 
				-  uivector bitlen_lld; /*lit,len,dist code lenghts (int bits), literally (without repeat codes).*/
			
 
				-  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudemtary run length compression)*/
			
 
				-  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
			
 
				-  (these are written as is in the file, it would be crazy to compress these using yet another huffman
			
 
				-  tree that needs to be represented by yet another set of code lengths)*/
			
 
				-  uivector bitlen_cl;
			
 
				-  size_t datasize = dataend - datapos;
			
 
				-
			
 
				-  /*
			
 
				-  Due to the huffman compression of huffman tree representations ("two levels"), there are some anologies:
			
 
				-  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
			
 
				-  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
			
 
				-  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
			
 
				-  */
			
 
				-
			
 
				-  unsigned BFINAL = final;
			
 
				-  size_t numcodes_ll, numcodes_d, i;
			
 
				-  unsigned HLIT, HDIST, HCLEN;
			
 
				-
			
 
				-  uivector_init(&lz77_encoded);
			
 
				-  HuffmanTree_init(&tree_ll);
			
 
				-  HuffmanTree_init(&tree_d);
			
 
				-  HuffmanTree_init(&tree_cl);
			
 
				-  uivector_init(&frequencies_ll);
			
 
				-  uivector_init(&frequencies_d);
			
 
				-  uivector_init(&frequencies_cl);
			
 
				-  uivector_init(&bitlen_lld);
			
 
				-  uivector_init(&bitlen_lld_e);
			
 
				-  uivector_init(&bitlen_cl);
			
 
				-
			
 
				-  /*This while loop never loops due to a break at the end, it is here to
			
 
				-  allow breaking out of it to the cleanup phase on error conditions.*/
			
 
				-  while(!error)
			
 
				-  {
			
 
				-    if(settings->use_lz77)
			
 
				-    {
			
 
				-      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
			
 
				-                         settings->minmatch, settings->nicematch, settings->lazymatching);
			
 
				-      if(error) break;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
			
 
				-    }
			
 
				-
			
 
				-    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-
			
 
				-    /*Count the frequencies of lit, len and dist codes*/
			
 
				-    for(i = 0; i != lz77_encoded.size; ++i)
			
 
				-    {
			
 
				-      unsigned symbol = lz77_encoded.data[i];
			
 
				-      ++frequencies_ll.data[symbol];
			
 
				-      if(symbol > 256)
			
 
				-      {
			
 
				-        unsigned dist = lz77_encoded.data[i + 2];
			
 
				-        ++frequencies_d.data[dist];
			
 
				-        i += 3;
			
 
				-      }
			
 
				-    }
			
 
				-    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
			
 
				-
			
 
				-    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
			
 
				-    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
			
 
				-    if(error) break;
			
 
				-    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
			
 
				-    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
			
 
				-    if(error) break;
			
 
				-
			
 
				-    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
			
 
				-    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
			
 
				-    /*store the code lengths of both generated trees in bitlen_lld*/
			
 
				-    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
			
 
				-    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
			
 
				-
			
 
				-    /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
			
 
				-    17 (3-10 zeroes), 18 (11-138 zeroes)*/
			
 
				-    for(i = 0; i != (unsigned)bitlen_lld.size; ++i)
			
 
				-    {
			
 
				-      unsigned j = 0; /*amount of repititions*/
			
 
				-      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
			
 
				-
			
 
				-      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/
			
 
				-      {
			
 
				-        ++j; /*include the first zero*/
			
 
				-        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/
			
 
				-        {
			
 
				-          uivector_push_back(&bitlen_lld_e, 17);
			
 
				-          uivector_push_back(&bitlen_lld_e, j - 3);
			
 
				-        }
			
 
				-        else /*repeat code 18 supports max 138 zeroes*/
			
 
				-        {
			
 
				-          if(j > 138) j = 138;
			
 
				-          uivector_push_back(&bitlen_lld_e, 18);
			
 
				-          uivector_push_back(&bitlen_lld_e, j - 11);
			
 
				-        }
			
 
				-        i += (j - 1);
			
 
				-      }
			
 
				-      else if(j >= 3) /*repeat code for value other than zero*/
			
 
				-      {
			
 
				-        size_t k;
			
 
				-        unsigned num = j / 6, rest = j % 6;
			
 
				-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
			
 
				-        for(k = 0; k < num; ++k)
			
 
				-        {
			
 
				-          uivector_push_back(&bitlen_lld_e, 16);
			
 
				-          uivector_push_back(&bitlen_lld_e, 6 - 3);
			
 
				-        }
			
 
				-        if(rest >= 3)
			
 
				-        {
			
 
				-          uivector_push_back(&bitlen_lld_e, 16);
			
 
				-          uivector_push_back(&bitlen_lld_e, rest - 3);
			
 
				-        }
			
 
				-        else j -= rest;
			
 
				-        i += j;
			
 
				-      }
			
 
				-      else /*too short to benefit from repeat code*/
			
 
				-      {
			
 
				-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    /*generate tree_cl, the huffmantree of huffmantrees*/
			
 
				-
			
 
				-    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    for(i = 0; i != bitlen_lld_e.size; ++i)
			
 
				-    {
			
 
				-      ++frequencies_cl.data[bitlen_lld_e.data[i]];
			
 
				-      /*after a repeat code come the bits that specify the number of repetitions,
			
 
				-      those don't need to be in the frequencies_cl calculation*/
			
 
				-      if(bitlen_lld_e.data[i] >= 16) ++i;
			
 
				-    }
			
 
				-
			
 
				-    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
			
 
				-                                            frequencies_cl.size, frequencies_cl.size, 7);
			
 
				-    if(error) break;
			
 
				-
			
 
				-    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    for(i = 0; i != tree_cl.numcodes; ++i)
			
 
				-    {
			
 
				-      /*lenghts of code length tree is in the order as specified by deflate*/
			
 
				-      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
			
 
				-    }
			
 
				-    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4)
			
 
				-    {
			
 
				-      /*remove zeros at the end, but minimum size must be 4*/
			
 
				-      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
			
 
				-    }
			
 
				-    if(error) break;
			
 
				-
			
 
				-    /*
			
 
				-    Write everything into the output
			
 
				-
			
 
				-    After the BFINAL and BTYPE, the dynamic block consists out of the following:
			
 
				-    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
			
 
				-    - (HCLEN+4)*3 bits code lengths of code length alphabet
			
 
				-    - HLIT + 257 code lenghts of lit/length alphabet (encoded using the code length
			
 
				-      alphabet, + possible repetition codes 16, 17, 18)
			
 
				-    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
			
 
				-      alphabet, + possible repetition codes 16, 17, 18)
			
 
				-    - compressed data
			
 
				-    - 256 (end code)
			
 
				-    */
			
 
				-
			
 
				-    /*Write block type*/
			
 
				-    addBitToStream(bp, out, BFINAL);
			
 
				-    addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/
			
 
				-    addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/
			
 
				-
			
 
				-    /*write the HLIT, HDIST and HCLEN values*/
			
 
				-    HLIT = (unsigned)(numcodes_ll - 257);
			
 
				-    HDIST = (unsigned)(numcodes_d - 1);
			
 
				-    HCLEN = (unsigned)bitlen_cl.size - 4;
			
 
				-    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
			
 
				-    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
			
 
				-    addBitsToStream(bp, out, HLIT, 5);
			
 
				-    addBitsToStream(bp, out, HDIST, 5);
			
 
				-    addBitsToStream(bp, out, HCLEN, 4);
			
 
				-
			
 
				-    /*write the code lenghts of the code length alphabet*/
			
 
				-    for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3);
			
 
				-
			
 
				-    /*write the lenghts of the lit/len AND the dist alphabet*/
			
 
				-    for(i = 0; i != bitlen_lld_e.size; ++i)
			
 
				-    {
			
 
				-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
			
 
				-                       HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
			
 
				-      /*extra bits of repeat codes*/
			
 
				-      if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2);
			
 
				-      else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3);
			
 
				-      else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7);
			
 
				-    }
			
 
				-
			
 
				-    /*write the compressed data symbols*/
			
 
				-    writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
			
 
				-    /*error: the length of the end code 256 must be larger than 0*/
			
 
				-    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
			
 
				-
			
 
				-    /*write the end code*/
			
 
				-    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
			
 
				-
			
 
				-    break; /*end of error-while*/
			
 
				-  }
			
 
				-
			
 
				-  /*cleanup*/
			
 
				-  uivector_cleanup(&lz77_encoded);
			
 
				-  HuffmanTree_cleanup(&tree_ll);
			
 
				-  HuffmanTree_cleanup(&tree_d);
			
 
				-  HuffmanTree_cleanup(&tree_cl);
			
 
				-  uivector_cleanup(&frequencies_ll);
			
 
				-  uivector_cleanup(&frequencies_d);
			
 
				-  uivector_cleanup(&frequencies_cl);
			
 
				-  uivector_cleanup(&bitlen_lld_e);
			
 
				-  uivector_cleanup(&bitlen_lld);
			
 
				-  uivector_cleanup(&bitlen_cl);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash,
			
 
				-                             const unsigned char* data,
			
 
				-                             size_t datapos, size_t dataend,
			
 
				-                             const LodePNGCompressSettings* settings, unsigned final)
			
 
				-{
			
 
				-  HuffmanTree tree_ll; /*tree for literal values and length codes*/
			
 
				-  HuffmanTree tree_d; /*tree for distance codes*/
			
 
				-
			
 
				-  unsigned BFINAL = final;
			
 
				-  unsigned error = 0;
			
 
				-  size_t i;
			
 
				-
			
 
				-  HuffmanTree_init(&tree_ll);
			
 
				-  HuffmanTree_init(&tree_d);
			
 
				-
			
 
				-  generateFixedLitLenTree(&tree_ll);
			
 
				-  generateFixedDistanceTree(&tree_d);
			
 
				-
			
 
				-  addBitToStream(bp, out, BFINAL);
			
 
				-  addBitToStream(bp, out, 1); /*first bit of BTYPE*/
			
 
				-  addBitToStream(bp, out, 0); /*second bit of BTYPE*/
			
 
				-
			
 
				-  if(settings->use_lz77) /*LZ77 encoded*/
			
 
				-  {
			
 
				-    uivector lz77_encoded;
			
 
				-    uivector_init(&lz77_encoded);
			
 
				-    error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
			
 
				-                       settings->minmatch, settings->nicematch, settings->lazymatching);
			
 
				-    if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
			
 
				-    uivector_cleanup(&lz77_encoded);
			
 
				-  }
			
 
				-  else /*no LZ77, but still will be Huffman compressed*/
			
 
				-  {
			
 
				-    for(i = datapos; i < dataend; ++i)
			
 
				-    {
			
 
				-      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i]));
			
 
				-    }
			
 
				-  }
			
 
				-  /*add END code*/
			
 
				-  if(!error) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
			
 
				-
			
 
				-  /*cleanup*/
			
 
				-  HuffmanTree_cleanup(&tree_ll);
			
 
				-  HuffmanTree_cleanup(&tree_d);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
			
 
				-                                 const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  size_t i, blocksize, numdeflateblocks;
			
 
				-  size_t bp = 0; /*the bit pointer*/
			
 
				-  Hash hash;
			
 
				-
			
 
				-  if(settings->btype > 2) return 61;
			
 
				-  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
			
 
				-  else if(settings->btype == 1) blocksize = insize;
			
 
				-  else /*if(settings->btype == 2)*/
			
 
				-  {
			
 
				-    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
			
 
				-    blocksize = insize / 8 + 8;
			
 
				-    if(blocksize < 65536) blocksize = 65536;
			
 
				-    if(blocksize > 262144) blocksize = 262144;
			
 
				-  }
			
 
				-
			
 
				-  numdeflateblocks = (insize + blocksize - 1) / blocksize;
			
 
				-  if(numdeflateblocks == 0) numdeflateblocks = 1;
			
 
				-
			
 
				-  error = hash_init(&hash, settings->windowsize);
			
 
				-  if(error) return error;
			
 
				-
			
 
				-  for(i = 0; i != numdeflateblocks && !error; ++i)
			
 
				-  {
			
 
				-    unsigned final = (i == numdeflateblocks - 1);
			
 
				-    size_t start = i * blocksize;
			
 
				-    size_t end = start + blocksize;
			
 
				-    if(end > insize) end = insize;
			
 
				-
			
 
				-    if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final);
			
 
				-    else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final);
			
 
				-  }
			
 
				-
			
 
				-  hash_cleanup(&hash);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
			
 
				-                         const unsigned char* in, size_t insize,
			
 
				-                         const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  unsigned error;
			
 
				-  ucvector v;
			
 
				-  ucvector_init_buffer(&v, *out, *outsize);
			
 
				-  error = lodepng_deflatev(&v, in, insize, settings);
			
 
				-  *out = v.data;
			
 
				-  *outsize = v.size;
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned deflate(unsigned char** out, size_t* outsize,
			
 
				-                        const unsigned char* in, size_t insize,
			
 
				-                        const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  if(settings->custom_deflate)
			
 
				-  {
			
 
				-    return settings->custom_deflate(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    return lodepng_deflate(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Adler32                                                                  */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len)
			
 
				-{
			
 
				-   unsigned s1 = adler & 0xffff;
			
 
				-   unsigned s2 = (adler >> 16) & 0xffff;
			
 
				-
			
 
				-  while(len > 0)
			
 
				-  {
			
 
				-    /*at least 5550 sums can be done before the sums overflow, saving a lot of module divisions*/
			
 
				-    unsigned amount = len > 5550 ? 5550 : len;
			
 
				-    len -= amount;
			
 
				-    while(amount > 0)
			
 
				-    {
			
 
				-      s1 += (*data++);
			
 
				-      s2 += s1;
			
 
				-      --amount;
			
 
				-    }
			
 
				-    s1 %= 65521;
			
 
				-    s2 %= 65521;
			
 
				-  }
			
 
				-
			
 
				-  return (s2 << 16) | s1;
			
 
				-}
			
 
				-
			
 
				-/*Return the adler32 of the bytes data[0..len-1]*/
			
 
				-static unsigned adler32(const unsigned char* data, unsigned len)
			
 
				-{
			
 
				-  return update_adler32(1L, data, len);
			
 
				-}
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Zlib                                                                   / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                                 size_t insize, const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  unsigned CM, CINFO, FDICT;
			
 
				-
			
 
				-  if(insize < 2) return 53; /*error, size of zlib data too small*/
			
 
				-  /*read information from zlib header*/
			
 
				-  if((in[0] * 256 + in[1]) % 31 != 0)
			
 
				-  {
			
 
				-    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
			
 
				-    return 24;
			
 
				-  }
			
 
				-
			
 
				-  CM = in[0] & 15;
			
 
				-  CINFO = (in[0] >> 4) & 15;
			
 
				-  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
			
 
				-  FDICT = (in[1] >> 5) & 1;
			
 
				-  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
			
 
				-
			
 
				-  if(CM != 8 || CINFO > 7)
			
 
				-  {
			
 
				-    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
			
 
				-    return 25;
			
 
				-  }
			
 
				-  if(FDICT != 0)
			
 
				-  {
			
 
				-    /*error: the specification of PNG says about the zlib stream:
			
 
				-      "The additional flags shall not specify a preset dictionary."*/
			
 
				-    return 26;
			
 
				-  }
			
 
				-
			
 
				-  error = inflate(out, outsize, in + 2, insize - 2, settings);
			
 
				-  if(error) return error;
			
 
				-
			
 
				-  if(!settings->ignore_adler32)
			
 
				-  {
			
 
				-    unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
			
 
				-    unsigned checksum = adler32(*out, (unsigned)(*outsize));
			
 
				-    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
			
 
				-  }
			
 
				-
			
 
				-  return 0; /*no error*/
			
 
				-}
			
 
				-
			
 
				-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                                size_t insize, const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  if(settings->custom_zlib)
			
 
				-  {
			
 
				-    return settings->custom_zlib(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                               size_t insize, const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  /*initially, *out must be NULL and outsize 0, if you just give some random *out
			
 
				-  that's pointing to a non allocated buffer, this'll crash*/
			
 
				-  ucvector outv;
			
 
				-  size_t i;
			
 
				-  unsigned error;
			
 
				-  unsigned char* deflatedata = 0;
			
 
				-  size_t deflatesize = 0;
			
 
				-
			
 
				-  /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
			
 
				-  unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
			
 
				-  unsigned FLEVEL = 0;
			
 
				-  unsigned FDICT = 0;
			
 
				-  unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
			
 
				-  unsigned FCHECK = 31 - CMFFLG % 31;
			
 
				-  CMFFLG += FCHECK;
			
 
				-
			
 
				-  /*ucvector-controlled version of the output buffer, for dynamic array*/
			
 
				-  ucvector_init_buffer(&outv, *out, *outsize);
			
 
				-
			
 
				-  ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8));
			
 
				-  ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255));
			
 
				-
			
 
				-  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
			
 
				-
			
 
				-  if(!error)
			
 
				-  {
			
 
				-    unsigned ADLER32 = adler32(in, (unsigned)insize);
			
 
				-    for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]);
			
 
				-    lodepng_free(deflatedata);
			
 
				-    lodepng_add32bitInt(&outv, ADLER32);
			
 
				-  }
			
 
				-
			
 
				-  *out = outv.data;
			
 
				-  *outsize = outv.size;
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/* compress using the default or custom zlib function */
			
 
				-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                              size_t insize, const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  if(settings->custom_zlib)
			
 
				-  {
			
 
				-    return settings->custom_zlib(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    return lodepng_zlib_compress(out, outsize, in, insize, settings);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#else /*no LODEPNG_COMPILE_ZLIB*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                                size_t insize, const LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
			
 
				-  return settings->custom_zlib(out, outsize, in, insize, settings);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                              size_t insize, const LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
			
 
				-  return settings->custom_zlib(out, outsize, in, insize, settings);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ZLIB*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-/*this is a good tradeoff between speed and compression ratio*/
			
 
				-#define DEFAULT_WINDOWSIZE 2048
			
 
				-
			
 
				-void lodepng_compress_settings_init(LodePNGCompressSettings* settings)
			
 
				-{
			
 
				-  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
			
 
				-  settings->btype = 2;
			
 
				-  settings->use_lz77 = 1;
			
 
				-  settings->windowsize = DEFAULT_WINDOWSIZE;
			
 
				-  settings->minmatch = 3;
			
 
				-  settings->nicematch = 128;
			
 
				-  settings->lazymatching = 1;
			
 
				-
			
 
				-  settings->custom_zlib = 0;
			
 
				-  settings->custom_deflate = 0;
			
 
				-  settings->custom_context = 0;
			
 
				-}
			
 
				-
			
 
				-const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
			
 
				-
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings)
			
 
				-{
			
 
				-  settings->ignore_adler32 = 0;
			
 
				-
			
 
				-  settings->custom_zlib = 0;
			
 
				-  settings->custom_inflate = 0;
			
 
				-  settings->custom_context = 0;
			
 
				-}
			
 
				-
			
 
				-const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0};
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* // End of Zlib related code. Begin of PNG related code.                 // */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / CRC32                                                                  / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-
			
 
				-#ifndef LODEPNG_NO_COMPILE_CRC
			
 
				-/* CRC polynomial: 0xedb88320 */
			
 
				-static unsigned lodepng_crc32_table[256] = {
			
 
				-           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
			
 
				-   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
			
 
				-   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
			
 
				-   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
			
 
				-   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
			
 
				-   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
			
 
				-   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
			
 
				-   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
			
 
				-  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
			
 
				-  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
			
 
				-  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
			
 
				-  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
			
 
				-  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
			
 
				-  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
			
 
				-  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
			
 
				-  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
			
 
				-  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
			
 
				-  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
			
 
				-  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
			
 
				-  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
			
 
				-  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
			
 
				-  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
			
 
				-  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
			
 
				-  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
			
 
				-  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
			
 
				-  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
			
 
				-  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
			
 
				-  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
			
 
				-  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
			
 
				-  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
			
 
				-  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
			
 
				-  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
			
 
				-};
			
 
				-
			
 
				-/*Return the CRC of the bytes buf[0..len-1].*/
			
 
				-unsigned lodepng_crc32(const unsigned char* data, size_t length)
			
 
				-{
			
 
				-  unsigned r = 0xffffffffu;
			
 
				-  size_t i;
			
 
				-  for(i = 0; i < length; ++i)
			
 
				-  {
			
 
				-    r = lodepng_crc32_table[(r ^ data[i]) & 0xff] ^ (r >> 8);
			
 
				-  }
			
 
				-  return r ^ 0xffffffffu;
			
 
				-}
			
 
				-#else /* !LODEPNG_NO_COMPILE_CRC */
			
 
				-unsigned lodepng_crc32(const unsigned char* data, size_t length);
			
 
				-#endif /* !LODEPNG_NO_COMPILE_CRC */
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Reading and writing single bits and bytes from/to stream for LodePNG   / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream)
			
 
				-{
			
 
				-  unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1);
			
 
				-  ++(*bitpointer);
			
 
				-  return result;
			
 
				-}
			
 
				-
			
 
				-static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
			
 
				-{
			
 
				-  unsigned result = 0;
			
 
				-  size_t i;
			
 
				-  for(i = 0 ; i < nbits; ++i)
			
 
				-  {
			
 
				-    result <<= 1;
			
 
				-    result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream);
			
 
				-  }
			
 
				-  return result;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
			
 
				-{
			
 
				-  /*the current bit in bitstream must be 0 for this to work*/
			
 
				-  if(bit)
			
 
				-  {
			
 
				-    /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/
			
 
				-    bitstream[(*bitpointer) >> 3] |= (bit << (7 - ((*bitpointer) & 0x7)));
			
 
				-  }
			
 
				-  ++(*bitpointer);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
			
 
				-{
			
 
				-  /*the current bit in bitstream may be 0 or 1 for this to work*/
			
 
				-  if(bit == 0) bitstream[(*bitpointer) >> 3] &=  (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7))));
			
 
				-  else         bitstream[(*bitpointer) >> 3] |=  (1 << (7 - ((*bitpointer) & 0x7)));
			
 
				-  ++(*bitpointer);
			
 
				-}
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / PNG chunks                                                             / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-unsigned lodepng_chunk_length(const unsigned char* chunk)
			
 
				-{
			
 
				-  return lodepng_read32bitInt(&chunk[0]);
			
 
				-}
			
 
				-
			
 
				-void lodepng_chunk_type(char type[5], const unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i];
			
 
				-  type[4] = 0; /*null termination char*/
			
 
				-}
			
 
				-
			
 
				-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type)
			
 
				-{
			
 
				-  if(strlen(type) != 4) return 0;
			
 
				-  return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]);
			
 
				-}
			
 
				-
			
 
				-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk)
			
 
				-{
			
 
				-  return((chunk[4] & 32) != 0);
			
 
				-}
			
 
				-
			
 
				-unsigned char lodepng_chunk_private(const unsigned char* chunk)
			
 
				-{
			
 
				-  return((chunk[6] & 32) != 0);
			
 
				-}
			
 
				-
			
 
				-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk)
			
 
				-{
			
 
				-  return((chunk[7] & 32) != 0);
			
 
				-}
			
 
				-
			
 
				-unsigned char* lodepng_chunk_data(unsigned char* chunk)
			
 
				-{
			
 
				-  return &chunk[8];
			
 
				-}
			
 
				-
			
 
				-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk)
			
 
				-{
			
 
				-  return &chunk[8];
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_chunk_check_crc(const unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned length = lodepng_chunk_length(chunk);
			
 
				-  unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]);
			
 
				-  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
			
 
				-  unsigned checksum = lodepng_crc32(&chunk[4], length + 4);
			
 
				-  if(CRC != checksum) return 1;
			
 
				-  else return 0;
			
 
				-}
			
 
				-
			
 
				-void lodepng_chunk_generate_crc(unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned length = lodepng_chunk_length(chunk);
			
 
				-  unsigned CRC = lodepng_crc32(&chunk[4], length + 4);
			
 
				-  lodepng_set32bitInt(chunk + 8 + length, CRC);
			
 
				-}
			
 
				-
			
 
				-unsigned char* lodepng_chunk_next(unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
			
 
				-  return &chunk[total_chunk_length];
			
 
				-}
			
 
				-
			
 
				-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
			
 
				-  return &chunk[total_chunk_length];
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
			
 
				-  unsigned char *chunk_start, *new_buffer;
			
 
				-  size_t new_length = (*outlength) + total_chunk_length;
			
 
				-  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
			
 
				-
			
 
				-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
			
 
				-  if(!new_buffer) return 83; /*alloc fail*/
			
 
				-  (*out) = new_buffer;
			
 
				-  (*outlength) = new_length;
			
 
				-  chunk_start = &(*out)[new_length - total_chunk_length];
			
 
				-
			
 
				-  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
			
 
				-                              const char* type, const unsigned char* data)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  unsigned char *chunk, *new_buffer;
			
 
				-  size_t new_length = (*outlength) + length + 12;
			
 
				-  if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/
			
 
				-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
			
 
				-  if(!new_buffer) return 83; /*alloc fail*/
			
 
				-  (*out) = new_buffer;
			
 
				-  (*outlength) = new_length;
			
 
				-  chunk = &(*out)[(*outlength) - length - 12];
			
 
				-
			
 
				-  /*1: length*/
			
 
				-  lodepng_set32bitInt(chunk, (unsigned)length);
			
 
				-
			
 
				-  /*2: chunk name (4 letters)*/
			
 
				-  chunk[4] = (unsigned char)type[0];
			
 
				-  chunk[5] = (unsigned char)type[1];
			
 
				-  chunk[6] = (unsigned char)type[2];
			
 
				-  chunk[7] = (unsigned char)type[3];
			
 
				-
			
 
				-  /*3: the data*/
			
 
				-  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
			
 
				-
			
 
				-  /*4: CRC (of the chunkname characters and the data)*/
			
 
				-  lodepng_chunk_generate_crc(chunk);
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / Color types and such                                                   / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*return type is a LodePNG error code*/
			
 
				-static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/
			
 
				-{
			
 
				-  switch(colortype)
			
 
				-  {
			
 
				-    case 0: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; /*grey*/
			
 
				-    case 2: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*RGB*/
			
 
				-    case 3: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8            )) return 37; break; /*palette*/
			
 
				-    case 4: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*grey + alpha*/
			
 
				-    case 6: if(!(                                 bd == 8 || bd == 16)) return 37; break; /*RGBA*/
			
 
				-    default: return 31;
			
 
				-  }
			
 
				-  return 0; /*allowed color type / bits combination*/
			
 
				-}
			
 
				-
			
 
				-static unsigned getNumColorChannels(LodePNGColorType colortype)
			
 
				-{
			
 
				-  switch(colortype)
			
 
				-  {
			
 
				-    case 0: return 1; /*grey*/
			
 
				-    case 2: return 3; /*RGB*/
			
 
				-    case 3: return 1; /*palette*/
			
 
				-    case 4: return 2; /*grey + alpha*/
			
 
				-    case 6: return 4; /*RGBA*/
			
 
				-  }
			
 
				-  return 0; /*unexisting color type*/
			
 
				-}
			
 
				-
			
 
				-static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  /*bits per pixel is amount of channels * bits per channel*/
			
 
				-  return getNumColorChannels(colortype) * bitdepth;
			
 
				-}
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-void lodepng_color_mode_init(LodePNGColorMode* info)
			
 
				-{
			
 
				-  info->key_defined = 0;
			
 
				-  info->key_r = info->key_g = info->key_b = 0;
			
 
				-  info->colortype = LCT_RGBA;
			
 
				-  info->bitdepth = 8;
			
 
				-  info->palette = 0;
			
 
				-  info->palettesize = 0;
			
 
				-}
			
 
				-
			
 
				-void lodepng_color_mode_cleanup(LodePNGColorMode* info)
			
 
				-{
			
 
				-  lodepng_palette_clear(info);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  lodepng_color_mode_cleanup(dest);
			
 
				-  *dest = *source;
			
 
				-  if(source->palette)
			
 
				-  {
			
 
				-    dest->palette = (unsigned char*)lodepng_malloc(1024);
			
 
				-    if(!dest->palette && source->palettesize) return 83; /*alloc fail*/
			
 
				-    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  if(a->colortype != b->colortype) return 0;
			
 
				-  if(a->bitdepth != b->bitdepth) return 0;
			
 
				-  if(a->key_defined != b->key_defined) return 0;
			
 
				-  if(a->key_defined)
			
 
				-  {
			
 
				-    if(a->key_r != b->key_r) return 0;
			
 
				-    if(a->key_g != b->key_g) return 0;
			
 
				-    if(a->key_b != b->key_b) return 0;
			
 
				-  }
			
 
				-  /*if one of the palette sizes is 0, then we consider it to be the same as the
			
 
				-  other: it means that e.g. the palette was not given by the user and should be
			
 
				-  considered the same as the palette inside the PNG.*/
			
 
				-  if(1/*a->palettesize != 0 && b->palettesize != 0*/) {
			
 
				-    if(a->palettesize != b->palettesize) return 0;
			
 
				-    for(i = 0; i != a->palettesize * 4; ++i)
			
 
				-    {
			
 
				-      if(a->palette[i] != b->palette[i]) return 0;
			
 
				-    }
			
 
				-  }
			
 
				-  return 1;
			
 
				-}
			
 
				-
			
 
				-void lodepng_palette_clear(LodePNGColorMode* info)
			
 
				-{
			
 
				-  if(info->palette) lodepng_free(info->palette);
			
 
				-  info->palette = 0;
			
 
				-  info->palettesize = 0;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_palette_add(LodePNGColorMode* info,
			
 
				-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
			
 
				-{
			
 
				-  unsigned char* data;
			
 
				-  /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with
			
 
				-  the max of 256 colors, it'll have the exact alloc size*/
			
 
				-  if(!info->palette) /*allocate palette if empty*/
			
 
				-  {
			
 
				-    /*room for 256 colors with 4 bytes each*/
			
 
				-    data = (unsigned char*)lodepng_realloc(info->palette, 1024);
			
 
				-    if(!data) return 83; /*alloc fail*/
			
 
				-    else info->palette = data;
			
 
				-  }
			
 
				-  info->palette[4 * info->palettesize + 0] = r;
			
 
				-  info->palette[4 * info->palettesize + 1] = g;
			
 
				-  info->palette[4 * info->palettesize + 2] = b;
			
 
				-  info->palette[4 * info->palettesize + 3] = a;
			
 
				-  ++info->palettesize;
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_get_bpp(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  /*calculate bits per pixel out of colortype and bitdepth*/
			
 
				-  return lodepng_get_bpp_lct(info->colortype, info->bitdepth);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_get_channels(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  return getNumColorChannels(info->colortype);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  return (info->colortype & 4) != 0; /*4 or 6*/
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_is_palette_type(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  return info->colortype == LCT_PALETTE;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  for(i = 0; i != info->palettesize; ++i)
			
 
				-  {
			
 
				-    if(info->palette[i * 4 + 3] < 255) return 1;
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info)
			
 
				-{
			
 
				-  return info->key_defined
			
 
				-      || lodepng_is_alpha_type(info)
			
 
				-      || lodepng_has_palette_alpha(info);
			
 
				-}
			
 
				-
			
 
				-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color)
			
 
				-{
			
 
				-  /*will not overflow for any color type if roughly w * h < 268435455*/
			
 
				-  size_t bpp = lodepng_get_bpp(color);
			
 
				-  size_t n = w * h;
			
 
				-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
			
 
				-}
			
 
				-
			
 
				-size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  /*will not overflow for any color type if roughly w * h < 268435455*/
			
 
				-  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
			
 
				-  size_t n = w * h;
			
 
				-  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer*/
			
 
				-static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color)
			
 
				-{
			
 
				-  /*will not overflow for any color type if roughly w * h < 268435455*/
			
 
				-  size_t bpp = lodepng_get_bpp(color);
			
 
				-  size_t line = ((w / 8) * bpp) + ((w & 7) * bpp + 7) / 8;
			
 
				-  return h * line;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-
			
 
				-static void LodePNGUnknownChunks_init(LodePNGInfo* info)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0;
			
 
				-  for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0;
			
 
				-}
			
 
				-
			
 
				-static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]);
			
 
				-}
			
 
				-
			
 
				-static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-
			
 
				-  LodePNGUnknownChunks_cleanup(dest);
			
 
				-
			
 
				-  for(i = 0; i != 3; ++i)
			
 
				-  {
			
 
				-    size_t j;
			
 
				-    dest->unknown_chunks_size[i] = src->unknown_chunks_size[i];
			
 
				-    dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]);
			
 
				-    if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/
			
 
				-    for(j = 0; j < src->unknown_chunks_size[i]; ++j)
			
 
				-    {
			
 
				-      dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j];
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/******************************************************************************/
			
 
				-
			
 
				-static void LodePNGText_init(LodePNGInfo* info)
			
 
				-{
			
 
				-  info->text_num = 0;
			
 
				-  info->text_keys = NULL;
			
 
				-  info->text_strings = NULL;
			
 
				-}
			
 
				-
			
 
				-static void LodePNGText_cleanup(LodePNGInfo* info)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  for(i = 0; i != info->text_num; ++i)
			
 
				-  {
			
 
				-    string_cleanup(&info->text_keys[i]);
			
 
				-    string_cleanup(&info->text_strings[i]);
			
 
				-  }
			
 
				-  lodepng_free(info->text_keys);
			
 
				-  lodepng_free(info->text_strings);
			
 
				-}
			
 
				-
			
 
				-static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
			
 
				-{
			
 
				-  size_t i = 0;
			
 
				-  dest->text_keys = 0;
			
 
				-  dest->text_strings = 0;
			
 
				-  dest->text_num = 0;
			
 
				-  for(i = 0; i != source->text_num; ++i)
			
 
				-  {
			
 
				-    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i]));
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-void lodepng_clear_text(LodePNGInfo* info)
			
 
				-{
			
 
				-  LodePNGText_cleanup(info);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str)
			
 
				-{
			
 
				-  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
			
 
				-  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
			
 
				-  if(!new_keys || !new_strings)
			
 
				-  {
			
 
				-    lodepng_free(new_keys);
			
 
				-    lodepng_free(new_strings);
			
 
				-    return 83; /*alloc fail*/
			
 
				-  }
			
 
				-
			
 
				-  ++info->text_num;
			
 
				-  info->text_keys = new_keys;
			
 
				-  info->text_strings = new_strings;
			
 
				-
			
 
				-  string_init(&info->text_keys[info->text_num - 1]);
			
 
				-  string_set(&info->text_keys[info->text_num - 1], key);
			
 
				-
			
 
				-  string_init(&info->text_strings[info->text_num - 1]);
			
 
				-  string_set(&info->text_strings[info->text_num - 1], str);
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/******************************************************************************/
			
 
				-
			
 
				-static void LodePNGIText_init(LodePNGInfo* info)
			
 
				-{
			
 
				-  info->itext_num = 0;
			
 
				-  info->itext_keys = NULL;
			
 
				-  info->itext_langtags = NULL;
			
 
				-  info->itext_transkeys = NULL;
			
 
				-  info->itext_strings = NULL;
			
 
				-}
			
 
				-
			
 
				-static void LodePNGIText_cleanup(LodePNGInfo* info)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  for(i = 0; i != info->itext_num; ++i)
			
 
				-  {
			
 
				-    string_cleanup(&info->itext_keys[i]);
			
 
				-    string_cleanup(&info->itext_langtags[i]);
			
 
				-    string_cleanup(&info->itext_transkeys[i]);
			
 
				-    string_cleanup(&info->itext_strings[i]);
			
 
				-  }
			
 
				-  lodepng_free(info->itext_keys);
			
 
				-  lodepng_free(info->itext_langtags);
			
 
				-  lodepng_free(info->itext_transkeys);
			
 
				-  lodepng_free(info->itext_strings);
			
 
				-}
			
 
				-
			
 
				-static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
			
 
				-{
			
 
				-  size_t i = 0;
			
 
				-  dest->itext_keys = 0;
			
 
				-  dest->itext_langtags = 0;
			
 
				-  dest->itext_transkeys = 0;
			
 
				-  dest->itext_strings = 0;
			
 
				-  dest->itext_num = 0;
			
 
				-  for(i = 0; i != source->itext_num; ++i)
			
 
				-  {
			
 
				-    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i],
			
 
				-                                        source->itext_transkeys[i], source->itext_strings[i]));
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-void lodepng_clear_itext(LodePNGInfo* info)
			
 
				-{
			
 
				-  LodePNGIText_cleanup(info);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
			
 
				-                           const char* transkey, const char* str)
			
 
				-{
			
 
				-  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
			
 
				-  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
			
 
				-  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
			
 
				-  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
			
 
				-  if(!new_keys || !new_langtags || !new_transkeys || !new_strings)
			
 
				-  {
			
 
				-    lodepng_free(new_keys);
			
 
				-    lodepng_free(new_langtags);
			
 
				-    lodepng_free(new_transkeys);
			
 
				-    lodepng_free(new_strings);
			
 
				-    return 83; /*alloc fail*/
			
 
				-  }
			
 
				-
			
 
				-  ++info->itext_num;
			
 
				-  info->itext_keys = new_keys;
			
 
				-  info->itext_langtags = new_langtags;
			
 
				-  info->itext_transkeys = new_transkeys;
			
 
				-  info->itext_strings = new_strings;
			
 
				-
			
 
				-  string_init(&info->itext_keys[info->itext_num - 1]);
			
 
				-  string_set(&info->itext_keys[info->itext_num - 1], key);
			
 
				-
			
 
				-  string_init(&info->itext_langtags[info->itext_num - 1]);
			
 
				-  string_set(&info->itext_langtags[info->itext_num - 1], langtag);
			
 
				-
			
 
				-  string_init(&info->itext_transkeys[info->itext_num - 1]);
			
 
				-  string_set(&info->itext_transkeys[info->itext_num - 1], transkey);
			
 
				-
			
 
				-  string_init(&info->itext_strings[info->itext_num - 1]);
			
 
				-  string_set(&info->itext_strings[info->itext_num - 1], str);
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-void lodepng_info_init(LodePNGInfo* info)
			
 
				-{
			
 
				-  lodepng_color_mode_init(&info->color);
			
 
				-  info->interlace_method = 0;
			
 
				-  info->compression_method = 0;
			
 
				-  info->filter_method = 0;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  info->background_defined = 0;
			
 
				-  info->background_r = info->background_g = info->background_b = 0;
			
 
				-
			
 
				-  LodePNGText_init(info);
			
 
				-  LodePNGIText_init(info);
			
 
				-
			
 
				-  info->time_defined = 0;
			
 
				-  info->phys_defined = 0;
			
 
				-
			
 
				-  LodePNGUnknownChunks_init(info);
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-}
			
 
				-
			
 
				-void lodepng_info_cleanup(LodePNGInfo* info)
			
 
				-{
			
 
				-  lodepng_color_mode_cleanup(&info->color);
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  LodePNGText_cleanup(info);
			
 
				-  LodePNGIText_cleanup(info);
			
 
				-
			
 
				-  LodePNGUnknownChunks_cleanup(info);
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source)
			
 
				-{
			
 
				-  lodepng_info_cleanup(dest);
			
 
				-  *dest = *source;
			
 
				-  lodepng_color_mode_init(&dest->color);
			
 
				-  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
			
 
				-  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
			
 
				-
			
 
				-  LodePNGUnknownChunks_init(dest);
			
 
				-  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b)
			
 
				-{
			
 
				-  LodePNGInfo temp = *a;
			
 
				-  *a = *b;
			
 
				-  *b = temp;
			
 
				-}
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/
			
 
				-static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in)
			
 
				-{
			
 
				-  unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/
			
 
				-  /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/
			
 
				-  unsigned p = index & m;
			
 
				-  in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/
			
 
				-  in = in << (bits * (m - p));
			
 
				-  if(p == 0) out[index * bits / 8] = in;
			
 
				-  else out[index * bits / 8] |= in;
			
 
				-}
			
 
				-
			
 
				-typedef struct ColorTree ColorTree;
			
 
				-
			
 
				-/*
			
 
				-One node of a color tree
			
 
				-This is the data structure used to count the number of unique colors and to get a palette
			
 
				-index for a color. It's like an octree, but because the alpha channel is used too, each
			
 
				-node has 16 instead of 8 children.
			
 
				-*/
			
 
				-struct ColorTree
			
 
				-{
			
 
				-  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/
			
 
				-  int index; /*the payload. Only has a meaningful value if this is in the last level*/
			
 
				-};
			
 
				-
			
 
				-static void color_tree_init(ColorTree* tree)
			
 
				-{
			
 
				-  int i;
			
 
				-  for(i = 0; i != 16; ++i) tree->children[i] = 0;
			
 
				-  tree->index = -1;
			
 
				-}
			
 
				-
			
 
				-static void color_tree_cleanup(ColorTree* tree)
			
 
				-{
			
 
				-  int i;
			
 
				-  for(i = 0; i != 16; ++i)
			
 
				-  {
			
 
				-    if(tree->children[i])
			
 
				-    {
			
 
				-      color_tree_cleanup(tree->children[i]);
			
 
				-      lodepng_free(tree->children[i]);
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*returns -1 if color not present, its index otherwise*/
			
 
				-static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
			
 
				-{
			
 
				-  int bit = 0;
			
 
				-  for(bit = 0; bit < 8; ++bit)
			
 
				-  {
			
 
				-    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
			
 
				-    if(!tree->children[i]) return -1;
			
 
				-    else tree = tree->children[i];
			
 
				-  }
			
 
				-  return tree ? tree->index : -1;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
			
 
				-{
			
 
				-  return color_tree_get(tree, r, g, b, a) >= 0;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-/*color is not allowed to already exist.
			
 
				-Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/
			
 
				-static void color_tree_add(ColorTree* tree,
			
 
				-                           unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index)
			
 
				-{
			
 
				-  int bit;
			
 
				-  for(bit = 0; bit < 8; ++bit)
			
 
				-  {
			
 
				-    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
			
 
				-    if(!tree->children[i])
			
 
				-    {
			
 
				-      tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
			
 
				-      color_tree_init(tree->children[i]);
			
 
				-    }
			
 
				-    tree = tree->children[i];
			
 
				-  }
			
 
				-  tree->index = (int)index;
			
 
				-}
			
 
				-
			
 
				-/*put a pixel, given its RGBA color, into image of any color type*/
			
 
				-static unsigned rgba8ToPixel(unsigned char* out, size_t i,
			
 
				-                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
			
 
				-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
			
 
				-{
			
 
				-  if(mode->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
			
 
				-    if(mode->bitdepth == 8) out[i] = grey;
			
 
				-    else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = grey;
			
 
				-    else
			
 
				-    {
			
 
				-      /*take the most significant bits of grey*/
			
 
				-      grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
			
 
				-      addColorBits(out, i, mode->bitdepth, grey);
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      out[i * 3 + 0] = r;
			
 
				-      out[i * 3 + 1] = g;
			
 
				-      out[i * 3 + 2] = b;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      out[i * 6 + 0] = out[i * 6 + 1] = r;
			
 
				-      out[i * 6 + 2] = out[i * 6 + 3] = g;
			
 
				-      out[i * 6 + 4] = out[i * 6 + 5] = b;
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    int index = color_tree_get(tree, r, g, b, a);
			
 
				-    if(index < 0) return 82; /*color not in palette*/
			
 
				-    if(mode->bitdepth == 8) out[i] = index;
			
 
				-    else addColorBits(out, i, mode->bitdepth, (unsigned)index);
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      out[i * 2 + 0] = grey;
			
 
				-      out[i * 2 + 1] = a;
			
 
				-    }
			
 
				-    else if(mode->bitdepth == 16)
			
 
				-    {
			
 
				-      out[i * 4 + 0] = out[i * 4 + 1] = grey;
			
 
				-      out[i * 4 + 2] = out[i * 4 + 3] = a;
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      out[i * 4 + 0] = r;
			
 
				-      out[i * 4 + 1] = g;
			
 
				-      out[i * 4 + 2] = b;
			
 
				-      out[i * 4 + 3] = a;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      out[i * 8 + 0] = out[i * 8 + 1] = r;
			
 
				-      out[i * 8 + 2] = out[i * 8 + 3] = g;
			
 
				-      out[i * 8 + 4] = out[i * 8 + 5] = b;
			
 
				-      out[i * 8 + 6] = out[i * 8 + 7] = a;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return 0; /*no error*/
			
 
				-}
			
 
				-
			
 
				-/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/
			
 
				-static void rgba16ToPixel(unsigned char* out, size_t i,
			
 
				-                         const LodePNGColorMode* mode,
			
 
				-                         unsigned short r, unsigned short g, unsigned short b, unsigned short a)
			
 
				-{
			
 
				-  if(mode->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
			
 
				-    out[i * 2 + 0] = (grey >> 8) & 255;
			
 
				-    out[i * 2 + 1] = grey & 255;
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    out[i * 6 + 0] = (r >> 8) & 255;
			
 
				-    out[i * 6 + 1] = r & 255;
			
 
				-    out[i * 6 + 2] = (g >> 8) & 255;
			
 
				-    out[i * 6 + 3] = g & 255;
			
 
				-    out[i * 6 + 4] = (b >> 8) & 255;
			
 
				-    out[i * 6 + 5] = b & 255;
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
			
 
				-    out[i * 4 + 0] = (grey >> 8) & 255;
			
 
				-    out[i * 4 + 1] = grey & 255;
			
 
				-    out[i * 4 + 2] = (a >> 8) & 255;
			
 
				-    out[i * 4 + 3] = a & 255;
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    out[i * 8 + 0] = (r >> 8) & 255;
			
 
				-    out[i * 8 + 1] = r & 255;
			
 
				-    out[i * 8 + 2] = (g >> 8) & 255;
			
 
				-    out[i * 8 + 3] = g & 255;
			
 
				-    out[i * 8 + 4] = (b >> 8) & 255;
			
 
				-    out[i * 8 + 5] = b & 255;
			
 
				-    out[i * 8 + 6] = (a >> 8) & 255;
			
 
				-    out[i * 8 + 7] = a & 255;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/
			
 
				-static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
			
 
				-                               unsigned char* b, unsigned char* a,
			
 
				-                               const unsigned char* in, size_t i,
			
 
				-                               const LodePNGColorMode* mode)
			
 
				-{
			
 
				-  if(mode->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      *r = *g = *b = in[i];
			
 
				-      if(mode->key_defined && *r == mode->key_r) *a = 0;
			
 
				-      else *a = 255;
			
 
				-    }
			
 
				-    else if(mode->bitdepth == 16)
			
 
				-    {
			
 
				-      *r = *g = *b = in[i * 2 + 0];
			
 
				-      if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
			
 
				-      else *a = 255;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
			
 
				-      size_t j = i * mode->bitdepth;
			
 
				-      unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
			
 
				-      *r = *g = *b = (value * 255) / highest;
			
 
				-      if(mode->key_defined && value == mode->key_r) *a = 0;
			
 
				-      else *a = 255;
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2];
			
 
				-      if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0;
			
 
				-      else *a = 255;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      *r = in[i * 6 + 0];
			
 
				-      *g = in[i * 6 + 2];
			
 
				-      *b = in[i * 6 + 4];
			
 
				-      if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
			
 
				-         && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
			
 
				-         && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
			
 
				-      else *a = 255;
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    unsigned index;
			
 
				-    if(mode->bitdepth == 8) index = in[i];
			
 
				-    else
			
 
				-    {
			
 
				-      size_t j = i * mode->bitdepth;
			
 
				-      index = readBitsFromReversedStream(&j, in, mode->bitdepth);
			
 
				-    }
			
 
				-
			
 
				-    if(index >= mode->palettesize)
			
 
				-    {
			
 
				-      /*This is an error according to the PNG spec, but common PNG decoders make it black instead.
			
 
				-      Done here too, slightly faster due to no error handling needed.*/
			
 
				-      *r = *g = *b = 0;
			
 
				-      *a = 255;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      *r = mode->palette[index * 4 + 0];
			
 
				-      *g = mode->palette[index * 4 + 1];
			
 
				-      *b = mode->palette[index * 4 + 2];
			
 
				-      *a = mode->palette[index * 4 + 3];
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      *r = *g = *b = in[i * 2 + 0];
			
 
				-      *a = in[i * 2 + 1];
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      *r = *g = *b = in[i * 4 + 0];
			
 
				-      *a = in[i * 4 + 2];
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      *r = in[i * 4 + 0];
			
 
				-      *g = in[i * 4 + 1];
			
 
				-      *b = in[i * 4 + 2];
			
 
				-      *a = in[i * 4 + 3];
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      *r = in[i * 8 + 0];
			
 
				-      *g = in[i * 8 + 2];
			
 
				-      *b = in[i * 8 + 4];
			
 
				-      *a = in[i * 8 + 6];
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
			
 
				-mode test cases, optimized to convert the colors much faster, when converting
			
 
				-to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with
			
 
				-enough memory, if has_alpha is true the output is RGBA. mode has the color mode
			
 
				-of the input buffer.*/
			
 
				-static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels,
			
 
				-                                unsigned has_alpha, const unsigned char* in,
			
 
				-                                const LodePNGColorMode* mode)
			
 
				-{
			
 
				-  unsigned num_channels = has_alpha ? 4 : 3;
			
 
				-  size_t i;
			
 
				-  if(mode->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = buffer[1] = buffer[2] = in[i];
			
 
				-        if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255;
			
 
				-      }
			
 
				-    }
			
 
				-    else if(mode->bitdepth == 16)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
			
 
				-        if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
			
 
				-      }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
			
 
				-      size_t j = 0;
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
			
 
				-        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
			
 
				-        if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = in[i * 3 + 0];
			
 
				-        buffer[1] = in[i * 3 + 1];
			
 
				-        buffer[2] = in[i * 3 + 2];
			
 
				-        if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r
			
 
				-           && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255;
			
 
				-      }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = in[i * 6 + 0];
			
 
				-        buffer[1] = in[i * 6 + 2];
			
 
				-        buffer[2] = in[i * 6 + 4];
			
 
				-        if(has_alpha) buffer[3] = mode->key_defined
			
 
				-           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
			
 
				-           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
			
 
				-           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    unsigned index;
			
 
				-    size_t j = 0;
			
 
				-    for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-    {
			
 
				-      if(mode->bitdepth == 8) index = in[i];
			
 
				-      else index = readBitsFromReversedStream(&j, in, mode->bitdepth);
			
 
				-
			
 
				-      if(index >= mode->palettesize)
			
 
				-      {
			
 
				-        /*This is an error according to the PNG spec, but most PNG decoders make it black instead.
			
 
				-        Done here too, slightly faster due to no error handling needed.*/
			
 
				-        buffer[0] = buffer[1] = buffer[2] = 0;
			
 
				-        if(has_alpha) buffer[3] = 255;
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        buffer[0] = mode->palette[index * 4 + 0];
			
 
				-        buffer[1] = mode->palette[index * 4 + 1];
			
 
				-        buffer[2] = mode->palette[index * 4 + 2];
			
 
				-        if(has_alpha) buffer[3] = mode->palette[index * 4 + 3];
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
			
 
				-        if(has_alpha) buffer[3] = in[i * 2 + 1];
			
 
				-      }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
			
 
				-        if(has_alpha) buffer[3] = in[i * 4 + 2];
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    if(mode->bitdepth == 8)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = in[i * 4 + 0];
			
 
				-        buffer[1] = in[i * 4 + 1];
			
 
				-        buffer[2] = in[i * 4 + 2];
			
 
				-        if(has_alpha) buffer[3] = in[i * 4 + 3];
			
 
				-      }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i, buffer += num_channels)
			
 
				-      {
			
 
				-        buffer[0] = in[i * 8 + 0];
			
 
				-        buffer[1] = in[i * 8 + 2];
			
 
				-        buffer[2] = in[i * 8 + 4];
			
 
				-        if(has_alpha) buffer[3] = in[i * 8 + 6];
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with
			
 
				-given color type, but the given color type must be 16-bit itself.*/
			
 
				-static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
			
 
				-                                const unsigned char* in, size_t i, const LodePNGColorMode* mode)
			
 
				-{
			
 
				-  if(mode->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1];
			
 
				-    if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
			
 
				-    else *a = 65535;
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    *r = 256u * in[i * 6 + 0] + in[i * 6 + 1];
			
 
				-    *g = 256u * in[i * 6 + 2] + in[i * 6 + 3];
			
 
				-    *b = 256u * in[i * 6 + 4] + in[i * 6 + 5];
			
 
				-    if(mode->key_defined
			
 
				-       && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
			
 
				-       && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
			
 
				-       && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
			
 
				-    else *a = 65535;
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1];
			
 
				-    *a = 256u * in[i * 4 + 2] + in[i * 4 + 3];
			
 
				-  }
			
 
				-  else if(mode->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    *r = 256u * in[i * 8 + 0] + in[i * 8 + 1];
			
 
				-    *g = 256u * in[i * 8 + 2] + in[i * 8 + 3];
			
 
				-    *b = 256u * in[i * 8 + 4] + in[i * 8 + 5];
			
 
				-    *a = 256u * in[i * 8 + 6] + in[i * 8 + 7];
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
			
 
				-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
			
 
				-                         unsigned w, unsigned h)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  ColorTree tree;
			
 
				-  size_t numpixels = w * h;
			
 
				-
			
 
				-  if(lodepng_color_mode_equal(mode_out, mode_in))
			
 
				-  {
			
 
				-    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
			
 
				-    for(i = 0; i != numbytes; ++i) out[i] = in[i];
			
 
				-    return 0;
			
 
				-  }
			
 
				-
			
 
				-  if(mode_out->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    size_t palettesize = mode_out->palettesize;
			
 
				-    const unsigned char* palette = mode_out->palette;
			
 
				-    size_t palsize = size_t(1) << mode_out->bitdepth;
			
 
				-    /*if the user specified output palette but did not give the values, assume
			
 
				-    they want the values of the input color type (assuming that one is palette).
			
 
				-    Note that we never create a new palette ourselves.*/
			
 
				-    if(palettesize == 0)
			
 
				-    {
			
 
				-      palettesize = mode_in->palettesize;
			
 
				-      palette = mode_in->palette;
			
 
				-    }
			
 
				-    if(palettesize < palsize) palsize = palettesize;
			
 
				-    color_tree_init(&tree);
			
 
				-    for(i = 0; i != palsize; ++i)
			
 
				-    {
			
 
				-      const unsigned char* p = &palette[i * 4];
			
 
				-      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned int)(i));
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16)
			
 
				-  {
			
 
				-    for(i = 0; i != numpixels; ++i)
			
 
				-    {
			
 
				-      unsigned short r = 0, g = 0, b = 0, a = 0;
			
 
				-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
			
 
				-      rgba16ToPixel(out, i, mode_out, r, g, b, a);
			
 
				-    }
			
 
				-  }
			
 
				-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    getPixelColorsRGBA8(out, numpixels, 1, in, mode_in);
			
 
				-  }
			
 
				-  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    getPixelColorsRGBA8(out, numpixels, 0, in, mode_in);
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    unsigned char r = 0, g = 0, b = 0, a = 0;
			
 
				-    for(i = 0; i != numpixels; ++i)
			
 
				-    {
			
 
				-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
			
 
				-      CERROR_TRY_RETURN(rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a));
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if(mode_out->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    color_tree_cleanup(&tree);
			
 
				-  }
			
 
				-
			
 
				-  return 0; /*no error*/
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-void lodepng_color_profile_init(LodePNGColorProfile* profile)
			
 
				-{
			
 
				-  profile->colored = 0;
			
 
				-  profile->key = 0;
			
 
				-  profile->alpha = 0;
			
 
				-  profile->key_r = profile->key_g = profile->key_b = 0;
			
 
				-  profile->numcolors = 0;
			
 
				-  profile->bits = 1;
			
 
				-}
			
 
				-
			
 
				-/*function used for debug purposes with C++*/
			
 
				-/*void printColorProfile(LodePNGColorProfile* p)
			
 
				-{
			
 
				-  std::cout << "colored: " << (int)p->colored << ", ";
			
 
				-  std::cout << "key: " << (int)p->key << ", ";
			
 
				-  std::cout << "key_r: " << (int)p->key_r << ", ";
			
 
				-  std::cout << "key_g: " << (int)p->key_g << ", ";
			
 
				-  std::cout << "key_b: " << (int)p->key_b << ", ";
			
 
				-  std::cout << "alpha: " << (int)p->alpha << ", ";
			
 
				-  std::cout << "numcolors: " << (int)p->numcolors << ", ";
			
 
				-  std::cout << "bits: " << (int)p->bits << std::endl;
			
 
				-}*/
			
 
				-
			
 
				-/*Returns how many bits needed to represent given value (max 8 bit)*/
			
 
				-static unsigned getValueRequiredBits(unsigned char value)
			
 
				-{
			
 
				-  if(value == 0 || value == 255) return 1;
			
 
				-  /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/
			
 
				-  if(value % 17 == 0) return value % 85 == 0 ? 2 : 4;
			
 
				-  return 8;
			
 
				-}
			
 
				-
			
 
				-/*profile must already have been inited with mode.
			
 
				-It's ok to set some parameters of profile to done already.*/
			
 
				-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
			
 
				-                                   const unsigned char* in, unsigned w, unsigned h,
			
 
				-                                   const LodePNGColorMode* mode)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  size_t i;
			
 
				-  ColorTree tree;
			
 
				-  size_t numpixels = w * h;
			
 
				-
			
 
				-  unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0;
			
 
				-  unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1;
			
 
				-  unsigned numcolors_done = 0;
			
 
				-  unsigned bpp = lodepng_get_bpp(mode);
			
 
				-  unsigned bits_done = bpp == 1 ? 1 : 0;
			
 
				-  unsigned maxnumcolors = 257;
			
 
				-  unsigned sixteen = 0;
			
 
				-  if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256));
			
 
				-
			
 
				-  color_tree_init(&tree);
			
 
				-
			
 
				-  /*Check if the 16-bit input is truly 16-bit*/
			
 
				-  if(mode->bitdepth == 16)
			
 
				-  {
			
 
				-    unsigned short r, g, b, a;
			
 
				-    for(i = 0; i != numpixels; ++i)
			
 
				-    {
			
 
				-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
			
 
				-      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
			
 
				-         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/
			
 
				-      {
			
 
				-        sixteen = 1;
			
 
				-        break;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  if(sixteen)
			
 
				-  {
			
 
				-    unsigned short r = 0, g = 0, b = 0, a = 0;
			
 
				-    profile->bits = 16;
			
 
				-    bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
			
 
				-
			
 
				-    for(i = 0; i != numpixels; ++i)
			
 
				-    {
			
 
				-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
			
 
				-
			
 
				-      if(!colored_done && (r != g || r != b))
			
 
				-      {
			
 
				-        profile->colored = 1;
			
 
				-        colored_done = 1;
			
 
				-      }
			
 
				-
			
 
				-      if(!alpha_done)
			
 
				-      {
			
 
				-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
			
 
				-        if(a != 65535 && (a != 0 || (profile->key && !matchkey)))
			
 
				-        {
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
			
 
				-        }
			
 
				-        else if(a == 0 && !profile->alpha && !profile->key)
			
 
				-        {
			
 
				-          profile->key = 1;
			
 
				-          profile->key_r = r;
			
 
				-          profile->key_g = g;
			
 
				-          profile->key_b = b;
			
 
				-        }
			
 
				-        else if(a == 65535 && profile->key && matchkey)
			
 
				-        {
			
 
				-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-        }
			
 
				-      }
			
 
				-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
			
 
				-    }
			
 
				-
			
 
				-    if(profile->key && !profile->alpha)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i)
			
 
				-      {
			
 
				-        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
			
 
				-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
			
 
				-        {
			
 
				-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else /* < 16-bit */
			
 
				-  {
			
 
				-    unsigned char r = 0, g = 0, b = 0, a = 0;
			
 
				-    for(i = 0; i != numpixels; ++i)
			
 
				-    {
			
 
				-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
			
 
				-
			
 
				-      if(!bits_done && profile->bits < 8)
			
 
				-      {
			
 
				-        /*only r is checked, < 8 bits is only relevant for greyscale*/
			
 
				-        unsigned bits = getValueRequiredBits(r);
			
 
				-        if(bits > profile->bits) profile->bits = bits;
			
 
				-      }
			
 
				-      bits_done = (profile->bits >= bpp);
			
 
				-
			
 
				-      if(!colored_done && (r != g || r != b))
			
 
				-      {
			
 
				-        profile->colored = 1;
			
 
				-        colored_done = 1;
			
 
				-        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
			
 
				-      }
			
 
				-
			
 
				-      if(!alpha_done)
			
 
				-      {
			
 
				-        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
			
 
				-        if(a != 255 && (a != 0 || (profile->key && !matchkey)))
			
 
				-        {
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
			
 
				-        }
			
 
				-        else if(a == 0 && !profile->alpha && !profile->key)
			
 
				-        {
			
 
				-          profile->key = 1;
			
 
				-          profile->key_r = r;
			
 
				-          profile->key_g = g;
			
 
				-          profile->key_b = b;
			
 
				-        }
			
 
				-        else if(a == 255 && profile->key && matchkey)
			
 
				-        {
			
 
				-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      if(!numcolors_done)
			
 
				-      {
			
 
				-        if(!color_tree_has(&tree, r, g, b, a))
			
 
				-        {
			
 
				-          color_tree_add(&tree, r, g, b, a, profile->numcolors);
			
 
				-          if(profile->numcolors < 256)
			
 
				-          {
			
 
				-            unsigned char* p = profile->palette;
			
 
				-            unsigned n = profile->numcolors;
			
 
				-            p[n * 4 + 0] = r;
			
 
				-            p[n * 4 + 1] = g;
			
 
				-            p[n * 4 + 2] = b;
			
 
				-            p[n * 4 + 3] = a;
			
 
				-          }
			
 
				-          ++profile->numcolors;
			
 
				-          numcolors_done = profile->numcolors >= maxnumcolors;
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      if(alpha_done && numcolors_done && colored_done && bits_done) break;
			
 
				-    }
			
 
				-
			
 
				-    if(profile->key && !profile->alpha)
			
 
				-    {
			
 
				-      for(i = 0; i != numpixels; ++i)
			
 
				-      {
			
 
				-        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
			
 
				-        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b)
			
 
				-        {
			
 
				-          /* Color key cannot be used if an opaque pixel also has that RGB color. */
			
 
				-          profile->alpha = 1;
			
 
				-          alpha_done = 1;
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    /*make the profile's key always 16-bit for consistency - repeat each byte twice*/
			
 
				-    profile->key_r += (profile->key_r << 8);
			
 
				-    profile->key_g += (profile->key_g << 8);
			
 
				-    profile->key_b += (profile->key_b << 8);
			
 
				-  }
			
 
				-
			
 
				-  color_tree_cleanup(&tree);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*Automatically chooses color type that gives smallest amount of bits in the
			
 
				-output image, e.g. grey if there are only greyscale pixels, palette if there
			
 
				-are less than 256 colors, ...
			
 
				-Updates values of mode with a potentially smaller color model. mode_out should
			
 
				-contain the user chosen color model, but will be overwritten with the new chosen one.*/
			
 
				-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
			
 
				-                                   const unsigned char* image, unsigned w, unsigned h,
			
 
				-                                   const LodePNGColorMode* mode_in)
			
 
				-{
			
 
				-  LodePNGColorProfile prof;
			
 
				-  unsigned error = 0;
			
 
				-  unsigned i, n, palettebits, grey_ok, palette_ok;
			
 
				-
			
 
				-  lodepng_color_profile_init(&prof);
			
 
				-  error = lodepng_get_color_profile(&prof, image, w, h, mode_in);
			
 
				-  if(error) return error;
			
 
				-  mode_out->key_defined = 0;
			
 
				-
			
 
				-  if(prof.key && w * h <= 16)
			
 
				-  {
			
 
				-    prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
			
 
				-    if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
			
 
				-  }
			
 
				-  grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/
			
 
				-  n = prof.numcolors;
			
 
				-  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
			
 
				-  palette_ok = n <= 256 && (n * 2 < w * h) && prof.bits <= 8;
			
 
				-  if(w * h < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/
			
 
				-  if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/
			
 
				-
			
 
				-  if(palette_ok)
			
 
				-  {
			
 
				-    unsigned char* p = prof.palette;
			
 
				-    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
			
 
				-    for(i = 0; i != prof.numcolors; ++i)
			
 
				-    {
			
 
				-      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
			
 
				-      if(error) break;
			
 
				-    }
			
 
				-
			
 
				-    mode_out->colortype = LCT_PALETTE;
			
 
				-    mode_out->bitdepth = palettebits;
			
 
				-
			
 
				-    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
			
 
				-        && mode_in->bitdepth == mode_out->bitdepth)
			
 
				-    {
			
 
				-      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
			
 
				-      lodepng_color_mode_cleanup(mode_out);
			
 
				-      lodepng_color_mode_copy(mode_out, mode_in);
			
 
				-    }
			
 
				-  }
			
 
				-  else /*8-bit or 16-bit per channel*/
			
 
				-  {
			
 
				-    mode_out->bitdepth = prof.bits;
			
 
				-    mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA)
			
 
				-                                     : (prof.colored ? LCT_RGB : LCT_GREY);
			
 
				-
			
 
				-    if(prof.key && !prof.alpha)
			
 
				-    {
			
 
				-      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/
			
 
				-      mode_out->key_r = prof.key_r & mask;
			
 
				-      mode_out->key_g = prof.key_g & mask;
			
 
				-      mode_out->key_b = prof.key_b & mask;
			
 
				-      mode_out->key_defined = 1;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
			
 
				-
			
 
				-/*
			
 
				-Paeth predicter, used by PNG filter type 4
			
 
				-The parameters are of type short, but should come from unsigned chars, the shorts
			
 
				-are only needed to make the paeth calculation correct.
			
 
				-*/
			
 
				-static unsigned char paethPredictor(short a, short b, short c)
			
 
				-{
			
 
				-  short pa = abs(b - c);
			
 
				-  short pb = abs(a - c);
			
 
				-  short pc = abs(a + b - c - c);
			
 
				-
			
 
				-  if(pc < pa && pc < pb) return (unsigned char)c;
			
 
				-  else if(pb < pa) return (unsigned char)b;
			
 
				-  else return (unsigned char)a;
			
 
				-}
			
 
				-
			
 
				-/*shared values used by multiple Adam7 related functions*/
			
 
				-
			
 
				-static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/
			
 
				-static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/
			
 
				-static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/
			
 
				-static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/
			
 
				-
			
 
				-/*
			
 
				-Outputs various dimensions and positions in the image related to the Adam7 reduced images.
			
 
				-passw: output containing the width of the 7 passes
			
 
				-passh: output containing the height of the 7 passes
			
 
				-filter_passstart: output containing the index of the start and end of each
			
 
				- reduced image with filter bytes
			
 
				-padded_passstart output containing the index of the start and end of each
			
 
				- reduced image when without filter bytes but with padded scanlines
			
 
				-passstart: output containing the index of the start and end of each reduced
			
 
				- image without padding between scanlines, but still padding between the images
			
 
				-w, h: width and height of non-interlaced image
			
 
				-bpp: bits per pixel
			
 
				-"padded" is only relevant if bpp is less than 8 and a scanline or image does not
			
 
				- end at a full byte
			
 
				-*/
			
 
				-static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
			
 
				-                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp)
			
 
				-{
			
 
				-  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
			
 
				-  unsigned i;
			
 
				-
			
 
				-  /*calculate width and height in pixels of each pass*/
			
 
				-  for(i = 0; i != 7; ++i)
			
 
				-  {
			
 
				-    passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i];
			
 
				-    passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i];
			
 
				-    if(passw[i] == 0) passh[i] = 0;
			
 
				-    if(passh[i] == 0) passw[i] = 0;
			
 
				-  }
			
 
				-
			
 
				-  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
			
 
				-  for(i = 0; i != 7; ++i)
			
 
				-  {
			
 
				-    /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/
			
 
				-    filter_passstart[i + 1] = filter_passstart[i]
			
 
				-                            + ((passw[i] && passh[i]) ? passh[i] * (1 + (passw[i] * bpp + 7) / 8) : 0);
			
 
				-    /*bits padded if needed to fill full byte at end of each scanline*/
			
 
				-    padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7) / 8);
			
 
				-    /*only padded at end of reduced image*/
			
 
				-    passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7) / 8;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / PNG Decoder                                                            / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*read the information from the header and store it in the LodePNGInfo. return value is error*/
			
 
				-unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
			
 
				-                         const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  LodePNGInfo* info = &state->info_png;
			
 
				-  if(insize == 0 || in == 0)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
			
 
				-  }
			
 
				-  if(insize < 33)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
			
 
				-  }
			
 
				-
			
 
				-  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
			
 
				-  lodepng_info_cleanup(info);
			
 
				-  lodepng_info_init(info);
			
 
				-
			
 
				-  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
			
 
				-     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
			
 
				-  }
			
 
				-  if(lodepng_chunk_length(in + 8) != 13)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
			
 
				-  }
			
 
				-  if(!lodepng_chunk_type_equals(in + 8, "IHDR"))
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
			
 
				-  }
			
 
				-
			
 
				-  /*read the values given in the header*/
			
 
				-  *w = lodepng_read32bitInt(&in[16]);
			
 
				-  *h = lodepng_read32bitInt(&in[20]);
			
 
				-  info->color.bitdepth = in[24];
			
 
				-  info->color.colortype = (LodePNGColorType)in[25];
			
 
				-  info->compression_method = in[26];
			
 
				-  info->filter_method = in[27];
			
 
				-  info->interlace_method = in[28];
			
 
				-
			
 
				-  if(*w == 0 || *h == 0)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 93);
			
 
				-  }
			
 
				-
			
 
				-  if(!state->decoder.ignore_crc)
			
 
				-  {
			
 
				-    unsigned CRC = lodepng_read32bitInt(&in[29]);
			
 
				-    unsigned checksum = lodepng_crc32(&in[12], 17);
			
 
				-    if(CRC != checksum)
			
 
				-    {
			
 
				-      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  /*error: only compression method 0 is allowed in the specification*/
			
 
				-  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
			
 
				-  /*error: only filter method 0 is allowed in the specification*/
			
 
				-  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
			
 
				-  /*error: only interlace methods 0 and 1 exist in the specification*/
			
 
				-  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
			
 
				-
			
 
				-  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth);
			
 
				-  return state->error;
			
 
				-}
			
 
				-
			
 
				-static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
			
 
				-                                 size_t bytewidth, unsigned char filterType, size_t length)
			
 
				-{
			
 
				-  /*
			
 
				-  For PNG filter method 0
			
 
				-  unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte,
			
 
				-  the filter works byte per byte (bytewidth = 1)
			
 
				-  precon is the previous unfiltered scanline, recon the result, scanline the current one
			
 
				-  the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead
			
 
				-  recon and scanline MAY be the same memory address! precon must be disjoint.
			
 
				-  */
			
 
				-
			
 
				-  size_t i;
			
 
				-  switch(filterType)
			
 
				-  {
			
 
				-    case 0:
			
 
				-      for(i = 0; i != length; ++i) recon[i] = scanline[i];
			
 
				-      break;
			
 
				-    case 1:
			
 
				-      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
			
 
				-      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
			
 
				-      break;
			
 
				-    case 2:
			
 
				-      if(precon)
			
 
				-      {
			
 
				-        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != length; ++i) recon[i] = scanline[i];
			
 
				-      }
			
 
				-      break;
			
 
				-    case 3:
			
 
				-      if(precon)
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1);
			
 
				-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1);
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
			
 
				-        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1);
			
 
				-      }
			
 
				-      break;
			
 
				-    case 4:
			
 
				-      if(precon)
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i)
			
 
				-        {
			
 
				-          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
			
 
				-        }
			
 
				-        for(i = bytewidth; i < length; ++i)
			
 
				-        {
			
 
				-          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
			
 
				-        }
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i)
			
 
				-        {
			
 
				-          recon[i] = scanline[i];
			
 
				-        }
			
 
				-        for(i = bytewidth; i < length; ++i)
			
 
				-        {
			
 
				-          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
			
 
				-          recon[i] = (scanline[i] + recon[i - bytewidth]);
			
 
				-        }
			
 
				-      }
			
 
				-      break;
			
 
				-    default: return 36; /*error: unexisting filter type given*/
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
			
 
				-{
			
 
				-  /*
			
 
				-  For PNG filter method 0
			
 
				-  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
			
 
				-  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
			
 
				-  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel
			
 
				-  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
			
 
				-  */
			
 
				-
			
 
				-  unsigned y;
			
 
				-  unsigned char* prevline = 0;
			
 
				-
			
 
				-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
			
 
				-  size_t bytewidth = (bpp + 7) / 8;
			
 
				-  size_t linebytes = (w * bpp + 7) / 8;
			
 
				-
			
 
				-  for(y = 0; y < h; ++y)
			
 
				-  {
			
 
				-    size_t outindex = linebytes * y;
			
 
				-    size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
			
 
				-    unsigned char filterType = in[inindex];
			
 
				-
			
 
				-    CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes));
			
 
				-
			
 
				-    prevline = &out[outindex];
			
 
				-  }
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-in: Adam7 interlaced image, with no padding bits between scanlines, but between
			
 
				- reduced images so that each reduced image starts at a byte.
			
 
				-out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h
			
 
				-bpp: bits per pixel
			
 
				-out has the following size in bits: w * h * bpp.
			
 
				-in is possibly bigger due to padding bits between reduced images.
			
 
				-out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
			
 
				-(because that's likely a little bit faster)
			
 
				-NOTE: comments about padding bits are only relevant if bpp < 8
			
 
				-*/
			
 
				-static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
			
 
				-{
			
 
				-  unsigned passw[7], passh[7];
			
 
				-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
			
 
				-  unsigned i;
			
 
				-
			
 
				-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
			
 
				-
			
 
				-  if(bpp >= 8)
			
 
				-  {
			
 
				-    for(i = 0; i != 7; ++i)
			
 
				-    {
			
 
				-      unsigned x, y, b;
			
 
				-      size_t bytewidth = bpp / 8;
			
 
				-      for(y = 0; y < passh[i]; ++y)
			
 
				-      for(x = 0; x < passw[i]; ++x)
			
 
				-      {
			
 
				-        size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth;
			
 
				-        size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
			
 
				-        for(b = 0; b < bytewidth; ++b)
			
 
				-        {
			
 
				-          out[pixeloutstart + b] = in[pixelinstart + b];
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
			
 
				-  {
			
 
				-    for(i = 0; i != 7; ++i)
			
 
				-    {
			
 
				-      unsigned x, y, b;
			
 
				-      unsigned ilinebits = bpp * passw[i];
			
 
				-      unsigned olinebits = bpp * w;
			
 
				-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
			
 
				-      for(y = 0; y < passh[i]; ++y)
			
 
				-      for(x = 0; x < passw[i]; ++x)
			
 
				-      {
			
 
				-        ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
			
 
				-        obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
			
 
				-        for(b = 0; b < bpp; ++b)
			
 
				-        {
			
 
				-          unsigned char bit = readBitFromReversedStream(&ibp, in);
			
 
				-          /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/
			
 
				-          setBitOfReversedStream0(&obp, out, bit);
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-static void removePaddingBits(unsigned char* out, const unsigned char* in,
			
 
				-                              size_t olinebits, size_t ilinebits, unsigned h)
			
 
				-{
			
 
				-  /*
			
 
				-  After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need
			
 
				-  to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers
			
 
				-  for the Adam7 code, the color convert code and the output to the user.
			
 
				-  in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must
			
 
				-  have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits
			
 
				-  also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7
			
 
				-  only useful if (ilinebits - olinebits) is a value in the range 1..7
			
 
				-  */
			
 
				-  unsigned y;
			
 
				-  size_t diff = ilinebits - olinebits;
			
 
				-  size_t ibp = 0, obp = 0; /*input and output bit pointers*/
			
 
				-  for(y = 0; y < h; ++y)
			
 
				-  {
			
 
				-    size_t x;
			
 
				-    for(x = 0; x < olinebits; ++x)
			
 
				-    {
			
 
				-      unsigned char bit = readBitFromReversedStream(&ibp, in);
			
 
				-      setBitOfReversedStream(&obp, out, bit);
			
 
				-    }
			
 
				-    ibp += diff;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from
			
 
				-the IDAT chunks (with filter index bytes and possible padding bits)
			
 
				-return value is error*/
			
 
				-static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
			
 
				-                                     unsigned w, unsigned h, const LodePNGInfo* info_png)
			
 
				-{
			
 
				-  /*
			
 
				-  This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype.
			
 
				-  Steps:
			
 
				-  *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8)
			
 
				-  *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
			
 
				-  NOTE: the in buffer will be overwritten with intermediate data!
			
 
				-  */
			
 
				-  unsigned bpp = lodepng_get_bpp(&info_png->color);
			
 
				-  if(bpp == 0) return 31; /*error: invalid colortype*/
			
 
				-
			
 
				-  if(info_png->interlace_method == 0)
			
 
				-  {
			
 
				-    if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8)
			
 
				-    {
			
 
				-      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp));
			
 
				-      removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h);
			
 
				-    }
			
 
				-    /*we can immediately filter into the out buffer, no other steps needed*/
			
 
				-    else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
			
 
				-  }
			
 
				-  else /*interlace_method is 1 (Adam7)*/
			
 
				-  {
			
 
				-    unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8];
			
 
				-    unsigned i;
			
 
				-
			
 
				-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
			
 
				-
			
 
				-    for(i = 0; i != 7; ++i)
			
 
				-    {
			
 
				-      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp));
			
 
				-      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
			
 
				-      move bytes instead of bits or move not at all*/
			
 
				-      if(bpp < 8)
			
 
				-      {
			
 
				-        /*remove padding bits in scanlines; after this there still may be padding
			
 
				-        bits between the different reduced images: each reduced image still starts nicely at a byte*/
			
 
				-        removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp,
			
 
				-                          ((passw[i] * bpp + 7) / 8) * 8, passh[i]);
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    Adam7_deinterlace(out, in, w, h, bpp);
			
 
				-  }
			
 
				-
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  unsigned pos = 0, i;
			
 
				-  if(color->palette) lodepng_free(color->palette);
			
 
				-  color->palettesize = chunkLength / 3;
			
 
				-  color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize);
			
 
				-  if(!color->palette && color->palettesize)
			
 
				-  {
			
 
				-    color->palettesize = 0;
			
 
				-    return 83; /*alloc fail*/
			
 
				-  }
			
 
				-  if(color->palettesize > 256) return 38; /*error: palette too big*/
			
 
				-
			
 
				-  for(i = 0; i != color->palettesize; ++i)
			
 
				-  {
			
 
				-    color->palette[4 * i + 0] = data[pos++]; /*R*/
			
 
				-    color->palette[4 * i + 1] = data[pos++]; /*G*/
			
 
				-    color->palette[4 * i + 2] = data[pos++]; /*B*/
			
 
				-    color->palette[4 * i + 3] = 255; /*alpha*/
			
 
				-  }
			
 
				-
			
 
				-  return 0; /* OK */
			
 
				-}
			
 
				-
			
 
				-static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  unsigned i;
			
 
				-  if(color->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    /*error: more alpha values given than there are palette entries*/
			
 
				-    if(chunkLength > color->palettesize) return 38;
			
 
				-
			
 
				-    for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i];
			
 
				-  }
			
 
				-  else if(color->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    /*error: this chunk must be 2 bytes for greyscale image*/
			
 
				-    if(chunkLength != 2) return 30;
			
 
				-
			
 
				-    color->key_defined = 1;
			
 
				-    color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
			
 
				-  }
			
 
				-  else if(color->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    /*error: this chunk must be 6 bytes for RGB image*/
			
 
				-    if(chunkLength != 6) return 41;
			
 
				-
			
 
				-    color->key_defined = 1;
			
 
				-    color->key_r = 256u * data[0] + data[1];
			
 
				-    color->key_g = 256u * data[2] + data[3];
			
 
				-    color->key_b = 256u * data[4] + data[5];
			
 
				-  }
			
 
				-  else return 42; /*error: tRNS chunk not allowed for other color models*/
			
 
				-
			
 
				-  return 0; /* OK */
			
 
				-}
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-/*background color chunk (bKGD)*/
			
 
				-static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  if(info->color.colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    /*error: this chunk must be 1 byte for indexed color image*/
			
 
				-    if(chunkLength != 1) return 43;
			
 
				-
			
 
				-    info->background_defined = 1;
			
 
				-    info->background_r = info->background_g = info->background_b = data[0];
			
 
				-  }
			
 
				-  else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    /*error: this chunk must be 2 bytes for greyscale image*/
			
 
				-    if(chunkLength != 2) return 44;
			
 
				-
			
 
				-    info->background_defined = 1;
			
 
				-    info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
			
 
				-  }
			
 
				-  else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    /*error: this chunk must be 6 bytes for greyscale image*/
			
 
				-    if(chunkLength != 6) return 45;
			
 
				-
			
 
				-    info->background_defined = 1;
			
 
				-    info->background_r = 256u * data[0] + data[1];
			
 
				-    info->background_g = 256u * data[2] + data[3];
			
 
				-    info->background_b = 256u * data[4] + data[5];
			
 
				-  }
			
 
				-
			
 
				-  return 0; /* OK */
			
 
				-}
			
 
				-
			
 
				-/*text chunk (tEXt)*/
			
 
				-static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  char *key = 0, *str = 0;
			
 
				-  unsigned i;
			
 
				-
			
 
				-  while(!error) /*not really a while loop, only used to break on error*/
			
 
				-  {
			
 
				-    unsigned length, string2_begin;
			
 
				-
			
 
				-    length = 0;
			
 
				-    while(length < chunkLength && data[length] != 0) ++length;
			
 
				-    /*even though it's not allowed by the standard, no error is thrown if
			
 
				-    there's no null termination char, if the text is empty*/
			
 
				-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
			
 
				-
			
 
				-    key = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    key[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
			
 
				-
			
 
				-    string2_begin = length + 1; /*skip keyword null terminator*/
			
 
				-
			
 
				-    length = chunkLength < string2_begin ? 0 : chunkLength - string2_begin;
			
 
				-    str = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    str[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
			
 
				-
			
 
				-    error = lodepng_add_text(info, key, str);
			
 
				-
			
 
				-    break;
			
 
				-  }
			
 
				-
			
 
				-  lodepng_free(key);
			
 
				-  lodepng_free(str);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*compressed text chunk (zTXt)*/
			
 
				-static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
			
 
				-                               const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  unsigned i;
			
 
				-
			
 
				-  unsigned length, string2_begin;
			
 
				-  char *key = 0;
			
 
				-  ucvector decoded;
			
 
				-
			
 
				-  ucvector_init(&decoded);
			
 
				-
			
 
				-  while(!error) /*not really a while loop, only used to break on error*/
			
 
				-  {
			
 
				-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
			
 
				-    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
			
 
				-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
			
 
				-
			
 
				-    key = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    key[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
			
 
				-
			
 
				-    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
			
 
				-
			
 
				-    string2_begin = length + 2;
			
 
				-    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
			
 
				-
			
 
				-    length = chunkLength - string2_begin;
			
 
				-    /*will fail if zlib error, e.g. if length is too small*/
			
 
				-    error = zlib_decompress(&decoded.data, &decoded.size,
			
 
				-                            (unsigned char*)(&data[string2_begin]),
			
 
				-                            length, zlibsettings);
			
 
				-    if(error) break;
			
 
				-    ucvector_push_back(&decoded, 0);
			
 
				-
			
 
				-    error = lodepng_add_text(info, key, (char*)decoded.data);
			
 
				-
			
 
				-    break;
			
 
				-  }
			
 
				-
			
 
				-  lodepng_free(key);
			
 
				-  ucvector_cleanup(&decoded);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*international text chunk (iTXt)*/
			
 
				-static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
			
 
				-                               const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  unsigned i;
			
 
				-
			
 
				-  unsigned length, begin, compressed;
			
 
				-  char *key = 0, *langtag = 0, *transkey = 0;
			
 
				-  ucvector decoded;
			
 
				-  ucvector_init(&decoded);
			
 
				-
			
 
				-  while(!error) /*not really a while loop, only used to break on error*/
			
 
				-  {
			
 
				-    /*Quick check if the chunk length isn't too small. Even without check
			
 
				-    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
			
 
				-    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
			
 
				-
			
 
				-    /*read the key*/
			
 
				-    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
			
 
				-    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
			
 
				-    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
			
 
				-
			
 
				-    key = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    key[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
			
 
				-
			
 
				-    /*read the compression method*/
			
 
				-    compressed = data[length + 1];
			
 
				-    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
			
 
				-
			
 
				-    /*even though it's not allowed by the standard, no error is thrown if
			
 
				-    there's no null termination char, if the text is empty for the next 3 texts*/
			
 
				-
			
 
				-    /*read the langtag*/
			
 
				-    begin = length + 3;
			
 
				-    length = 0;
			
 
				-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
			
 
				-
			
 
				-    langtag = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    langtag[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
			
 
				-
			
 
				-    /*read the transkey*/
			
 
				-    begin += length + 1;
			
 
				-    length = 0;
			
 
				-    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
			
 
				-
			
 
				-    transkey = (char*)lodepng_malloc(length + 1);
			
 
				-    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
			
 
				-
			
 
				-    transkey[length] = 0;
			
 
				-    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
			
 
				-
			
 
				-    /*read the actual text*/
			
 
				-    begin += length + 1;
			
 
				-
			
 
				-    length = chunkLength < begin ? 0 : chunkLength - begin;
			
 
				-
			
 
				-    if(compressed)
			
 
				-    {
			
 
				-      /*will fail if zlib error, e.g. if length is too small*/
			
 
				-      error = zlib_decompress(&decoded.data, &decoded.size,
			
 
				-                              (unsigned char*)(&data[begin]),
			
 
				-                              length, zlibsettings);
			
 
				-      if(error) break;
			
 
				-      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
			
 
				-      ucvector_push_back(&decoded, 0);
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
			
 
				-
			
 
				-      decoded.data[length] = 0;
			
 
				-      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
			
 
				-    }
			
 
				-
			
 
				-    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
			
 
				-
			
 
				-    break;
			
 
				-  }
			
 
				-
			
 
				-  lodepng_free(key);
			
 
				-  lodepng_free(langtag);
			
 
				-  lodepng_free(transkey);
			
 
				-  ucvector_cleanup(&decoded);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  if(chunkLength != 7) return 73; /*invalid tIME chunk size*/
			
 
				-
			
 
				-  info->time_defined = 1;
			
 
				-  info->time.year = 256u * data[0] + data[1];
			
 
				-  info->time.month = data[2];
			
 
				-  info->time.day = data[3];
			
 
				-  info->time.hour = data[4];
			
 
				-  info->time.minute = data[5];
			
 
				-  info->time.second = data[6];
			
 
				-
			
 
				-  return 0; /* OK */
			
 
				-}
			
 
				-
			
 
				-static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
			
 
				-{
			
 
				-  if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/
			
 
				-
			
 
				-  info->phys_defined = 1;
			
 
				-  info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3];
			
 
				-  info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7];
			
 
				-  info->phys_unit = data[8];
			
 
				-
			
 
				-  return 0; /* OK */
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/
			
 
				-static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                          LodePNGState* state,
			
 
				-                          const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  unsigned char IEND = 0;
			
 
				-  const unsigned char* chunk;
			
 
				-  size_t i;
			
 
				-  ucvector idat; /*the data from idat chunks*/
			
 
				-  ucvector scanlines;
			
 
				-  size_t predict;
			
 
				-  size_t numpixels;
			
 
				-  size_t outsize = 0;
			
 
				-
			
 
				-  /*for unknown chunk order*/
			
 
				-  unsigned unknown = 0;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-  /*provide some proper output values if error will happen*/
			
 
				-  *out = 0;
			
 
				-
			
 
				-  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
			
 
				-  if(state->error) return;
			
 
				-
			
 
				-  numpixels = *w * *h;
			
 
				-
			
 
				-  /*multiplication overflow*/
			
 
				-  if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92);
			
 
				-  /*multiplication overflow possible further below. Allows up to 2^31-1 pixel
			
 
				-  bytes with 16-bit RGBA, the rest is room for filter bytes.*/
			
 
				-  if(numpixels > 268435455) CERROR_RETURN(state->error, 92);
			
 
				-
			
 
				-  ucvector_init(&idat);
			
 
				-  chunk = &in[33]; /*first byte of the first chunk after the header*/
			
 
				-
			
 
				-  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
			
 
				-  IDAT data is put at the start of the in buffer*/
			
 
				-  while(!IEND && !state->error)
			
 
				-  {
			
 
				-    unsigned chunkLength;
			
 
				-    const unsigned char* data; /*the data in the chunk*/
			
 
				-
			
 
				-    /*error: size of the in buffer too small to contain next chunk*/
			
 
				-    if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30);
			
 
				-
			
 
				-    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
			
 
				-    chunkLength = lodepng_chunk_length(chunk);
			
 
				-    /*error: chunk length larger than the max PNG chunk size*/
			
 
				-    if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63);
			
 
				-
			
 
				-    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in)
			
 
				-    {
			
 
				-      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
			
 
				-    }
			
 
				-
			
 
				-    data = lodepng_chunk_data_const(chunk);
			
 
				-
			
 
				-    /*IDAT chunk, containing compressed image data*/
			
 
				-    if(lodepng_chunk_type_equals(chunk, "IDAT"))
			
 
				-    {
			
 
				-      size_t oldsize = idat.size;
			
 
				-      if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
			
 
				-      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-      critical_pos = 3;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    }
			
 
				-    /*IEND chunk*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "IEND"))
			
 
				-    {
			
 
				-      IEND = 1;
			
 
				-    }
			
 
				-    /*palette chunk (PLTE)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "PLTE"))
			
 
				-    {
			
 
				-      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
			
 
				-      if(state->error) break;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-      critical_pos = 2;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    }
			
 
				-    /*palette transparency chunk (tRNS)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "tRNS"))
			
 
				-    {
			
 
				-      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-    /*background color chunk (bKGD)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "bKGD"))
			
 
				-    {
			
 
				-      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-    /*text chunk (tEXt)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "tEXt"))
			
 
				-    {
			
 
				-      if(state->decoder.read_text_chunks)
			
 
				-      {
			
 
				-        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
			
 
				-        if(state->error) break;
			
 
				-      }
			
 
				-    }
			
 
				-    /*compressed text chunk (zTXt)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "zTXt"))
			
 
				-    {
			
 
				-      if(state->decoder.read_text_chunks)
			
 
				-      {
			
 
				-        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
			
 
				-        if(state->error) break;
			
 
				-      }
			
 
				-    }
			
 
				-    /*international text chunk (iTXt)*/
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "iTXt"))
			
 
				-    {
			
 
				-      if(state->decoder.read_text_chunks)
			
 
				-      {
			
 
				-        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
			
 
				-        if(state->error) break;
			
 
				-      }
			
 
				-    }
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "tIME"))
			
 
				-    {
			
 
				-      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-    else if(lodepng_chunk_type_equals(chunk, "pHYs"))
			
 
				-    {
			
 
				-      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    else /*it's not an implemented chunk type, so ignore it: skip over the data*/
			
 
				-    {
			
 
				-      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
			
 
				-      if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69);
			
 
				-
			
 
				-      unknown = 1;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-      if(state->decoder.remember_unknown_chunks)
			
 
				-      {
			
 
				-        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
			
 
				-                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
			
 
				-        if(state->error) break;
			
 
				-      }
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    }
			
 
				-
			
 
				-    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/
			
 
				-    {
			
 
				-      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
			
 
				-    }
			
 
				-
			
 
				-    if(!IEND) chunk = lodepng_chunk_next_const(chunk);
			
 
				-  }
			
 
				-
			
 
				-  ucvector_init(&scanlines);
			
 
				-  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
			
 
				-  If the decompressed size does not match the prediction, the image must be corrupt.*/
			
 
				-  if(state->info_png.interlace_method == 0)
			
 
				-  {
			
 
				-    /*The extra *h is added because this are the filter bytes every scanline starts with*/
			
 
				-    predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h;
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/
			
 
				-    const LodePNGColorMode* color = &state->info_png.color;
			
 
				-    predict = 0;
			
 
				-    predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
			
 
				-    if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
			
 
				-    predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3);
			
 
				-    if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2);
			
 
				-    predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2);
			
 
				-    if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1);
			
 
				-    predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1);
			
 
				-  }
			
 
				-  if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/
			
 
				-  if(!state->error)
			
 
				-  {
			
 
				-    state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data,
			
 
				-                                   idat.size, &state->decoder.zlibsettings);
			
 
				-    if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/
			
 
				-  }
			
 
				-  ucvector_cleanup(&idat);
			
 
				-
			
 
				-  if(!state->error)
			
 
				-  {
			
 
				-    outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
			
 
				-    *out = (unsigned char*)lodepng_malloc(outsize);
			
 
				-    if(!*out) state->error = 83; /*alloc fail*/
			
 
				-  }
			
 
				-  if(!state->error)
			
 
				-  {
			
 
				-    for(i = 0; i < outsize; i++) (*out)[i] = 0;
			
 
				-    state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png);
			
 
				-  }
			
 
				-  ucvector_cleanup(&scanlines);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                        LodePNGState* state,
			
 
				-                        const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  *out = 0;
			
 
				-  decodeGeneric(out, w, h, state, in, insize);
			
 
				-  if(state->error) return state->error;
			
 
				-  if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color))
			
 
				-  {
			
 
				-    /*same color type, no copying or converting of data needed*/
			
 
				-    /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype
			
 
				-    the raw image has to the end user*/
			
 
				-    if(!state->decoder.color_convert)
			
 
				-    {
			
 
				-      state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
			
 
				-      if(state->error) return state->error;
			
 
				-    }
			
 
				-  }
			
 
				-  else
			
 
				-  {
			
 
				-    /*color conversion needed; sort of copy of the data*/
			
 
				-    unsigned char* data = *out;
			
 
				-    size_t outsize;
			
 
				-
			
 
				-    /*TODO: check if this works according to the statement in the documentation: "The converter can convert
			
 
				-    from greyscale input color type, to 8-bit greyscale or greyscale with alpha"*/
			
 
				-    if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA)
			
 
				-       && !(state->info_raw.bitdepth == 8))
			
 
				-    {
			
 
				-      return 56; /*unsupported color mode conversion*/
			
 
				-    }
			
 
				-
			
 
				-    outsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
			
 
				-    *out = (unsigned char*)lodepng_malloc(outsize);
			
 
				-    if(!(*out))
			
 
				-    {
			
 
				-      state->error = 83; /*alloc fail*/
			
 
				-    }
			
 
				-    else state->error = lodepng_convert(*out, data, &state->info_raw,
			
 
				-                                        &state->info_png.color, *w, *h);
			
 
				-    lodepng_free(data);
			
 
				-  }
			
 
				-  return state->error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
			
 
				-                               size_t insize, LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned error;
			
 
				-  LodePNGState state;
			
 
				-  lodepng_state_init(&state);
			
 
				-  state.info_raw.colortype = colortype;
			
 
				-  state.info_raw.bitdepth = bitdepth;
			
 
				-  error = lodepng_decode(out, w, h, &state, in, insize);
			
 
				-  lodepng_state_cleanup(&state);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8);
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
			
 
				-                             LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned char* buffer = 0;
			
 
				-  size_t buffersize;
			
 
				-  unsigned error;
			
 
				-  error = lodepng_load_file(&buffer, &buffersize, filename);
			
 
				-  if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth);
			
 
				-  lodepng_free(buffer);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
			
 
				-{
			
 
				-  return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
			
 
				-{
			
 
				-  return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-
			
 
				-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings)
			
 
				-{
			
 
				-  settings->color_convert = 1;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  settings->read_text_chunks = 1;
			
 
				-  settings->remember_unknown_chunks = 0;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-  settings->ignore_crc = 0;
			
 
				-  lodepng_decompress_settings_init(&settings->zlibsettings);
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
			
 
				-
			
 
				-void lodepng_state_init(LodePNGState* state)
			
 
				-{
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-  lodepng_decoder_settings_init(&state->decoder);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-  lodepng_encoder_settings_init(&state->encoder);
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-  lodepng_color_mode_init(&state->info_raw);
			
 
				-  lodepng_info_init(&state->info_png);
			
 
				-  state->error = 1;
			
 
				-}
			
 
				-
			
 
				-void lodepng_state_cleanup(LodePNGState* state)
			
 
				-{
			
 
				-  lodepng_color_mode_cleanup(&state->info_raw);
			
 
				-  lodepng_info_cleanup(&state->info_png);
			
 
				-}
			
 
				-
			
 
				-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source)
			
 
				-{
			
 
				-  lodepng_state_cleanup(dest);
			
 
				-  *dest = *source;
			
 
				-  lodepng_color_mode_init(&dest->info_raw);
			
 
				-  lodepng_info_init(&dest->info_png);
			
 
				-  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return;
			
 
				-  dest->error = lodepng_info_copy(&dest->info_png, &source->info_png); if(dest->error) return;
			
 
				-}
			
 
				-
			
 
				-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* / PNG Encoder                                                            / */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-/*chunkName must be string of 4 characters*/
			
 
				-static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length)
			
 
				-{
			
 
				-  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data));
			
 
				-  out->allocsize = out->size; /*fix the allocsize again*/
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-static void writeSignature(ucvector* out)
			
 
				-{
			
 
				-  /*8 bytes PNG signature, aka the magic bytes*/
			
 
				-  ucvector_push_back(out, 137);
			
 
				-  ucvector_push_back(out, 80);
			
 
				-  ucvector_push_back(out, 78);
			
 
				-  ucvector_push_back(out, 71);
			
 
				-  ucvector_push_back(out, 13);
			
 
				-  ucvector_push_back(out, 10);
			
 
				-  ucvector_push_back(out, 26);
			
 
				-  ucvector_push_back(out, 10);
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
			
 
				-                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  ucvector header;
			
 
				-  ucvector_init(&header);
			
 
				-
			
 
				-  lodepng_add32bitInt(&header, w); /*width*/
			
 
				-  lodepng_add32bitInt(&header, h); /*height*/
			
 
				-  ucvector_push_back(&header, (unsigned char)bitdepth); /*bit depth*/
			
 
				-  ucvector_push_back(&header, (unsigned char)colortype); /*color type*/
			
 
				-  ucvector_push_back(&header, 0); /*compression method*/
			
 
				-  ucvector_push_back(&header, 0); /*filter method*/
			
 
				-  ucvector_push_back(&header, interlace_method); /*interlace method*/
			
 
				-
			
 
				-  error = addChunk(out, "IHDR", header.data, header.size);
			
 
				-  ucvector_cleanup(&header);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  size_t i;
			
 
				-  ucvector PLTE;
			
 
				-  ucvector_init(&PLTE);
			
 
				-  for(i = 0; i != info->palettesize * 4; ++i)
			
 
				-  {
			
 
				-    /*add all channels except alpha channel*/
			
 
				-    if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]);
			
 
				-  }
			
 
				-  error = addChunk(out, "PLTE", PLTE.data, PLTE.size);
			
 
				-  ucvector_cleanup(&PLTE);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  size_t i;
			
 
				-  ucvector tRNS;
			
 
				-  ucvector_init(&tRNS);
			
 
				-  if(info->colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    size_t amount = info->palettesize;
			
 
				-    /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
			
 
				-    for(i = info->palettesize; i != 0; --i)
			
 
				-    {
			
 
				-      if(info->palette[4 * (i - 1) + 3] == 255) --amount;
			
 
				-      else break;
			
 
				-    }
			
 
				-    /*add only alpha channel*/
			
 
				-    for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]);
			
 
				-  }
			
 
				-  else if(info->colortype == LCT_GREY)
			
 
				-  {
			
 
				-    if(info->key_defined)
			
 
				-    {
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
			
 
				-    }
			
 
				-  }
			
 
				-  else if(info->colortype == LCT_RGB)
			
 
				-  {
			
 
				-    if(info->key_defined)
			
 
				-    {
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8));
			
 
				-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255));
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  error = addChunk(out, "tRNS", tRNS.data, tRNS.size);
			
 
				-  ucvector_cleanup(&tRNS);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
			
 
				-                              LodePNGCompressSettings* zlibsettings)
			
 
				-{
			
 
				-  ucvector zlibdata;
			
 
				-  unsigned error = 0;
			
 
				-
			
 
				-  /*compress with the Zlib compressor*/
			
 
				-  ucvector_init(&zlibdata);
			
 
				-  error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings);
			
 
				-  if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size);
			
 
				-  ucvector_cleanup(&zlibdata);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_IEND(ucvector* out)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  error = addChunk(out, "IEND", 0, 0);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-
			
 
				-static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  size_t i;
			
 
				-  ucvector text;
			
 
				-  ucvector_init(&text);
			
 
				-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
			
 
				-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
			
 
				-  ucvector_push_back(&text, 0); /*0 termination char*/
			
 
				-  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
			
 
				-  error = addChunk(out, "tEXt", text.data, text.size);
			
 
				-  ucvector_cleanup(&text);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
			
 
				-                              LodePNGCompressSettings* zlibsettings)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  ucvector data, compressed;
			
 
				-  size_t i, textsize = strlen(textstring);
			
 
				-
			
 
				-  ucvector_init(&data);
			
 
				-  ucvector_init(&compressed);
			
 
				-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
			
 
				-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
			
 
				-  ucvector_push_back(&data, 0); /*0 termination char*/
			
 
				-  ucvector_push_back(&data, 0); /*compression method: 0*/
			
 
				-
			
 
				-  error = zlib_compress(&compressed.data, &compressed.size,
			
 
				-                        (unsigned char*)textstring, textsize, zlibsettings);
			
 
				-  if(!error)
			
 
				-  {
			
 
				-    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
			
 
				-    error = addChunk(out, "zTXt", data.data, data.size);
			
 
				-  }
			
 
				-
			
 
				-  ucvector_cleanup(&compressed);
			
 
				-  ucvector_cleanup(&data);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
			
 
				-                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  ucvector data;
			
 
				-  size_t i, textsize = strlen(textstring);
			
 
				-
			
 
				-  ucvector_init(&data);
			
 
				-
			
 
				-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
			
 
				-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
			
 
				-  ucvector_push_back(&data, 0); /*null termination char*/
			
 
				-  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
			
 
				-  ucvector_push_back(&data, 0); /*compression method*/
			
 
				-  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
			
 
				-  ucvector_push_back(&data, 0); /*null termination char*/
			
 
				-  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
			
 
				-  ucvector_push_back(&data, 0); /*null termination char*/
			
 
				-
			
 
				-  if(compressed)
			
 
				-  {
			
 
				-    ucvector compressed_data;
			
 
				-    ucvector_init(&compressed_data);
			
 
				-    error = zlib_compress(&compressed_data.data, &compressed_data.size,
			
 
				-                          (unsigned char*)textstring, textsize, zlibsettings);
			
 
				-    if(!error)
			
 
				-    {
			
 
				-      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
			
 
				-    }
			
 
				-    ucvector_cleanup(&compressed_data);
			
 
				-  }
			
 
				-  else /*not compressed*/
			
 
				-  {
			
 
				-    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
			
 
				-  }
			
 
				-
			
 
				-  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
			
 
				-  ucvector_cleanup(&data);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  ucvector bKGD;
			
 
				-  ucvector_init(&bKGD);
			
 
				-  if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA)
			
 
				-  {
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255));
			
 
				-  }
			
 
				-  else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA)
			
 
				-  {
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8));
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255));
			
 
				-  }
			
 
				-  else if(info->color.colortype == LCT_PALETTE)
			
 
				-  {
			
 
				-    ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/
			
 
				-  }
			
 
				-
			
 
				-  error = addChunk(out, "bKGD", bKGD.data, bKGD.size);
			
 
				-  ucvector_cleanup(&bKGD);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  unsigned char* data = (unsigned char*)lodepng_malloc(7);
			
 
				-  if(!data) return 83; /*alloc fail*/
			
 
				-  data[0] = (unsigned char)(time->year >> 8);
			
 
				-  data[1] = (unsigned char)(time->year & 255);
			
 
				-  data[2] = (unsigned char)time->month;
			
 
				-  data[3] = (unsigned char)time->day;
			
 
				-  data[4] = (unsigned char)time->hour;
			
 
				-  data[5] = (unsigned char)time->minute;
			
 
				-  data[6] = (unsigned char)time->second;
			
 
				-  error = addChunk(out, "tIME", data, 7);
			
 
				-  lodepng_free(data);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info)
			
 
				-{
			
 
				-  unsigned error = 0;
			
 
				-  ucvector data;
			
 
				-  ucvector_init(&data);
			
 
				-
			
 
				-  lodepng_add32bitInt(&data, info->phys_x);
			
 
				-  lodepng_add32bitInt(&data, info->phys_y);
			
 
				-  ucvector_push_back(&data, info->phys_unit);
			
 
				-
			
 
				-  error = addChunk(out, "pHYs", data.data, data.size);
			
 
				-  ucvector_cleanup(&data);
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
			
 
				-                           size_t length, size_t bytewidth, unsigned char filterType)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  switch(filterType)
			
 
				-  {
			
 
				-    case 0: /*None*/
			
 
				-      for(i = 0; i != length; ++i) out[i] = scanline[i];
			
 
				-      break;
			
 
				-    case 1: /*Sub*/
			
 
				-      for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
			
 
				-      for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth];
			
 
				-      break;
			
 
				-    case 2: /*Up*/
			
 
				-      if(prevline)
			
 
				-      {
			
 
				-        for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i];
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != length; ++i) out[i] = scanline[i];
			
 
				-      }
			
 
				-      break;
			
 
				-    case 3: /*Average*/
			
 
				-      if(prevline)
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1);
			
 
				-        for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1);
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
			
 
				-        for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1);
			
 
				-      }
			
 
				-      break;
			
 
				-    case 4: /*Paeth*/
			
 
				-      if(prevline)
			
 
				-      {
			
 
				-        /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/
			
 
				-        for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]);
			
 
				-        for(i = bytewidth; i < length; ++i)
			
 
				-        {
			
 
				-          out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth]));
			
 
				-        }
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        for(i = 0; i != bytewidth; ++i) out[i] = scanline[i];
			
 
				-        /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/
			
 
				-        for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]);
			
 
				-      }
			
 
				-      break;
			
 
				-    default: return; /*unexisting filter type given*/
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/* log2 approximation. A slight bit faster than std::log. */
			
 
				-static float flog2(float f)
			
 
				-{
			
 
				-  float result = 0;
			
 
				-  while(f > 32) { result += 4; f /= 16; }
			
 
				-  while(f > 2) { ++result; f /= 2; }
			
 
				-  return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f);
			
 
				-}
			
 
				-
			
 
				-static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
			
 
				-                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings)
			
 
				-{
			
 
				-  /*
			
 
				-  For PNG filter method 0
			
 
				-  out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are
			
 
				-  the scanlines with 1 extra byte per scanline
			
 
				-  */
			
 
				-
			
 
				-  unsigned bpp = lodepng_get_bpp(info);
			
 
				-  /*the width of a scanline in bytes, not including the filter type*/
			
 
				-  size_t linebytes = (w * bpp + 7) / 8;
			
 
				-  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
			
 
				-  size_t bytewidth = (bpp + 7) / 8;
			
 
				-  const unsigned char* prevline = 0;
			
 
				-  unsigned x, y;
			
 
				-  unsigned error = 0;
			
 
				-  LodePNGFilterStrategy strategy = settings->filter_strategy;
			
 
				-
			
 
				-  /*
			
 
				-  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
			
 
				-   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
			
 
				-      use fixed filtering, with the filter None).
			
 
				-   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
			
 
				-     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
			
 
				-     all five filters and select the filter that produces the smallest sum of absolute values per row.
			
 
				-  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
			
 
				-
			
 
				-  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
			
 
				-  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
			
 
				-  heuristic is used.
			
 
				-  */
			
 
				-  if(settings->filter_palette_zero &&
			
 
				-     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
			
 
				-
			
 
				-  if(bpp == 0) return 31; /*error: invalid color type*/
			
 
				-
			
 
				-  if(strategy == LFS_ZERO)
			
 
				-  {
			
 
				-    for(y = 0; y != h; ++y)
			
 
				-    {
			
 
				-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
			
 
				-      size_t inindex = linebytes * y;
			
 
				-      out[outindex] = 0; /*filter type byte*/
			
 
				-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0);
			
 
				-      prevline = &in[inindex];
			
 
				-    }
			
 
				-  }
			
 
				-  else if(strategy == LFS_MINSUM)
			
 
				-  {
			
 
				-    /*adaptive filtering*/
			
 
				-    size_t sum[5];
			
 
				-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
			
 
				-    size_t smallest = 0;
			
 
				-    unsigned char type, bestType = 0;
			
 
				-
			
 
				-    for(type = 0; type != 5; ++type)
			
 
				-    {
			
 
				-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
			
 
				-      if(!attempt[type]) return 83; /*alloc fail*/
			
 
				-    }
			
 
				-
			
 
				-    if(!error)
			
 
				-    {
			
 
				-      for(y = 0; y != h; ++y)
			
 
				-      {
			
 
				-        /*try the 5 filter types*/
			
 
				-        for(type = 0; type != 5; ++type)
			
 
				-        {
			
 
				-          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
			
 
				-
			
 
				-          /*calculate the sum of the result*/
			
 
				-          sum[type] = 0;
			
 
				-          if(type == 0)
			
 
				-          {
			
 
				-            for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]);
			
 
				-          }
			
 
				-          else
			
 
				-          {
			
 
				-            for(x = 0; x != linebytes; ++x)
			
 
				-            {
			
 
				-              /*For differences, each byte should be treated as signed, values above 127 are negative
			
 
				-              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
			
 
				-              This means filtertype 0 is almost never chosen, but that is justified.*/
			
 
				-              unsigned char s = attempt[type][x];
			
 
				-              sum[type] += s < 128 ? s : (255U - s);
			
 
				-            }
			
 
				-          }
			
 
				-
			
 
				-          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
			
 
				-          if(type == 0 || sum[type] < smallest)
			
 
				-          {
			
 
				-            bestType = type;
			
 
				-            smallest = sum[type];
			
 
				-          }
			
 
				-        }
			
 
				-
			
 
				-        prevline = &in[y * linebytes];
			
 
				-
			
 
				-        /*now fill the out values*/
			
 
				-        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
			
 
				-        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
			
 
				-  }
			
 
				-  else if(strategy == LFS_ENTROPY)
			
 
				-  {
			
 
				-    float sum[5];
			
 
				-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
			
 
				-    float smallest = 0;
			
 
				-    unsigned type, bestType = 0;
			
 
				-    unsigned count[256];
			
 
				-
			
 
				-    for(type = 0; type != 5; ++type)
			
 
				-    {
			
 
				-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
			
 
				-      if(!attempt[type]) return 83; /*alloc fail*/
			
 
				-    }
			
 
				-
			
 
				-    for(y = 0; y != h; ++y)
			
 
				-    {
			
 
				-      /*try the 5 filter types*/
			
 
				-      for(type = 0; type != 5; ++type)
			
 
				-      {
			
 
				-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
			
 
				-        for(x = 0; x != 256; ++x) count[x] = 0;
			
 
				-        for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
			
 
				-        ++count[type]; /*the filter type itself is part of the scanline*/
			
 
				-        sum[type] = 0;
			
 
				-        for(x = 0; x != 256; ++x)
			
 
				-        {
			
 
				-          float p = count[x] / (float)(linebytes + 1);
			
 
				-          sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p;
			
 
				-        }
			
 
				-        /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
			
 
				-        if(type == 0 || sum[type] < smallest)
			
 
				-        {
			
 
				-          bestType = type;
			
 
				-          smallest = sum[type];
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      prevline = &in[y * linebytes];
			
 
				-
			
 
				-      /*now fill the out values*/
			
 
				-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
			
 
				-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
			
 
				-    }
			
 
				-
			
 
				-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
			
 
				-  }
			
 
				-  else if(strategy == LFS_PREDEFINED)
			
 
				-  {
			
 
				-    for(y = 0; y != h; ++y)
			
 
				-    {
			
 
				-      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
			
 
				-      size_t inindex = linebytes * y;
			
 
				-      unsigned char type = settings->predefined_filters[y];
			
 
				-      out[outindex] = type; /*filter type byte*/
			
 
				-      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
			
 
				-      prevline = &in[inindex];
			
 
				-    }
			
 
				-  }
			
 
				-  else if(strategy == LFS_BRUTE_FORCE)
			
 
				-  {
			
 
				-    /*brute force filter chooser.
			
 
				-    deflate the scanline after every filter attempt to see which one deflates best.
			
 
				-    This is very slow and gives only slightly smaller, sometimes even larger, result*/
			
 
				-    size_t size[5];
			
 
				-    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
			
 
				-    size_t smallest = 0;
			
 
				-    unsigned type = 0, bestType = 0;
			
 
				-    unsigned char* dummy;
			
 
				-    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
			
 
				-    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
			
 
				-    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
			
 
				-    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
			
 
				-    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
			
 
				-    zlibsettings.btype = 1;
			
 
				-    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
			
 
				-    images only, so disable it*/
			
 
				-    zlibsettings.custom_zlib = 0;
			
 
				-    zlibsettings.custom_deflate = 0;
			
 
				-    for(type = 0; type != 5; ++type)
			
 
				-    {
			
 
				-      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
			
 
				-      if(!attempt[type]) return 83; /*alloc fail*/
			
 
				-    }
			
 
				-    for(y = 0; y != h; ++y) /*try the 5 filter types*/
			
 
				-    {
			
 
				-      for(type = 0; type != 5; ++type)
			
 
				-      {
			
 
				-        unsigned testsize = linebytes;
			
 
				-        /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
			
 
				-
			
 
				-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
			
 
				-        size[type] = 0;
			
 
				-        dummy = 0;
			
 
				-        zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
			
 
				-        lodepng_free(dummy);
			
 
				-        /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
			
 
				-        if(type == 0 || size[type] < smallest)
			
 
				-        {
			
 
				-          bestType = type;
			
 
				-          smallest = size[type];
			
 
				-        }
			
 
				-      }
			
 
				-      prevline = &in[y * linebytes];
			
 
				-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
			
 
				-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
			
 
				-    }
			
 
				-    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
			
 
				-  }
			
 
				-  else return 88; /* unknown filter strategy */
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-static void addPaddingBits(unsigned char* out, const unsigned char* in,
			
 
				-                           size_t olinebits, size_t ilinebits, unsigned h)
			
 
				-{
			
 
				-  /*The opposite of the removePaddingBits function
			
 
				-  olinebits must be >= ilinebits*/
			
 
				-  unsigned y;
			
 
				-  size_t diff = olinebits - ilinebits;
			
 
				-  size_t obp = 0, ibp = 0; /*bit pointers*/
			
 
				-  for(y = 0; y != h; ++y)
			
 
				-  {
			
 
				-    size_t x;
			
 
				-    for(x = 0; x < ilinebits; ++x)
			
 
				-    {
			
 
				-      unsigned char bit = readBitFromReversedStream(&ibp, in);
			
 
				-      setBitOfReversedStream(&obp, out, bit);
			
 
				-    }
			
 
				-    /*obp += diff; --> no, fill in some value in the padding bits too, to avoid
			
 
				-    "Use of uninitialised value of size ###" warning from valgrind*/
			
 
				-    for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0);
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-in: non-interlaced image with size w*h
			
 
				-out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with
			
 
				- no padding bits between scanlines, but between reduced images so that each
			
 
				- reduced image starts at a byte.
			
 
				-bpp: bits per pixel
			
 
				-there are no padding bits, not between scanlines, not between reduced images
			
 
				-in has the following size in bits: w * h * bpp.
			
 
				-out is possibly bigger due to padding bits between reduced images
			
 
				-NOTE: comments about padding bits are only relevant if bpp < 8
			
 
				-*/
			
 
				-static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
			
 
				-{
			
 
				-  unsigned passw[7], passh[7];
			
 
				-  size_t filter_passstart[8], padded_passstart[8], passstart[8];
			
 
				-  unsigned i;
			
 
				-
			
 
				-  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
			
 
				-
			
 
				-  if(bpp >= 8)
			
 
				-  {
			
 
				-    for(i = 0; i != 7; ++i)
			
 
				-    {
			
 
				-      unsigned x, y, b;
			
 
				-      size_t bytewidth = bpp / 8;
			
 
				-      for(y = 0; y < passh[i]; ++y)
			
 
				-      for(x = 0; x < passw[i]; ++x)
			
 
				-      {
			
 
				-        size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
			
 
				-        size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth;
			
 
				-        for(b = 0; b < bytewidth; ++b)
			
 
				-        {
			
 
				-          out[pixeloutstart + b] = in[pixelinstart + b];
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
			
 
				-  {
			
 
				-    for(i = 0; i != 7; ++i)
			
 
				-    {
			
 
				-      unsigned x, y, b;
			
 
				-      unsigned ilinebits = bpp * passw[i];
			
 
				-      unsigned olinebits = bpp * w;
			
 
				-      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
			
 
				-      for(y = 0; y < passh[i]; ++y)
			
 
				-      for(x = 0; x < passw[i]; ++x)
			
 
				-      {
			
 
				-        ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
			
 
				-        obp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
			
 
				-        for(b = 0; b < bpp; ++b)
			
 
				-        {
			
 
				-          unsigned char bit = readBitFromReversedStream(&ibp, in);
			
 
				-          setBitOfReversedStream(&obp, out, bit);
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image.
			
 
				-return value is error**/
			
 
				-static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
			
 
				-                                    unsigned w, unsigned h,
			
 
				-                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings)
			
 
				-{
			
 
				-  /*
			
 
				-  This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps:
			
 
				-  *) if no Adam7: 1) add padding bits (= posible extra bits per scanline if bpp < 8) 2) filter
			
 
				-  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
			
 
				-  */
			
 
				-  unsigned bpp = lodepng_get_bpp(&info_png->color);
			
 
				-  unsigned error = 0;
			
 
				-
			
 
				-  if(info_png->interlace_method == 0)
			
 
				-  {
			
 
				-    *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/
			
 
				-    *out = (unsigned char*)lodepng_malloc(*outsize);
			
 
				-    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
			
 
				-
			
 
				-    if(!error)
			
 
				-    {
			
 
				-      /*non multiple of 8 bits per scanline, padding bits needed per scanline*/
			
 
				-      if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8)
			
 
				-      {
			
 
				-        unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8));
			
 
				-        if(!padded) error = 83; /*alloc fail*/
			
 
				-        if(!error)
			
 
				-        {
			
 
				-          addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h);
			
 
				-          error = filter(*out, padded, w, h, &info_png->color, settings);
			
 
				-        }
			
 
				-        lodepng_free(padded);
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        /*we can immediately filter into the out buffer, no other steps needed*/
			
 
				-        error = filter(*out, in, w, h, &info_png->color, settings);
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-  else /*interlace_method is 1 (Adam7)*/
			
 
				-  {
			
 
				-    unsigned passw[7], passh[7];
			
 
				-    size_t filter_passstart[8], padded_passstart[8], passstart[8];
			
 
				-    unsigned char* adam7;
			
 
				-
			
 
				-    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
			
 
				-
			
 
				-    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
			
 
				-    *out = (unsigned char*)lodepng_malloc(*outsize);
			
 
				-    if(!(*out)) error = 83; /*alloc fail*/
			
 
				-
			
 
				-    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
			
 
				-    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
			
 
				-
			
 
				-    if(!error)
			
 
				-    {
			
 
				-      unsigned i;
			
 
				-
			
 
				-      Adam7_interlace(adam7, in, w, h, bpp);
			
 
				-      for(i = 0; i != 7; ++i)
			
 
				-      {
			
 
				-        if(bpp < 8)
			
 
				-        {
			
 
				-          unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]);
			
 
				-          if(!padded) ERROR_BREAK(83); /*alloc fail*/
			
 
				-          addPaddingBits(padded, &adam7[passstart[i]],
			
 
				-                         ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]);
			
 
				-          error = filter(&(*out)[filter_passstart[i]], padded,
			
 
				-                         passw[i], passh[i], &info_png->color, settings);
			
 
				-          lodepng_free(padded);
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-          error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]],
			
 
				-                         passw[i], passh[i], &info_png->color, settings);
			
 
				-        }
			
 
				-
			
 
				-        if(error) break;
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    lodepng_free(adam7);
			
 
				-  }
			
 
				-
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
			
 
				-returns 0 if the palette is opaque,
			
 
				-returns 1 if the palette has a single color with alpha 0 ==> color key
			
 
				-returns 2 if the palette is semi-translucent.
			
 
				-*/
			
 
				-static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize)
			
 
				-{
			
 
				-  size_t i;
			
 
				-  unsigned key = 0;
			
 
				-  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/
			
 
				-  for(i = 0; i != palettesize; ++i)
			
 
				-  {
			
 
				-    if(!key && palette[4 * i + 3] == 0)
			
 
				-    {
			
 
				-      r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2];
			
 
				-      key = 1;
			
 
				-      i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/
			
 
				-    }
			
 
				-    else if(palette[4 * i + 3] != 255) return 2;
			
 
				-    /*when key, no opaque RGB may have key's RGB*/
			
 
				-    else if(key && r == palette[i * 4 + 0] && g == palette[i * 4 + 1] && b == palette[i * 4 + 2]) return 2;
			
 
				-  }
			
 
				-  return key;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize)
			
 
				-{
			
 
				-  unsigned char* inchunk = data;
			
 
				-  while((size_t)(inchunk - data) < datasize)
			
 
				-  {
			
 
				-    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk));
			
 
				-    out->allocsize = out->size; /*fix the allocsize again*/
			
 
				-    inchunk = lodepng_chunk_next(inchunk);
			
 
				-  }
			
 
				-  return 0;
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
			
 
				-                        const unsigned char* image, unsigned w, unsigned h,
			
 
				-                        LodePNGState* state)
			
 
				-{
			
 
				-  LodePNGInfo info;
			
 
				-  ucvector outv;
			
 
				-  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
			
 
				-  size_t datasize = 0;
			
 
				-
			
 
				-  /*provide some proper output values if error will happen*/
			
 
				-  *out = 0;
			
 
				-  *outsize = 0;
			
 
				-  state->error = 0;
			
 
				-
			
 
				-  lodepng_info_init(&info);
			
 
				-  lodepng_info_copy(&info, &state->info_png);
			
 
				-
			
 
				-  if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette)
			
 
				-      && (info.color.palettesize == 0 || info.color.palettesize > 256))
			
 
				-  {
			
 
				-    state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/
			
 
				-    return state->error;
			
 
				-  }
			
 
				-
			
 
				-  if(state->encoder.auto_convert)
			
 
				-  {
			
 
				-    state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw);
			
 
				-  }
			
 
				-  if(state->error) return state->error;
			
 
				-
			
 
				-  if(state->encoder.zlibsettings.btype > 2)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/
			
 
				-  }
			
 
				-  if(state->info_png.interlace_method > 1)
			
 
				-  {
			
 
				-    CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/
			
 
				-  }
			
 
				-
			
 
				-  state->error = checkColorValidity(info.color.colortype, info.color.bitdepth);
			
 
				-  if(state->error) return state->error; /*error: unexisting color type given*/
			
 
				-  state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
			
 
				-  if(state->error) return state->error; /*error: unexisting color type given*/
			
 
				-
			
 
				-  if(!lodepng_color_mode_equal(&state->info_raw, &info.color))
			
 
				-  {
			
 
				-    unsigned char* converted;
			
 
				-    size_t size = (w * h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8;
			
 
				-
			
 
				-    converted = (unsigned char*)lodepng_malloc(size);
			
 
				-    if(!converted && size) state->error = 83; /*alloc fail*/
			
 
				-    if(!state->error)
			
 
				-    {
			
 
				-      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
			
 
				-    }
			
 
				-    if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
			
 
				-    lodepng_free(converted);
			
 
				-  }
			
 
				-  else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
			
 
				-
			
 
				-  ucvector_init(&outv);
			
 
				-  while(!state->error) /*while only executed once, to break on error*/
			
 
				-  {
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-    size_t i;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    /*write signature and chunks*/
			
 
				-    writeSignature(&outv);
			
 
				-    /*IHDR*/
			
 
				-    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-    /*unknown chunks between IHDR and PLTE*/
			
 
				-    if(info.unknown_chunks_data[0])
			
 
				-    {
			
 
				-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    /*PLTE*/
			
 
				-    if(info.color.colortype == LCT_PALETTE)
			
 
				-    {
			
 
				-      addChunk_PLTE(&outv, &info.color);
			
 
				-    }
			
 
				-    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA))
			
 
				-    {
			
 
				-      addChunk_PLTE(&outv, &info.color);
			
 
				-    }
			
 
				-    /*tRNS*/
			
 
				-    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0)
			
 
				-    {
			
 
				-      addChunk_tRNS(&outv, &info.color);
			
 
				-    }
			
 
				-    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined)
			
 
				-    {
			
 
				-      addChunk_tRNS(&outv, &info.color);
			
 
				-    }
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-    /*bKGD (must come between PLTE and the IDAt chunks*/
			
 
				-    if(info.background_defined) addChunk_bKGD(&outv, &info);
			
 
				-    /*pHYs (must come before the IDAT chunks)*/
			
 
				-    if(info.phys_defined) addChunk_pHYs(&outv, &info);
			
 
				-
			
 
				-    /*unknown chunks between PLTE and IDAT*/
			
 
				-    if(info.unknown_chunks_data[1])
			
 
				-    {
			
 
				-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    /*IDAT (multiple IDAT chunks must be consecutive)*/
			
 
				-    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
			
 
				-    if(state->error) break;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-    /*tIME*/
			
 
				-    if(info.time_defined) addChunk_tIME(&outv, &info.time);
			
 
				-    /*tEXt and/or zTXt*/
			
 
				-    for(i = 0; i != info.text_num; ++i)
			
 
				-    {
			
 
				-      if(strlen(info.text_keys[i]) > 79)
			
 
				-      {
			
 
				-        state->error = 66; /*text chunk too large*/
			
 
				-        break;
			
 
				-      }
			
 
				-      if(strlen(info.text_keys[i]) < 1)
			
 
				-      {
			
 
				-        state->error = 67; /*text chunk too small*/
			
 
				-        break;
			
 
				-      }
			
 
				-      if(state->encoder.text_compression)
			
 
				-      {
			
 
				-        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
			
 
				-      }
			
 
				-    }
			
 
				-    /*LodePNG version id in text chunk*/
			
 
				-    if(state->encoder.add_id)
			
 
				-    {
			
 
				-      unsigned alread_added_id_text = 0;
			
 
				-      for(i = 0; i != info.text_num; ++i)
			
 
				-      {
			
 
				-        if(!strcmp(info.text_keys[i], "LodePNG"))
			
 
				-        {
			
 
				-          alread_added_id_text = 1;
			
 
				-          break;
			
 
				-        }
			
 
				-      }
			
 
				-      if(alread_added_id_text == 0)
			
 
				-      {
			
 
				-        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
			
 
				-      }
			
 
				-    }
			
 
				-    /*iTXt*/
			
 
				-    for(i = 0; i != info.itext_num; ++i)
			
 
				-    {
			
 
				-      if(strlen(info.itext_keys[i]) > 79)
			
 
				-      {
			
 
				-        state->error = 66; /*text chunk too large*/
			
 
				-        break;
			
 
				-      }
			
 
				-      if(strlen(info.itext_keys[i]) < 1)
			
 
				-      {
			
 
				-        state->error = 67; /*text chunk too small*/
			
 
				-        break;
			
 
				-      }
			
 
				-      addChunk_iTXt(&outv, state->encoder.text_compression,
			
 
				-                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
			
 
				-                    &state->encoder.zlibsettings);
			
 
				-    }
			
 
				-
			
 
				-    /*unknown chunks between IDAT and IEND*/
			
 
				-    if(info.unknown_chunks_data[2])
			
 
				-    {
			
 
				-      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
			
 
				-      if(state->error) break;
			
 
				-    }
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-    addChunk_IEND(&outv);
			
 
				-
			
 
				-    break; /*this isn't really a while loop; no error happened so break out now!*/
			
 
				-  }
			
 
				-
			
 
				-  lodepng_info_cleanup(&info);
			
 
				-  lodepng_free(data);
			
 
				-  /*instead of cleaning the vector up, give it to the output*/
			
 
				-  *out = outv.data;
			
 
				-  *outsize = outv.size;
			
 
				-
			
 
				-  return state->error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
			
 
				-                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned error;
			
 
				-  LodePNGState state;
			
 
				-  lodepng_state_init(&state);
			
 
				-  state.info_raw.colortype = colortype;
			
 
				-  state.info_raw.bitdepth = bitdepth;
			
 
				-  state.info_png.color.colortype = colortype;
			
 
				-  state.info_png.color.bitdepth = bitdepth;
			
 
				-  lodepng_encode(out, outsize, image, w, h, &state);
			
 
				-  error = state.error;
			
 
				-  lodepng_state_cleanup(&state);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
			
 
				-{
			
 
				-  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
			
 
				-{
			
 
				-  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8);
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
			
 
				-                             LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned char* buffer;
			
 
				-  size_t buffersize;
			
 
				-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth);
			
 
				-  if(!error) error = lodepng_save_file(buffer, buffersize, filename);
			
 
				-  lodepng_free(buffer);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
			
 
				-{
			
 
				-  return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8);
			
 
				-}
			
 
				-
			
 
				-unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
			
 
				-{
			
 
				-  return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8);
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-
			
 
				-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings)
			
 
				-{
			
 
				-  lodepng_compress_settings_init(&settings->zlibsettings);
			
 
				-  settings->filter_palette_zero = 1;
			
 
				-  settings->filter_strategy = LFS_MINSUM;
			
 
				-  settings->auto_convert = 1;
			
 
				-  settings->force_palette = 0;
			
 
				-  settings->predefined_filters = 0;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  settings->add_id = 0;
			
 
				-  settings->text_compression = 1;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-}
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ERROR_TEXT
			
 
				-/*
			
 
				-This returns the description of a numerical error code in English. This is also
			
 
				-the documentation of all the error codes.
			
 
				-*/
			
 
				-const char* lodepng_error_text(unsigned code)
			
 
				-{
			
 
				-  switch(code)
			
 
				-  {
			
 
				-    case 0: return "no error, everything went ok";
			
 
				-    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
			
 
				-    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
			
 
				-    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
			
 
				-    case 13: return "problem while processing dynamic deflate block";
			
 
				-    case 14: return "problem while processing dynamic deflate block";
			
 
				-    case 15: return "problem while processing dynamic deflate block";
			
 
				-    case 16: return "unexisting code while processing dynamic deflate block";
			
 
				-    case 17: return "end of out buffer memory reached while inflating";
			
 
				-    case 18: return "invalid distance code while inflating";
			
 
				-    case 19: return "end of out buffer memory reached while inflating";
			
 
				-    case 20: return "invalid deflate block BTYPE encountered while decoding";
			
 
				-    case 21: return "NLEN is not ones complement of LEN in a deflate block";
			
 
				-     /*end of out buffer memory reached while inflating:
			
 
				-     This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
			
 
				-     all the pixels of the image, given the color depth and image dimensions. Something that doesn't
			
 
				-     happen in a normal, well encoded, PNG image.*/
			
 
				-    case 22: return "end of out buffer memory reached while inflating";
			
 
				-    case 23: return "end of in buffer memory reached while inflating";
			
 
				-    case 24: return "invalid FCHECK in zlib header";
			
 
				-    case 25: return "invalid compression method in zlib header";
			
 
				-    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
			
 
				-    case 27: return "PNG file is smaller than a PNG header";
			
 
				-    /*Checks the magic file header, the first 8 bytes of the PNG file*/
			
 
				-    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
			
 
				-    case 29: return "first chunk is not the header chunk";
			
 
				-    case 30: return "chunk length too large, chunk broken off at end of file";
			
 
				-    case 31: return "illegal PNG color type or bpp";
			
 
				-    case 32: return "illegal PNG compression method";
			
 
				-    case 33: return "illegal PNG filter method";
			
 
				-    case 34: return "illegal PNG interlace method";
			
 
				-    case 35: return "chunk length of a chunk is too large or the chunk too small";
			
 
				-    case 36: return "illegal PNG filter type encountered";
			
 
				-    case 37: return "illegal bit depth for this color type given";
			
 
				-    case 38: return "the palette is too big"; /*more than 256 colors*/
			
 
				-    case 39: return "more palette alpha values given in tRNS chunk than there are colors in the palette";
			
 
				-    case 40: return "tRNS chunk has wrong size for greyscale image";
			
 
				-    case 41: return "tRNS chunk has wrong size for RGB image";
			
 
				-    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
			
 
				-    case 43: return "bKGD chunk has wrong size for palette image";
			
 
				-    case 44: return "bKGD chunk has wrong size for greyscale image";
			
 
				-    case 45: return "bKGD chunk has wrong size for RGB image";
			
 
				-    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
			
 
				-    case 49: return "jumped past memory while generating dynamic huffman tree";
			
 
				-    case 50: return "jumped past memory while generating dynamic huffman tree";
			
 
				-    case 51: return "jumped past memory while inflating huffman block";
			
 
				-    case 52: return "jumped past memory while inflating";
			
 
				-    case 53: return "size of zlib data too small";
			
 
				-    case 54: return "repeat symbol in tree while there was no value symbol yet";
			
 
				-    /*jumped past tree while generating huffman tree, this could be when the
			
 
				-    tree will have more leaves than symbols after generating it out of the
			
 
				-    given lenghts. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
			
 
				-    case 55: return "jumped past tree while generating huffman tree";
			
 
				-    case 56: return "given output image colortype or bitdepth not supported for color conversion";
			
 
				-    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
			
 
				-    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
			
 
				-    case 59: return "requested color conversion not supported";
			
 
				-    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
			
 
				-    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
			
 
				-    /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/
			
 
				-    case 62: return "conversion from color to greyscale not supported";
			
 
				-    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; /*(2^31-1)*/
			
 
				-    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
			
 
				-    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
			
 
				-    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
			
 
				-    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
			
 
				-    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
			
 
				-    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
			
 
				-    case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)";
			
 
				-    case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)";
			
 
				-    case 73: return "invalid tIME chunk size";
			
 
				-    case 74: return "invalid pHYs chunk size";
			
 
				-    /*length could be wrong, or data chopped off*/
			
 
				-    case 75: return "no null termination char found while decoding text chunk";
			
 
				-    case 76: return "iTXt chunk too short to contain required bytes";
			
 
				-    case 77: return "integer overflow in buffer size";
			
 
				-    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
			
 
				-    case 79: return "failed to open file for writing";
			
 
				-    case 80: return "tried creating a tree of 0 symbols";
			
 
				-    case 81: return "lazy matching at pos 0 is impossible";
			
 
				-    case 82: return "color conversion to palette requested while a color isn't in palette";
			
 
				-    case 83: return "memory allocation failed";
			
 
				-    case 84: return "given image too small to contain all pixels to be encoded";
			
 
				-    case 86: return "impossible offset in lz77 encoding (internal bug)";
			
 
				-    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
			
 
				-    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
			
 
				-    case 89: return "text chunk keyword too short or long: must have size 1-79";
			
 
				-    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
			
 
				-    case 90: return "windowsize must be a power of two";
			
 
				-    case 91: return "invalid decompressed idat size";
			
 
				-    case 92: return "too many pixels, not supported";
			
 
				-    case 93: return "zero width or height is invalid";
			
 
				-    case 94: return "header chunk must have a size of 13 bytes";
			
 
				-  }
			
 
				-  return "unknown error code";
			
 
				-}
			
 
				-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
			
 
				-
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* // C++ Wrapper                                                          // */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-/* ////////////////////////////////////////////////////////////////////////// */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_CPP
			
 
				-namespace lodepng
			
 
				-{
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename)
			
 
				-{
			
 
				-  long size = lodepng_filesize(filename.c_str());
			
 
				-  if(size < 0) return 78;
			
 
				-  buffer.resize((size_t)size);
			
 
				-  return size == 0 ? 0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str());
			
 
				-}
			
 
				-
			
 
				-/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
			
 
				-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename)
			
 
				-{
			
 
				-  return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str());
			
 
				-}
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
			
 
				-                    const LodePNGDecompressSettings& settings)
			
 
				-{
			
 
				-  unsigned char* buffer = 0;
			
 
				-  size_t buffersize = 0;
			
 
				-  unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings);
			
 
				-  if(buffer)
			
 
				-  {
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-    lodepng_free(buffer);
			
 
				-  }
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
			
 
				-                    const LodePNGDecompressSettings& settings)
			
 
				-{
			
 
				-  return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings);
			
 
				-}
			
 
				-#endif /* LODEPNG_COMPILE_DECODER */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
			
 
				-                  const LodePNGCompressSettings& settings)
			
 
				-{
			
 
				-  unsigned char* buffer = 0;
			
 
				-  size_t buffersize = 0;
			
 
				-  unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings);
			
 
				-  if(buffer)
			
 
				-  {
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-    lodepng_free(buffer);
			
 
				-  }
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
			
 
				-                  const LodePNGCompressSettings& settings)
			
 
				-{
			
 
				-  return compress(out, in.empty() ? 0 : &in[0], in.size(), settings);
			
 
				-}
			
 
				-#endif /* LODEPNG_COMPILE_ENCODER */
			
 
				-#endif /* LODEPNG_COMPILE_ZLIB */
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-
			
 
				-State::State()
			
 
				-{
			
 
				-  lodepng_state_init(this);
			
 
				-}
			
 
				-
			
 
				-State::State(const State& other)
			
 
				-{
			
 
				-  lodepng_state_init(this);
			
 
				-  lodepng_state_copy(this, &other);
			
 
				-}
			
 
				-
			
 
				-State::~State()
			
 
				-{
			
 
				-  lodepng_state_cleanup(this);
			
 
				-}
			
 
				-
			
 
				-State& State::operator=(const State& other)
			
 
				-{
			
 
				-  lodepng_state_copy(this, &other);
			
 
				-  return *this;
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
			
 
				-                size_t insize, LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned char* buffer;
			
 
				-  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
			
 
				-  if(buffer && !error)
			
 
				-  {
			
 
				-    State state;
			
 
				-    state.info_raw.colortype = colortype;
			
 
				-    state.info_raw.bitdepth = bitdepth;
			
 
				-    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-    lodepng_free(buffer);
			
 
				-  }
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth);
			
 
				-}
			
 
				-
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                State& state,
			
 
				-                const unsigned char* in, size_t insize)
			
 
				-{
			
 
				-  unsigned char* buffer = NULL;
			
 
				-  unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize);
			
 
				-  if(buffer && !error)
			
 
				-  {
			
 
				-    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-  }
			
 
				-  lodepng_free(buffer);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                State& state,
			
 
				-                const std::vector<unsigned char>& in)
			
 
				-{
			
 
				-  return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size());
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
			
 
				-                LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  std::vector<unsigned char> buffer;
			
 
				-  unsigned error = load_file(buffer, filename);
			
 
				-  if(error) return error;
			
 
				-  return decode(out, w, h, buffer, colortype, bitdepth);
			
 
				-}
			
 
				-#endif /* LODEPNG_COMPILE_DECODER */
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  unsigned char* buffer;
			
 
				-  size_t buffersize;
			
 
				-  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
			
 
				-  if(buffer)
			
 
				-  {
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-    lodepng_free(buffer);
			
 
				-  }
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
			
 
				-  return encode(out, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
			
 
				-}
			
 
				-
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const unsigned char* in, unsigned w, unsigned h,
			
 
				-                State& state)
			
 
				-{
			
 
				-  unsigned char* buffer;
			
 
				-  size_t buffersize;
			
 
				-  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
			
 
				-  if(buffer)
			
 
				-  {
			
 
				-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
			
 
				-    lodepng_free(buffer);
			
 
				-  }
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                State& state)
			
 
				-{
			
 
				-  if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84;
			
 
				-  return encode(out, in.empty() ? 0 : &in[0], w, h, state);
			
 
				-}
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-unsigned encode(const std::string& filename,
			
 
				-                const unsigned char* in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  std::vector<unsigned char> buffer;
			
 
				-  unsigned error = encode(buffer, in, w, h, colortype, bitdepth);
			
 
				-  if(!error) error = save_file(buffer, filename);
			
 
				-  return error;
			
 
				-}
			
 
				-
			
 
				-unsigned encode(const std::string& filename,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype, unsigned bitdepth)
			
 
				-{
			
 
				-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
			
 
				-  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
			
 
				-}
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-#endif /* LODEPNG_COMPILE_ENCODER */
			
 
				-#endif /* LODEPNG_COMPILE_PNG */
			
 
				-} /* namespace lodepng */
			
 
				-#endif /*LODEPNG_COMPILE_CPP*/
			
--- a/3rdparty/lodepng/lodepng.h
+++ b/3rdparty/lodepng/lodepng.h
@@ -1,1759 +0,0 @@
 
				-/*
			
 
				-LodePNG version 20160501
			
 
				-
			
 
				-Copyright (c) 2005-2016 Lode Vandevenne
			
 
				-
			
 
				-This software is provided 'as-is', without any express or implied
			
 
				-warranty. In no event will the authors be held liable for any damages
			
 
				-arising from the use of this software.
			
 
				-
			
 
				-Permission is granted to anyone to use this software for any purpose,
			
 
				-including commercial applications, and to alter it and redistribute it
			
 
				-freely, subject to the following restrictions:
			
 
				-
			
 
				-    1. The origin of this software must not be misrepresented; you must not
			
 
				-    claim that you wrote the original software. If you use this software
			
 
				-    in a product, an acknowledgment in the product documentation would be
			
 
				-    appreciated but is not required.
			
 
				-
			
 
				-    2. Altered source versions must be plainly marked as such, and must not be
			
 
				-    misrepresented as being the original software.
			
 
				-
			
 
				-    3. This notice may not be removed or altered from any source
			
 
				-    distribution.
			
 
				-*/
			
 
				-
			
 
				-#ifndef LODEPNG_H
			
 
				-#define LODEPNG_H
			
 
				-
			
 
				-#include <string.h> /*for size_t*/
			
 
				-
			
 
				-extern const char* LODEPNG_VERSION_STRING;
			
 
				-
			
 
				-/*
			
 
				-The following #defines are used to create code sections. They can be disabled
			
 
				-to disable code sections, which can give faster compile time and smaller binary.
			
 
				-The "NO_COMPILE" defines are designed to be used to pass as defines to the
			
 
				-compiler command to disable them without modifying this header, e.g.
			
 
				--DLODEPNG_NO_COMPILE_ZLIB for gcc.
			
 
				-In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
			
 
				-allow implementing a custom lodepng_crc32.
			
 
				-*/
			
 
				-/*deflate & zlib. If disabled, you must specify alternative zlib functions in
			
 
				-the custom_zlib field of the compress and decompress settings*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_ZLIB
			
 
				-#define LODEPNG_COMPILE_ZLIB
			
 
				-#endif
			
 
				-/*png encoder and png decoder*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_PNG
			
 
				-#define LODEPNG_COMPILE_PNG
			
 
				-#endif
			
 
				-/*deflate&zlib decoder and png decoder*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_DECODER
			
 
				-#define LODEPNG_COMPILE_DECODER
			
 
				-#endif
			
 
				-/*deflate&zlib encoder and png encoder*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_ENCODER
			
 
				-#define LODEPNG_COMPILE_ENCODER
			
 
				-#endif
			
 
				-/*the optional built in harddisk file loading and saving functions*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_DISK
			
 
				-#define LODEPNG_COMPILE_DISK
			
 
				-#endif
			
 
				-/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
			
 
				-#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-#endif
			
 
				-/*ability to convert error numerical codes to English text string*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
			
 
				-#define LODEPNG_COMPILE_ERROR_TEXT
			
 
				-#endif
			
 
				-/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
			
 
				-you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
			
 
				-source files with custom allocators.*/
			
 
				-#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
			
 
				-#define LODEPNG_COMPILE_ALLOCATORS
			
 
				-#endif
			
 
				-/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
			
 
				-#ifdef __cplusplus
			
 
				-#ifndef LODEPNG_NO_COMPILE_CPP
			
 
				-#define LODEPNG_COMPILE_CPP
			
 
				-#endif
			
 
				-#endif
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_CPP
			
 
				-#include <vector>
			
 
				-#include <string>
			
 
				-#endif /*LODEPNG_COMPILE_CPP*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-/*The PNG color types (also used for raw).*/
			
 
				-typedef enum LodePNGColorType
			
 
				-{
			
 
				-  LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/
			
 
				-  LCT_RGB = 2, /*RGB: 8,16 bit*/
			
 
				-  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
			
 
				-  LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/
			
 
				-  LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/
			
 
				-} LodePNGColorType;
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*
			
 
				-Converts PNG data in memory to raw pixel data.
			
 
				-out: Output parameter. Pointer to buffer that will contain the raw pixel data.
			
 
				-     After decoding, its size is w * h * (bytes per pixel) bytes larger than
			
 
				-     initially. Bytes per pixel depends on colortype and bitdepth.
			
 
				-     Must be freed after usage with free(*out).
			
 
				-     Note: for 16-bit per channel colors, uses big endian format like PNG does.
			
 
				-w: Output parameter. Pointer to width of pixel data.
			
 
				-h: Output parameter. Pointer to height of pixel data.
			
 
				-in: Memory buffer with the PNG file.
			
 
				-insize: size of the in buffer.
			
 
				-colortype: the desired color type for the raw output image. See explanation on PNG color types.
			
 
				-bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
			
 
				-Return value: LodePNG error code (0 means no error).
			
 
				-*/
			
 
				-unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                               const unsigned char* in, size_t insize,
			
 
				-                               LodePNGColorType colortype, unsigned bitdepth);
			
 
				-
			
 
				-/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
			
 
				-unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                          const unsigned char* in, size_t insize);
			
 
				-
			
 
				-/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
			
 
				-unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                          const unsigned char* in, size_t insize);
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Load PNG from disk, from file with given name.
			
 
				-Same as the other decode functions, but instead takes a filename as input.
			
 
				-*/
			
 
				-unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                             const char* filename,
			
 
				-                             LodePNGColorType colortype, unsigned bitdepth);
			
 
				-
			
 
				-/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
			
 
				-unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                               const char* filename);
			
 
				-
			
 
				-/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
			
 
				-unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                               const char* filename);
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*
			
 
				-Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
			
 
				-  of the output PNG image cannot be chosen, they are automatically determined
			
 
				-  by the colortype, bitdepth and content of the input pixel data.
			
 
				-  Note: for 16-bit per channel colors, needs big endian format like PNG does.
			
 
				-out: Output parameter. Pointer to buffer that will contain the PNG image data.
			
 
				-     Must be freed after usage with free(*out).
			
 
				-outsize: Output parameter. Pointer to the size in bytes of the out buffer.
			
 
				-image: The raw pixel data to encode. The size of this buffer should be
			
 
				-       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
			
 
				-w: width of the raw pixel data in pixels.
			
 
				-h: height of the raw pixel data in pixels.
			
 
				-colortype: the color type of the raw input image. See explanation on PNG color types.
			
 
				-bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
			
 
				-Return value: LodePNG error code (0 means no error).
			
 
				-*/
			
 
				-unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
			
 
				-                               const unsigned char* image, unsigned w, unsigned h,
			
 
				-                               LodePNGColorType colortype, unsigned bitdepth);
			
 
				-
			
 
				-/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
			
 
				-unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
			
 
				-                          const unsigned char* image, unsigned w, unsigned h);
			
 
				-
			
 
				-/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
			
 
				-unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
			
 
				-                          const unsigned char* image, unsigned w, unsigned h);
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Converts raw pixel data into a PNG file on disk.
			
 
				-Same as the other encode functions, but instead takes a filename as output.
			
 
				-NOTE: This overwrites existing files without warning!
			
 
				-*/
			
 
				-unsigned lodepng_encode_file(const char* filename,
			
 
				-                             const unsigned char* image, unsigned w, unsigned h,
			
 
				-                             LodePNGColorType colortype, unsigned bitdepth);
			
 
				-
			
 
				-/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
			
 
				-unsigned lodepng_encode32_file(const char* filename,
			
 
				-                               const unsigned char* image, unsigned w, unsigned h);
			
 
				-
			
 
				-/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
			
 
				-unsigned lodepng_encode24_file(const char* filename,
			
 
				-                               const unsigned char* image, unsigned w, unsigned h);
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_CPP
			
 
				-namespace lodepng
			
 
				-{
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
			
 
				-is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                const unsigned char* in, size_t insize,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                const std::vector<unsigned char>& in,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Converts PNG file from disk to raw pixel data in memory.
			
 
				-Same as the other decode functions, but instead takes a filename as input.
			
 
				-*/
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                const std::string& filename,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-#endif /* LODEPNG_COMPILE_DECODER */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
			
 
				-is that of the raw input data. The output PNG color type will be auto chosen.*/
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const unsigned char* in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Converts 32-bit RGBA raw pixel data into a PNG file on disk.
			
 
				-Same as the other encode functions, but instead takes a filename as output.
			
 
				-NOTE: This overwrites existing files without warning!
			
 
				-*/
			
 
				-unsigned encode(const std::string& filename,
			
 
				-                const unsigned char* in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-unsigned encode(const std::string& filename,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-#endif /* LODEPNG_COMPILE_ENCODER */
			
 
				-} /* namespace lodepng */
			
 
				-#endif /*LODEPNG_COMPILE_CPP*/
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ERROR_TEXT
			
 
				-/*Returns an English description of the numerical error code.*/
			
 
				-const char* lodepng_error_text(unsigned code);
			
 
				-#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*Settings for zlib decompression*/
			
 
				-typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
			
 
				-struct LodePNGDecompressSettings
			
 
				-{
			
 
				-  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
			
 
				-
			
 
				-  /*use custom zlib decoder instead of built in one (default: null)*/
			
 
				-  unsigned (*custom_zlib)(unsigned char**, size_t*,
			
 
				-                          const unsigned char*, size_t,
			
 
				-                          const LodePNGDecompressSettings*);
			
 
				-  /*use custom deflate decoder instead of built in one (default: null)
			
 
				-  if custom_zlib is used, custom_deflate is ignored since only the built in
			
 
				-  zlib function will call custom_deflate*/
			
 
				-  unsigned (*custom_inflate)(unsigned char**, size_t*,
			
 
				-                             const unsigned char*, size_t,
			
 
				-                             const LodePNGDecompressSettings*);
			
 
				-
			
 
				-  const void* custom_context; /*optional custom settings for custom functions*/
			
 
				-};
			
 
				-
			
 
				-extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
			
 
				-void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*
			
 
				-Settings for zlib compression. Tweaking these settings tweaks the balance
			
 
				-between speed and compression ratio.
			
 
				-*/
			
 
				-typedef struct LodePNGCompressSettings LodePNGCompressSettings;
			
 
				-struct LodePNGCompressSettings /*deflate = compress*/
			
 
				-{
			
 
				-  /*LZ77 related settings*/
			
 
				-  unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/
			
 
				-  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
			
 
				-  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
			
 
				-  unsigned minmatch; /*mininum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
			
 
				-  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
			
 
				-  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
			
 
				-
			
 
				-  /*use custom zlib encoder instead of built in one (default: null)*/
			
 
				-  unsigned (*custom_zlib)(unsigned char**, size_t*,
			
 
				-                          const unsigned char*, size_t,
			
 
				-                          const LodePNGCompressSettings*);
			
 
				-  /*use custom deflate encoder instead of built in one (default: null)
			
 
				-  if custom_zlib is used, custom_deflate is ignored since only the built in
			
 
				-  zlib function will call custom_deflate*/
			
 
				-  unsigned (*custom_deflate)(unsigned char**, size_t*,
			
 
				-                             const unsigned char*, size_t,
			
 
				-                             const LodePNGCompressSettings*);
			
 
				-
			
 
				-  const void* custom_context; /*optional custom settings for custom functions*/
			
 
				-};
			
 
				-
			
 
				-extern const LodePNGCompressSettings lodepng_default_compress_settings;
			
 
				-void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-/*
			
 
				-Color mode of an image. Contains all information required to decode the pixel
			
 
				-bits to RGBA colors. This information is the same as used in the PNG file
			
 
				-format, and is used both for PNG and raw image data in LodePNG.
			
 
				-*/
			
 
				-typedef struct LodePNGColorMode
			
 
				-{
			
 
				-  /*header (IHDR)*/
			
 
				-  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
			
 
				-  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
			
 
				-
			
 
				-  /*
			
 
				-  palette (PLTE and tRNS)
			
 
				-
			
 
				-  Dynamically allocated with the colors of the palette, including alpha.
			
 
				-  When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use
			
 
				-  lodepng_palette_clear, then for each color use lodepng_palette_add.
			
 
				-  If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette.
			
 
				-
			
 
				-  When decoding, by default you can ignore this palette, since LodePNG already
			
 
				-  fills the palette colors in the pixels of the raw RGBA output.
			
 
				-
			
 
				-  The palette is only supported for color type 3.
			
 
				-  */
			
 
				-  unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/
			
 
				-  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
			
 
				-
			
 
				-  /*
			
 
				-  transparent color key (tRNS)
			
 
				-
			
 
				-  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
			
 
				-  For greyscale PNGs, r, g and b will all 3 be set to the same.
			
 
				-
			
 
				-  When decoding, by default you can ignore this information, since LodePNG sets
			
 
				-  pixels with this key to transparent already in the raw RGBA output.
			
 
				-
			
 
				-  The color key is only supported for color types 0 and 2.
			
 
				-  */
			
 
				-  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
			
 
				-  unsigned key_r;       /*red/greyscale component of color key*/
			
 
				-  unsigned key_g;       /*green component of color key*/
			
 
				-  unsigned key_b;       /*blue component of color key*/
			
 
				-} LodePNGColorMode;
			
 
				-
			
 
				-/*init, cleanup and copy functions to use with this struct*/
			
 
				-void lodepng_color_mode_init(LodePNGColorMode* info);
			
 
				-void lodepng_color_mode_cleanup(LodePNGColorMode* info);
			
 
				-/*return value is error code (0 means no error)*/
			
 
				-unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
			
 
				-
			
 
				-void lodepng_palette_clear(LodePNGColorMode* info);
			
 
				-/*add 1 color to the palette*/
			
 
				-unsigned lodepng_palette_add(LodePNGColorMode* info,
			
 
				-                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
			
 
				-
			
 
				-/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
			
 
				-unsigned lodepng_get_bpp(const LodePNGColorMode* info);
			
 
				-/*get the amount of color channels used, based on colortype in the struct.
			
 
				-If a palette is used, it counts as 1 channel.*/
			
 
				-unsigned lodepng_get_channels(const LodePNGColorMode* info);
			
 
				-/*is it a greyscale type? (only colortype 0 or 4)*/
			
 
				-unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
			
 
				-/*has it got an alpha channel? (only colortype 2 or 6)*/
			
 
				-unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
			
 
				-/*has it got a palette? (only colortype 3)*/
			
 
				-unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
			
 
				-/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
			
 
				-Loops through the palette to check this.*/
			
 
				-unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
			
 
				-/*
			
 
				-Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
			
 
				-Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels).
			
 
				-Returns false if the image can only have opaque pixels.
			
 
				-In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
			
 
				-or if "key_defined" is true.
			
 
				-*/
			
 
				-unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
			
 
				-/*Returns the byte size of a raw image buffer with given width, height and color mode*/
			
 
				-size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-/*The information of a Time chunk in PNG.*/
			
 
				-typedef struct LodePNGTime
			
 
				-{
			
 
				-  unsigned year;    /*2 bytes used (0-65535)*/
			
 
				-  unsigned month;   /*1-12*/
			
 
				-  unsigned day;     /*1-31*/
			
 
				-  unsigned hour;    /*0-23*/
			
 
				-  unsigned minute;  /*0-59*/
			
 
				-  unsigned second;  /*0-60 (to allow for leap seconds)*/
			
 
				-} LodePNGTime;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-/*Information about the PNG image, except pixels, width and height.*/
			
 
				-typedef struct LodePNGInfo
			
 
				-{
			
 
				-  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
			
 
				-  unsigned compression_method;/*compression method of the original file. Always 0.*/
			
 
				-  unsigned filter_method;     /*filter method of the original file*/
			
 
				-  unsigned interlace_method;  /*interlace method of the original file*/
			
 
				-  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  /*
			
 
				-  suggested background color chunk (bKGD)
			
 
				-  This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit.
			
 
				-
			
 
				-  For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding
			
 
				-  the encoder writes the red one. For palette PNGs: When decoding, the RGB value
			
 
				-  will be stored, not a palette index. But when encoding, specify the index of
			
 
				-  the palette in background_r, the other two are then ignored.
			
 
				-
			
 
				-  The decoder does not use this background color to edit the color of pixels.
			
 
				-  */
			
 
				-  unsigned background_defined; /*is a suggested background color given?*/
			
 
				-  unsigned background_r;       /*red component of suggested background color*/
			
 
				-  unsigned background_g;       /*green component of suggested background color*/
			
 
				-  unsigned background_b;       /*blue component of suggested background color*/
			
 
				-
			
 
				-  /*
			
 
				-  non-international text chunks (tEXt and zTXt)
			
 
				-
			
 
				-  The char** arrays each contain num strings. The actual messages are in
			
 
				-  text_strings, while text_keys are keywords that give a short description what
			
 
				-  the actual text represents, e.g. Title, Author, Description, or anything else.
			
 
				-
			
 
				-  A keyword is minimum 1 character and maximum 79 characters long. It's
			
 
				-  discouraged to use a single line length longer than 79 characters for texts.
			
 
				-
			
 
				-  Don't allocate these text buffers yourself. Use the init/cleanup functions
			
 
				-  correctly and use lodepng_add_text and lodepng_clear_text.
			
 
				-  */
			
 
				-  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
			
 
				-  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
			
 
				-  char** text_strings; /*the actual text*/
			
 
				-
			
 
				-  /*
			
 
				-  international text chunks (iTXt)
			
 
				-  Similar to the non-international text chunks, but with additional strings
			
 
				-  "langtags" and "transkeys".
			
 
				-  */
			
 
				-  size_t itext_num; /*the amount of international texts in this PNG*/
			
 
				-  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
			
 
				-  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
			
 
				-  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
			
 
				-  char** itext_strings; /*the actual international text - UTF-8 string*/
			
 
				-
			
 
				-  /*time chunk (tIME)*/
			
 
				-  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
			
 
				-  LodePNGTime time;
			
 
				-
			
 
				-  /*phys chunk (pHYs)*/
			
 
				-  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/
			
 
				-  unsigned phys_x; /*pixels per unit in x direction*/
			
 
				-  unsigned phys_y; /*pixels per unit in y direction*/
			
 
				-  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
			
 
				-
			
 
				-  /*
			
 
				-  unknown chunks
			
 
				-  There are 3 buffers, one for each position in the PNG where unknown chunks can appear
			
 
				-  each buffer contains all unknown chunks for that position consecutively
			
 
				-  The 3 buffers are the unknown chunks between certain critical chunks:
			
 
				-  0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
			
 
				-  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
			
 
				-  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
			
 
				-  */
			
 
				-  unsigned char* unknown_chunks_data[3];
			
 
				-  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-} LodePNGInfo;
			
 
				-
			
 
				-/*init, cleanup and copy functions to use with this struct*/
			
 
				-void lodepng_info_init(LodePNGInfo* info);
			
 
				-void lodepng_info_cleanup(LodePNGInfo* info);
			
 
				-/*return value is error code (0 means no error)*/
			
 
				-unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
			
 
				-unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/
			
 
				-
			
 
				-void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
			
 
				-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
			
 
				-                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-
			
 
				-/*
			
 
				-Converts raw buffer from one color type to another color type, based on
			
 
				-LodePNGColorMode structs to describe the input and output color type.
			
 
				-See the reference manual at the end of this header file to see which color conversions are supported.
			
 
				-return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
			
 
				-The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
			
 
				-of the output color type (lodepng_get_bpp).
			
 
				-For < 8 bpp images, there should not be padding bits at the end of scanlines.
			
 
				-For 16-bit per channel colors, uses big endian format like PNG does.
			
 
				-Return value is LodePNG error code
			
 
				-*/
			
 
				-unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
			
 
				-                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
			
 
				-                         unsigned w, unsigned h);
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*
			
 
				-Settings for the decoder. This contains settings for the PNG and the Zlib
			
 
				-decoder, but not the Info settings from the Info structs.
			
 
				-*/
			
 
				-typedef struct LodePNGDecoderSettings
			
 
				-{
			
 
				-  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
			
 
				-
			
 
				-  unsigned ignore_crc; /*ignore CRC checksums*/
			
 
				-
			
 
				-  unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
			
 
				-  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
			
 
				-  unsigned remember_unknown_chunks;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-} LodePNGDecoderSettings;
			
 
				-
			
 
				-void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/
			
 
				-typedef enum LodePNGFilterStrategy
			
 
				-{
			
 
				-  /*every filter at zero*/
			
 
				-  LFS_ZERO,
			
 
				-  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
			
 
				-  LFS_MINSUM,
			
 
				-  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
			
 
				-  on the image, this is better or worse than minsum.*/
			
 
				-  LFS_ENTROPY,
			
 
				-  /*
			
 
				-  Brute-force-search PNG filters by compressing each filter for each scanline.
			
 
				-  Experimental, very slow, and only rarely gives better compression than MINSUM.
			
 
				-  */
			
 
				-  LFS_BRUTE_FORCE,
			
 
				-  /*use predefined_filters buffer: you specify the filter type for each scanline*/
			
 
				-  LFS_PREDEFINED
			
 
				-} LodePNGFilterStrategy;
			
 
				-
			
 
				-/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding.
			
 
				-Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
			
 
				-typedef struct LodePNGColorProfile
			
 
				-{
			
 
				-  unsigned colored; /*not greyscale*/
			
 
				-  unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/
			
 
				-  unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/
			
 
				-  unsigned short key_g;
			
 
				-  unsigned short key_b;
			
 
				-  unsigned alpha; /*alpha channel or alpha palette required*/
			
 
				-  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/
			
 
				-  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/
			
 
				-  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/
			
 
				-} LodePNGColorProfile;
			
 
				-
			
 
				-void lodepng_color_profile_init(LodePNGColorProfile* profile);
			
 
				-
			
 
				-/*Get a LodePNGColorProfile of the image.*/
			
 
				-unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
			
 
				-                                   const unsigned char* image, unsigned w, unsigned h,
			
 
				-                                   const LodePNGColorMode* mode_in);
			
 
				-/*The function LodePNG uses internally to decide the PNG color with auto_convert.
			
 
				-Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/
			
 
				-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
			
 
				-                                   const unsigned char* image, unsigned w, unsigned h,
			
 
				-                                   const LodePNGColorMode* mode_in);
			
 
				-
			
 
				-/*Settings for the encoder.*/
			
 
				-typedef struct LodePNGEncoderSettings
			
 
				-{
			
 
				-  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
			
 
				-
			
 
				-  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
			
 
				-
			
 
				-  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
			
 
				-  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
			
 
				-  completely follow the official PNG heuristic, filter_palette_zero must be true and
			
 
				-  filter_strategy must be LFS_MINSUM*/
			
 
				-  unsigned filter_palette_zero;
			
 
				-  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
			
 
				-  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
			
 
				-  LodePNGFilterStrategy filter_strategy;
			
 
				-  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
			
 
				-  the same length as the amount of scanlines in the image, and each value must <= 5. You
			
 
				-  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
			
 
				-  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
			
 
				-  const unsigned char* predefined_filters;
			
 
				-
			
 
				-  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
			
 
				-  If colortype is 3, PLTE is _always_ created.*/
			
 
				-  unsigned force_palette;
			
 
				-#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
			
 
				-  /*add LodePNG identifier and version as a text chunk, for debugging*/
			
 
				-  unsigned add_id;
			
 
				-  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
			
 
				-  unsigned text_compression;
			
 
				-#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
			
 
				-} LodePNGEncoderSettings;
			
 
				-
			
 
				-void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-
			
 
				-#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
			
 
				-/*The settings, state and information for extended encoding and decoding.*/
			
 
				-typedef struct LodePNGState
			
 
				-{
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-  LodePNGDecoderSettings decoder; /*the decoding settings*/
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-  LodePNGEncoderSettings encoder; /*the encoding settings*/
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-  LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
			
 
				-  LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
			
 
				-  unsigned error;
			
 
				-#ifdef LODEPNG_COMPILE_CPP
			
 
				-  /* For the lodepng::State subclass. */
			
 
				-  virtual ~LodePNGState(){}
			
 
				-#endif
			
 
				-} LodePNGState;
			
 
				-
			
 
				-/*init, cleanup and copy functions to use with this struct*/
			
 
				-void lodepng_state_init(LodePNGState* state);
			
 
				-void lodepng_state_cleanup(LodePNGState* state);
			
 
				-void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
			
 
				-#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*
			
 
				-Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
			
 
				-getting much more information about the PNG image and color mode.
			
 
				-*/
			
 
				-unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
			
 
				-                        LodePNGState* state,
			
 
				-                        const unsigned char* in, size_t insize);
			
 
				-
			
 
				-/*
			
 
				-Read the PNG header, but not the actual data. This returns only the information
			
 
				-that is in the header chunk of the PNG, such as width, height and color type. The
			
 
				-information is placed in the info_png field of the LodePNGState.
			
 
				-*/
			
 
				-unsigned lodepng_inspect(unsigned* w, unsigned* h,
			
 
				-                         LodePNGState* state,
			
 
				-                         const unsigned char* in, size_t insize);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/
			
 
				-unsigned lodepng_encode(unsigned char** out, size_t* outsize,
			
 
				-                        const unsigned char* image, unsigned w, unsigned h,
			
 
				-                        LodePNGState* state);
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-/*
			
 
				-The lodepng_chunk functions are normally not needed, except to traverse the
			
 
				-unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
			
 
				-It also allows traversing the chunks of an encoded PNG file yourself.
			
 
				-
			
 
				-PNG standard chunk naming conventions:
			
 
				-First byte: uppercase = critical, lowercase = ancillary
			
 
				-Second byte: uppercase = public, lowercase = private
			
 
				-Third byte: must be uppercase
			
 
				-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
			
 
				-*/
			
 
				-
			
 
				-/*
			
 
				-Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
			
 
				-There must be at least 4 bytes to read from. If the result value is too large,
			
 
				-it may be corrupt data.
			
 
				-*/
			
 
				-unsigned lodepng_chunk_length(const unsigned char* chunk);
			
 
				-
			
 
				-/*puts the 4-byte type in null terminated string*/
			
 
				-void lodepng_chunk_type(char type[5], const unsigned char* chunk);
			
 
				-
			
 
				-/*check if the type is the given type*/
			
 
				-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
			
 
				-
			
 
				-/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
			
 
				-unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
			
 
				-
			
 
				-/*0: public, 1: private (see PNG standard)*/
			
 
				-unsigned char lodepng_chunk_private(const unsigned char* chunk);
			
 
				-
			
 
				-/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
			
 
				-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
			
 
				-
			
 
				-/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
			
 
				-unsigned char* lodepng_chunk_data(unsigned char* chunk);
			
 
				-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
			
 
				-
			
 
				-/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
			
 
				-unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
			
 
				-
			
 
				-/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
			
 
				-void lodepng_chunk_generate_crc(unsigned char* chunk);
			
 
				-
			
 
				-/*iterate to next chunks. don't use on IEND chunk, as there is no next chunk then*/
			
 
				-unsigned char* lodepng_chunk_next(unsigned char* chunk);
			
 
				-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
			
 
				-
			
 
				-/*
			
 
				-Appends chunk to the data in out. The given chunk should already have its chunk header.
			
 
				-The out variable and outlength are updated to reflect the new reallocated buffer.
			
 
				-Returns error code (0 if it went ok)
			
 
				-*/
			
 
				-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
			
 
				-
			
 
				-/*
			
 
				-Appends new chunk to out. The chunk to append is given by giving its length, type
			
 
				-and data separately. The type is a 4-letter string.
			
 
				-The out variable and outlength are updated to reflect the new reallocated buffer.
			
 
				-Returne error code (0 if it went ok)
			
 
				-*/
			
 
				-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
			
 
				-                              const char* type, const unsigned char* data);
			
 
				-
			
 
				-
			
 
				-/*Calculate CRC32 of buffer*/
			
 
				-unsigned lodepng_crc32(const unsigned char* buf, size_t len);
			
 
				-#endif /*LODEPNG_COMPILE_PNG*/
			
 
				-
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-/*
			
 
				-This zlib part can be used independently to zlib compress and decompress a
			
 
				-buffer. It cannot be used to create gzip files however, and it only supports the
			
 
				-part of zlib that is required for PNG, it does not support dictionaries.
			
 
				-*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
			
 
				-unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
			
 
				-                         const unsigned char* in, size_t insize,
			
 
				-                         const LodePNGDecompressSettings* settings);
			
 
				-
			
 
				-/*
			
 
				-Decompresses Zlib data. Reallocates the out buffer and appends the data. The
			
 
				-data must be according to the zlib specification.
			
 
				-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
			
 
				-buffer and *outsize its size in bytes. out must be freed by user after usage.
			
 
				-*/
			
 
				-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
			
 
				-                                 const unsigned char* in, size_t insize,
			
 
				-                                 const LodePNGDecompressSettings* settings);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/*
			
 
				-Compresses data with Zlib. Reallocates the out buffer and appends the data.
			
 
				-Zlib adds a small header and trailer around the deflate data.
			
 
				-The data is output in the format of the zlib specification.
			
 
				-Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
			
 
				-buffer and *outsize its size in bytes. out must be freed by user after usage.
			
 
				-*/
			
 
				-unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
			
 
				-                               const unsigned char* in, size_t insize,
			
 
				-                               const LodePNGCompressSettings* settings);
			
 
				-
			
 
				-/*
			
 
				-Find length-limited Huffman code for given frequencies. This function is in the
			
 
				-public interface only for tests, it's used internally by lodepng_deflate.
			
 
				-*/
			
 
				-unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
			
 
				-                                      size_t numcodes, unsigned maxbitlen);
			
 
				-
			
 
				-/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
			
 
				-unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
			
 
				-                         const unsigned char* in, size_t insize,
			
 
				-                         const LodePNGCompressSettings* settings);
			
 
				-
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-#endif /*LODEPNG_COMPILE_ZLIB*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Load a file from disk into buffer. The function allocates the out buffer, and
			
 
				-after usage you should free it.
			
 
				-out: output parameter, contains pointer to loaded buffer.
			
 
				-outsize: output parameter, size of the allocated out buffer
			
 
				-filename: the path to the file to load
			
 
				-return value: error code (0 means ok)
			
 
				-*/
			
 
				-unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
			
 
				-
			
 
				-/*
			
 
				-Save a file from buffer to disk. Warning, if it exists, this function overwrites
			
 
				-the file without warning!
			
 
				-buffer: the buffer to write
			
 
				-buffersize: size of the buffer to write
			
 
				-filename: the path to the file to save to
			
 
				-return value: error code (0 means ok)
			
 
				-*/
			
 
				-unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
			
 
				-#endif /*LODEPNG_COMPILE_DISK*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_CPP
			
 
				-/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
			
 
				-namespace lodepng
			
 
				-{
			
 
				-#ifdef LODEPNG_COMPILE_PNG
			
 
				-class State : public LodePNGState
			
 
				-{
			
 
				-  public:
			
 
				-    State();
			
 
				-    State(const State& other);
			
 
				-    virtual ~State();
			
 
				-    State& operator=(const State& other);
			
 
				-};
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/* Same as other lodepng::decode, but using a State for more settings and information. */
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                State& state,
			
 
				-                const unsigned char* in, size_t insize);
			
 
				-unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
			
 
				-                State& state,
			
 
				-                const std::vector<unsigned char>& in);
			
 
				-#endif /*LODEPNG_COMPILE_DECODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/* Same as other lodepng::encode, but using a State for more settings and information. */
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const unsigned char* in, unsigned w, unsigned h,
			
 
				-                State& state);
			
 
				-unsigned encode(std::vector<unsigned char>& out,
			
 
				-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
			
 
				-                State& state);
			
 
				-#endif /*LODEPNG_COMPILE_ENCODER*/
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_DISK
			
 
				-/*
			
 
				-Load a file from disk into an std::vector.
			
 
				-return value: error code (0 means ok)
			
 
				-*/
			
 
				-unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
			
 
				-
			
 
				-/*
			
 
				-Save the binary data in an std::vector to a file on disk. The file is overwritten
			
 
				-without warning.
			
 
				-*/
			
 
				-unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
			
 
				-#endif /* LODEPNG_COMPILE_DISK */
			
 
				-#endif /* LODEPNG_COMPILE_PNG */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ZLIB
			
 
				-#ifdef LODEPNG_COMPILE_DECODER
			
 
				-/* Zlib-decompress an unsigned char buffer */
			
 
				-unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
			
 
				-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
			
 
				-
			
 
				-/* Zlib-decompress an std::vector */
			
 
				-unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
			
 
				-                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
			
 
				-#endif /* LODEPNG_COMPILE_DECODER */
			
 
				-
			
 
				-#ifdef LODEPNG_COMPILE_ENCODER
			
 
				-/* Zlib-compress an unsigned char buffer */
			
 
				-unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
			
 
				-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
			
 
				-
			
 
				-/* Zlib-compress an std::vector */
			
 
				-unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
			
 
				-                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
			
 
				-#endif /* LODEPNG_COMPILE_ENCODER */
			
 
				-#endif /* LODEPNG_COMPILE_ZLIB */
			
 
				-} /* namespace lodepng */
			
 
				-#endif /*LODEPNG_COMPILE_CPP*/
			
 
				-
			
 
				-/*
			
 
				-TODO:
			
 
				-[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
			
 
				-[.] check compatibility with various compilers  - done but needs to be redone for every newer version
			
 
				-[X] converting color to 16-bit per channel types
			
 
				-[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values)
			
 
				-[ ] make sure encoder generates no chunks with size > (2^31)-1
			
 
				-[ ] partial decoding (stream processing)
			
 
				-[X] let the "isFullyOpaque" function check color keys and transparent palettes too
			
 
				-[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
			
 
				-[ ] don't stop decoding on errors like 69, 57, 58 (make warnings)
			
 
				-[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
			
 
				-[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
			
 
				-[ ] allow user to give data (void*) to custom allocator
			
 
				-*/
			
 
				-
			
 
				-#endif /*LODEPNG_H inclusion guard*/
			
 
				-
			
 
				-/*
			
 
				-LodePNG Documentation
			
 
				----------------------
			
 
				-
			
 
				-0. table of contents
			
 
				---------------------
			
 
				-
			
 
				-  1. about
			
 
				-   1.1. supported features
			
 
				-   1.2. features not supported
			
 
				-  2. C and C++ version
			
 
				-  3. security
			
 
				-  4. decoding
			
 
				-  5. encoding
			
 
				-  6. color conversions
			
 
				-    6.1. PNG color types
			
 
				-    6.2. color conversions
			
 
				-    6.3. padding bits
			
 
				-    6.4. A note about 16-bits per channel and endianness
			
 
				-  7. error values
			
 
				-  8. chunks and PNG editing
			
 
				-  9. compiler support
			
 
				-  10. examples
			
 
				-   10.1. decoder C++ example
			
 
				-   10.2. decoder C example
			
 
				-  11. state settings reference
			
 
				-  12. changes
			
 
				-  13. contact information
			
 
				-
			
 
				-
			
 
				-1. about
			
 
				---------
			
 
				-
			
 
				-PNG is a file format to store raster images losslessly with good compression,
			
 
				-supporting different color types and alpha channel.
			
 
				-
			
 
				-LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
			
 
				-Specification (Second Edition) - W3C Recommendation 10 November 2003.
			
 
				-
			
 
				-The specifications used are:
			
 
				-
			
 
				-*) Portable Network Graphics (PNG) Specification (Second Edition):
			
 
				-     http://www.w3.org/TR/2003/REC-PNG-20031110
			
 
				-*) RFC 1950 ZLIB Compressed Data Format version 3.3:
			
 
				-     http://www.gzip.org/zlib/rfc-zlib.html
			
 
				-*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
			
 
				-     http://www.gzip.org/zlib/rfc-deflate.html
			
 
				-
			
 
				-The most recent version of LodePNG can currently be found at
			
 
				-http://lodev.org/lodepng/
			
 
				-
			
 
				-LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
			
 
				-extra functionality.
			
 
				-
			
 
				-LodePNG consists of two files:
			
 
				--lodepng.h: the header file for both C and C++
			
 
				--lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
			
 
				-
			
 
				-If you want to start using LodePNG right away without reading this doc, get the
			
 
				-examples from the LodePNG website to see how to use it in code, or check the
			
 
				-smaller examples in chapter 10 here.
			
 
				-
			
 
				-LodePNG is simple but only supports the basic requirements. To achieve
			
 
				-simplicity, the following design choices were made: There are no dependencies
			
 
				-on any external library. There are functions to decode and encode a PNG with
			
 
				-a single function call, and extended versions of these functions taking a
			
 
				-LodePNGState struct allowing to specify or get more information. By default
			
 
				-the colors of the raw image are always RGB or RGBA, no matter what color type
			
 
				-the PNG file uses. To read and write files, there are simple functions to
			
 
				-convert the files to/from buffers in memory.
			
 
				-
			
 
				-This all makes LodePNG suitable for loading textures in games, demos and small
			
 
				-programs, ... It's less suitable for full fledged image editors, loading PNGs
			
 
				-over network (it requires all the image data to be available before decoding can
			
 
				-begin), life-critical systems, ...
			
 
				-
			
 
				-1.1. supported features
			
 
				------------------------
			
 
				-
			
 
				-The following features are supported by the decoder and encoder:
			
 
				-
			
 
				-*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
			
 
				-   or the same color type as the PNG
			
 
				-*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
			
 
				-*) Adam7 interlace and deinterlace for any color type
			
 
				-*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
			
 
				-*) support for alpha channels, including RGBA color model, translucent palettes and color keying
			
 
				-*) zlib decompression (inflate)
			
 
				-*) zlib compression (deflate)
			
 
				-*) CRC32 and ADLER32 checksums
			
 
				-*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
			
 
				-*) the following chunks are supported (generated/interpreted) by both encoder and decoder:
			
 
				-    IHDR: header information
			
 
				-    PLTE: color palette
			
 
				-    IDAT: pixel data
			
 
				-    IEND: the final chunk
			
 
				-    tRNS: transparency for palettized images
			
 
				-    tEXt: textual information
			
 
				-    zTXt: compressed textual information
			
 
				-    iTXt: international textual information
			
 
				-    bKGD: suggested background color
			
 
				-    pHYs: physical dimensions
			
 
				-    tIME: modification time
			
 
				-
			
 
				-1.2. features not supported
			
 
				----------------------------
			
 
				-
			
 
				-The following features are _not_ supported:
			
 
				-
			
 
				-*) some features needed to make a conformant PNG-Editor might be still missing.
			
 
				-*) partial loading/stream processing. All data must be available and is processed in one call.
			
 
				-*) The following public chunks are not supported but treated as unknown chunks by LodePNG
			
 
				-    cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT
			
 
				-   Some of these are not supported on purpose: LodePNG wants to provide the RGB values
			
 
				-   stored in the pixels, not values modified by system dependent gamma or color models.
			
 
				-
			
 
				-
			
 
				-2. C and C++ version
			
 
				---------------------
			
 
				-
			
 
				-The C version uses buffers allocated with alloc that you need to free()
			
 
				-yourself. You need to use init and cleanup functions for each struct whenever
			
 
				-using a struct from the C version to avoid exploits and memory leaks.
			
 
				-
			
 
				-The C++ version has extra functions with std::vectors in the interface and the
			
 
				-lodepng::State class which is a LodePNGState with constructor and destructor.
			
 
				-
			
 
				-These files work without modification for both C and C++ compilers because all
			
 
				-the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
			
 
				-ignore it, and the C code is made to compile both with strict ISO C90 and C++.
			
 
				-
			
 
				-To use the C++ version, you need to rename the source file to lodepng.cpp
			
 
				-(instead of lodepng.c), and compile it with a C++ compiler.
			
 
				-
			
 
				-To use the C version, you need to rename the source file to lodepng.c (instead
			
 
				-of lodepng.cpp), and compile it with a C compiler.
			
 
				-
			
 
				-
			
 
				-3. Security
			
 
				------------
			
 
				-
			
 
				-Even if carefully designed, it's always possible that LodePNG contains possible
			
 
				-exploits. If you discover one, please let me know, and it will be fixed.
			
 
				-
			
 
				-When using LodePNG, care has to be taken with the C version of LodePNG, as well
			
 
				-as the C-style structs when working with C++. The following conventions are used
			
 
				-for all C-style structs:
			
 
				-
			
 
				--if a struct has a corresponding init function, always call the init function when making a new one
			
 
				--if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
			
 
				--if a struct has a corresponding copy function, use the copy function instead of "=".
			
 
				- The destination must also be inited already.
			
 
				-
			
 
				-
			
 
				-4. Decoding
			
 
				------------
			
 
				-
			
 
				-Decoding converts a PNG compressed image to a raw pixel buffer.
			
 
				-
			
 
				-Most documentation on using the decoder is at its declarations in the header
			
 
				-above. For C, simple decoding can be done with functions such as
			
 
				-lodepng_decode32, and more advanced decoding can be done with the struct
			
 
				-LodePNGState and lodepng_decode. For C++, all decoding can be done with the
			
 
				-various lodepng::decode functions, and lodepng::State can be used for advanced
			
 
				-features.
			
 
				-
			
 
				-When using the LodePNGState, it uses the following fields for decoding:
			
 
				-*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
			
 
				-*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
			
 
				-*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
			
 
				-
			
 
				-LodePNGInfo info_png
			
 
				---------------------
			
 
				-
			
 
				-After decoding, this contains extra information of the PNG image, except the actual
			
 
				-pixels, width and height because these are already gotten directly from the decoder
			
 
				-functions.
			
 
				-
			
 
				-It contains for example the original color type of the PNG image, text comments,
			
 
				-suggested background color, etc... More details about the LodePNGInfo struct are
			
 
				-at its declaration documentation.
			
 
				-
			
 
				-LodePNGColorMode info_raw
			
 
				--------------------------
			
 
				-
			
 
				-When decoding, here you can specify which color type you want
			
 
				-the resulting raw image to be. If this is different from the colortype of the
			
 
				-PNG, then the decoder will automatically convert the result. This conversion
			
 
				-always works, except if you want it to convert a color PNG to greyscale or to
			
 
				-a palette with missing colors.
			
 
				-
			
 
				-By default, 32-bit color is used for the result.
			
 
				-
			
 
				-LodePNGDecoderSettings decoder
			
 
				-------------------------------
			
 
				-
			
 
				-The settings can be used to ignore the errors created by invalid CRC and Adler32
			
 
				-checksums, and to disable the decoding of tEXt chunks.
			
 
				-
			
 
				-There's also a setting color_convert, true by default. If false, no conversion
			
 
				-is done, the resulting data will be as it was in the PNG (after decompression)
			
 
				-and you'll have to puzzle the colors of the pixels together yourself using the
			
 
				-color type information in the LodePNGInfo.
			
 
				-
			
 
				-
			
 
				-5. Encoding
			
 
				------------
			
 
				-
			
 
				-Encoding converts a raw pixel buffer to a PNG compressed image.
			
 
				-
			
 
				-Most documentation on using the encoder is at its declarations in the header
			
 
				-above. For C, simple encoding can be done with functions such as
			
 
				-lodepng_encode32, and more advanced encoding can be done with the struct
			
 
				-LodePNGState and lodepng_encode. For C++, all encoding can be done with the
			
 
				-various lodepng::encode functions, and lodepng::State can be used for advanced
			
 
				-features.
			
 
				-
			
 
				-Like the decoder, the encoder can also give errors. However, it gives fewer errors
			
 
				-since the encoder input is trusted, the decoder input (a PNG image that could
			
 
				-be forged by anyone) is not trusted.
			
 
				-
			
 
				-When using the LodePNGState, it uses the following fields for encoding:
			
 
				-*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
			
 
				-*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
			
 
				-*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
			
 
				-
			
 
				-LodePNGInfo info_png
			
 
				---------------------
			
 
				-
			
 
				-When encoding, you use this the opposite way as when decoding: for encoding,
			
 
				-you fill in the values you want the PNG to have before encoding. By default it's
			
 
				-not needed to specify a color type for the PNG since it's automatically chosen,
			
 
				-but it's possible to choose it yourself given the right settings.
			
 
				-
			
 
				-The encoder will not always exactly match the LodePNGInfo struct you give,
			
 
				-it tries to match it as closely as possible. Some things are ignored by the encoder. The
			
 
				-encoder uses, for example, the following settings from it when applicable:
			
 
				-colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
			
 
				-background color, the interlace method, unknown chunks, ...
			
 
				-
			
 
				-When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
			
 
				-If the palette contains any colors for which the alpha channel is not 255 (so
			
 
				-there are translucent colors in the palette), it'll add a tRNS chunk.
			
 
				-
			
 
				-LodePNGColorMode info_raw
			
 
				--------------------------
			
 
				-
			
 
				-You specify the color type of the raw image that you give to the input here,
			
 
				-including a possible transparent color key and palette you happen to be using in
			
 
				-your raw image data.
			
 
				-
			
 
				-By default, 32-bit color is assumed, meaning your input has to be in RGBA
			
 
				-format with 4 bytes (unsigned chars) per pixel.
			
 
				-
			
 
				-LodePNGEncoderSettings encoder
			
 
				-------------------------------
			
 
				-
			
 
				-The following settings are supported (some are in sub-structs):
			
 
				-*) auto_convert: when this option is enabled, the encoder will
			
 
				-automatically choose the smallest possible color mode (including color key) that
			
 
				-can encode the colors of all pixels without information loss.
			
 
				-*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
			
 
				-   2 = dynamic huffman tree (best compression). Should be 2 for proper
			
 
				-   compression.
			
 
				-*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
			
 
				-   true for proper compression.
			
 
				-*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
			
 
				-   2048 by default, but can be set to 32768 for better, but slow, compression.
			
 
				-*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
			
 
				-   chunk if force_palette is true. This can be used as a suggested palette to convert
			
 
				-   to by viewers that don't support more than 256 colors (if those still exist)
			
 
				-*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
			
 
				-*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
			
 
				-  zTXt chunks use zlib compression on the text. This gives a smaller result on
			
 
				-  large texts but a larger result on small texts (such as a single program name).
			
 
				-  It's all tEXt or all zTXt though, there's no separate setting per text yet.
			
 
				-
			
 
				-
			
 
				-6. color conversions
			
 
				---------------------
			
 
				-
			
 
				-An important thing to note about LodePNG, is that the color type of the PNG, and
			
 
				-the color type of the raw image, are completely independent. By default, when
			
 
				-you decode a PNG, you get the result as a raw image in the color type you want,
			
 
				-no matter whether the PNG was encoded with a palette, greyscale or RGBA color.
			
 
				-And if you encode an image, by default LodePNG will automatically choose the PNG
			
 
				-color type that gives good compression based on the values of colors and amount
			
 
				-of colors in the image. It can be configured to let you control it instead as
			
 
				-well, though.
			
 
				-
			
 
				-To be able to do this, LodePNG does conversions from one color mode to another.
			
 
				-It can convert from almost any color type to any other color type, except the
			
 
				-following conversions: RGB to greyscale is not supported, and converting to a
			
 
				-palette when the palette doesn't have a required color is not supported. This is
			
 
				-not supported on purpose: this is information loss which requires a color
			
 
				-reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to grey
			
 
				-is easy, but there are multiple ways if you want to give some channels more
			
 
				-weight).
			
 
				-
			
 
				-By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
			
 
				-color, no matter what color type the PNG has. And by default when encoding,
			
 
				-LodePNG automatically picks the best color model for the output PNG, and expects
			
 
				-the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
			
 
				-the color format of the images yourself, you can skip this chapter.
			
 
				-
			
 
				-6.1. PNG color types
			
 
				---------------------
			
 
				-
			
 
				-A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
			
 
				-as well as palettized color modes. After the zlib decompression and unfiltering
			
 
				-in the PNG image is done, the raw pixel data will have that color type and thus
			
 
				-a certain amount of bits per pixel. If you want the output raw image after
			
 
				-decoding to have another color type, a conversion is done by LodePNG.
			
 
				-
			
 
				-The PNG specification gives the following color types:
			
 
				-
			
 
				-0: greyscale, bit depths 1, 2, 4, 8, 16
			
 
				-2: RGB, bit depths 8 and 16
			
 
				-3: palette, bit depths 1, 2, 4 and 8
			
 
				-4: greyscale with alpha, bit depths 8 and 16
			
 
				-6: RGBA, bit depths 8 and 16
			
 
				-
			
 
				-Bit depth is the amount of bits per pixel per color channel. So the total amount
			
 
				-of bits per pixel is: amount of channels * bitdepth.
			
 
				-
			
 
				-6.2. color conversions
			
 
				-----------------------
			
 
				-
			
 
				-As explained in the sections about the encoder and decoder, you can specify
			
 
				-color types and bit depths in info_png and info_raw to change the default
			
 
				-behaviour.
			
 
				-
			
 
				-If, when decoding, you want the raw image to be something else than the default,
			
 
				-you need to set the color type and bit depth you want in the LodePNGColorMode,
			
 
				-or the parameters colortype and bitdepth of the simple decoding function.
			
 
				-
			
 
				-If, when encoding, you use another color type than the default in the raw input
			
 
				-image, you need to specify its color type and bit depth in the LodePNGColorMode
			
 
				-of the raw image, or use the parameters colortype and bitdepth of the simple
			
 
				-encoding function.
			
 
				-
			
 
				-If, when encoding, you don't want LodePNG to choose the output PNG color type
			
 
				-but control it yourself, you need to set auto_convert in the encoder settings
			
 
				-to false, and specify the color type you want in the LodePNGInfo of the
			
 
				-encoder (including palette: it can generate a palette if auto_convert is true,
			
 
				-otherwise not).
			
 
				-
			
 
				-If the input and output color type differ (whether user chosen or auto chosen),
			
 
				-LodePNG will do a color conversion, which follows the rules below, and may
			
 
				-sometimes result in an error.
			
 
				-
			
 
				-To avoid some confusion:
			
 
				--the decoder converts from PNG to raw image
			
 
				--the encoder converts from raw image to PNG
			
 
				--the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
			
 
				--the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
			
 
				--when encoding, the color type in LodePNGInfo is ignored if auto_convert
			
 
				- is enabled, it is automatically generated instead
			
 
				--when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
			
 
				- PNG image, but it can be ignored since the raw image has the color type you requested instead
			
 
				--if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
			
 
				- between the color types is done if the color types are supported. If it is not
			
 
				- supported, an error is returned. If the types are the same, no conversion is done.
			
 
				--even though some conversions aren't supported, LodePNG supports loading PNGs from any
			
 
				- colortype and saving PNGs to any colortype, sometimes it just requires preparing
			
 
				- the raw image correctly before encoding.
			
 
				--both encoder and decoder use the same color converter.
			
 
				-
			
 
				-Non supported color conversions:
			
 
				--color to greyscale: no error is thrown, but the result will look ugly because
			
 
				-only the red channel is taken
			
 
				--anything to palette when that palette does not have that color in it: in this
			
 
				-case an error is thrown
			
 
				-
			
 
				-Supported color conversions:
			
 
				--anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
			
 
				--any grey or grey+alpha, to grey or grey+alpha
			
 
				--anything to a palette, as long as the palette has the requested colors in it
			
 
				--removing alpha channel
			
 
				--higher to smaller bitdepth, and vice versa
			
 
				-
			
 
				-If you want no color conversion to be done (e.g. for speed or control):
			
 
				--In the encoder, you can make it save a PNG with any color type by giving the
			
 
				-raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
			
 
				-false.
			
 
				--In the decoder, you can make it store the pixel data in the same color type
			
 
				-as the PNG has, by setting the color_convert setting to false. Settings in
			
 
				-info_raw are then ignored.
			
 
				-
			
 
				-The function lodepng_convert does the color conversion. It is available in the
			
 
				-interface but normally isn't needed since the encoder and decoder already call
			
 
				-it.
			
 
				-
			
 
				-6.3. padding bits
			
 
				------------------
			
 
				-
			
 
				-In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
			
 
				-have a bit amount that isn't a multiple of 8, then padding bits are used so that each
			
 
				-scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
			
 
				-The raw input image you give to the encoder, and the raw output image you get from the decoder
			
 
				-will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
			
 
				-of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
			
 
				-not the first bit of a new byte.
			
 
				-
			
 
				-6.4. A note about 16-bits per channel and endianness
			
 
				-----------------------------------------------------
			
 
				-
			
 
				-LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
			
 
				-for any other color format. The 16-bit values are stored in big endian (most
			
 
				-significant byte first) in these arrays. This is the opposite order of the
			
 
				-little endian used by x86 CPUs.
			
 
				-
			
 
				-LodePNG always uses big endian because the PNG file format does so internally.
			
 
				-Conversions to formats other than those PNG uses internally are not supported by
			
 
				-LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
			
 
				-colors, the order in which you store R, G, B and A, and so on. Supporting and
			
 
				-converting to/from all that is outside the scope of LodePNG.
			
 
				-
			
 
				-This may mean that, depending on your use case, you may want to convert the big
			
 
				-endian output of LodePNG to little endian with a for loop. This is certainly not
			
 
				-always needed, many applications and libraries support big endian 16-bit colors
			
 
				-anyway, but it means you cannot simply cast the unsigned char* buffer to an
			
 
				-unsigned short* buffer on x86 CPUs.
			
 
				-
			
 
				-
			
 
				-7. error values
			
 
				----------------
			
 
				-
			
 
				-All functions in LodePNG that return an error code, return 0 if everything went
			
 
				-OK, or a non-zero code if there was an error.
			
 
				-
			
 
				-The meaning of the LodePNG error values can be retrieved with the function
			
 
				-lodepng_error_text: given the numerical error code, it returns a description
			
 
				-of the error in English as a string.
			
 
				-
			
 
				-Check the implementation of lodepng_error_text to see the meaning of each code.
			
 
				-
			
 
				-
			
 
				-8. chunks and PNG editing
			
 
				--------------------------
			
 
				-
			
 
				-If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
			
 
				-editor that should follow the rules about handling of unknown chunks, or if your
			
 
				-program is able to read other types of chunks than the ones handled by LodePNG,
			
 
				-then that's possible with the chunk functions of LodePNG.
			
 
				-
			
 
				-A PNG chunk has the following layout:
			
 
				-
			
 
				-4 bytes length
			
 
				-4 bytes type name
			
 
				-length bytes data
			
 
				-4 bytes CRC
			
 
				-
			
 
				-8.1. iterating through chunks
			
 
				------------------------------
			
 
				-
			
 
				-If you have a buffer containing the PNG image data, then the first chunk (the
			
 
				-IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
			
 
				-signature of the PNG and are not part of a chunk. But if you start at byte 8
			
 
				-then you have a chunk, and can check the following things of it.
			
 
				-
			
 
				-NOTE: none of these functions check for memory buffer boundaries. To avoid
			
 
				-exploits, always make sure the buffer contains all the data of the chunks.
			
 
				-When using lodepng_chunk_next, make sure the returned value is within the
			
 
				-allocated memory.
			
 
				-
			
 
				-unsigned lodepng_chunk_length(const unsigned char* chunk):
			
 
				-
			
 
				-Get the length of the chunk's data. The total chunk length is this length + 12.
			
 
				-
			
 
				-void lodepng_chunk_type(char type[5], const unsigned char* chunk):
			
 
				-unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
			
 
				-
			
 
				-Get the type of the chunk or compare if it's a certain type
			
 
				-
			
 
				-unsigned char lodepng_chunk_critical(const unsigned char* chunk):
			
 
				-unsigned char lodepng_chunk_private(const unsigned char* chunk):
			
 
				-unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
			
 
				-
			
 
				-Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
			
 
				-Check if the chunk is private (public chunks are part of the standard, private ones not).
			
 
				-Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
			
 
				-chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
			
 
				-program doesn't handle that type of unknown chunk.
			
 
				-
			
 
				-unsigned char* lodepng_chunk_data(unsigned char* chunk):
			
 
				-const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
			
 
				-
			
 
				-Get a pointer to the start of the data of the chunk.
			
 
				-
			
 
				-unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
			
 
				-void lodepng_chunk_generate_crc(unsigned char* chunk):
			
 
				-
			
 
				-Check if the crc is correct or generate a correct one.
			
 
				-
			
 
				-unsigned char* lodepng_chunk_next(unsigned char* chunk):
			
 
				-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
			
 
				-
			
 
				-Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
			
 
				-functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
			
 
				-data available in the buffer to be able to go to the next chunk.
			
 
				-
			
 
				-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
			
 
				-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
			
 
				-                              const char* type, const unsigned char* data):
			
 
				-
			
 
				-These functions are used to create new chunks that are appended to the data in *out that has
			
 
				-length *outlength. The append function appends an existing chunk to the new data. The create
			
 
				-function creates a new chunk with the given parameters and appends it. Type is the 4-letter
			
 
				-name of the chunk.
			
 
				-
			
 
				-8.2. chunks in info_png
			
 
				------------------------
			
 
				-
			
 
				-The LodePNGInfo struct contains fields that hold the unknown chunks. It has 3
			
 
				-buffers (each with size) to contain 3 types of unknown chunks:
			
 
				-the ones that come before the PLTE chunk, the ones that come between the PLTE
			
 
				-and the IDAT chunks, and the ones that come after the IDAT chunks.
			
 
				-It's necessary to make the distinction between these 3 cases because the PNG
			
 
				-standard forces to keep the ordering of unknown chunks compared to the critical
			
 
				-chunks, but does not force any other ordering rules.
			
 
				-
			
 
				-info_png.unknown_chunks_data[0] is the chunks before PLTE
			
 
				-info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
			
 
				-info_png.unknown_chunks_data[2] is the chunks after IDAT
			
 
				-
			
 
				-The chunks in these 3 buffers can be iterated through and read by using the same
			
 
				-way described in the previous subchapter.
			
 
				-
			
 
				-When using the decoder to decode a PNG, you can make it store all unknown chunks
			
 
				-if you set the option settings.remember_unknown_chunks to 1. By default, this
			
 
				-option is off (0).
			
 
				-
			
 
				-The encoder will always encode unknown chunks that are stored in the info_png.
			
 
				-If you need it to add a particular chunk that isn't known by LodePNG, you can
			
 
				-use lodepng_chunk_append or lodepng_chunk_create to append it to the chunk data in
			
 
				-info_png.unknown_chunks_data[x].
			
 
				-
			
 
				-Chunks that are known by LodePNG should not be added in that way. E.g. to make
			
 
				-LodePNG add a bKGD chunk, set background_defined to true and add the correct
			
 
				-parameters there instead.
			
 
				-
			
 
				-
			
 
				-9. compiler support
			
 
				--------------------
			
 
				-
			
 
				-No libraries other than the current standard C library are needed to compile
			
 
				-LodePNG. For the C++ version, only the standard C++ library is needed on top.
			
 
				-Add the files lodepng.c(pp) and lodepng.h to your project, include
			
 
				-lodepng.h where needed, and your program can read/write PNG files.
			
 
				-
			
 
				-It is compatible with C90 and up, and C++03 and up.
			
 
				-
			
 
				-If performance is important, use optimization when compiling! For both the
			
 
				-encoder and decoder, this makes a large difference.
			
 
				-
			
 
				-Make sure that LodePNG is compiled with the same compiler of the same version
			
 
				-and with the same settings as the rest of the program, or the interfaces with
			
 
				-std::vectors and std::strings in C++ can be incompatible.
			
 
				-
			
 
				-CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
			
 
				-
			
 
				-*) gcc and g++
			
 
				-
			
 
				-LodePNG is developed in gcc so this compiler is natively supported. It gives no
			
 
				-warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
			
 
				-version 4.7.1 on Linux, 32-bit and 64-bit.
			
 
				-
			
 
				-*) Clang
			
 
				-
			
 
				-Fully supported and warning-free.
			
 
				-
			
 
				-*) Mingw
			
 
				-
			
 
				-The Mingw compiler (a port of gcc for Windows) should be fully supported by
			
 
				-LodePNG.
			
 
				-
			
 
				-*) Visual Studio and Visual C++ Express Edition
			
 
				-
			
 
				-LodePNG should be warning-free with warning level W4. Two warnings were disabled
			
 
				-with pragmas though: warning 4244 about implicit conversions, and warning 4996
			
 
				-where it wants to use a non-standard function fopen_s instead of the standard C
			
 
				-fopen.
			
 
				-
			
 
				-Visual Studio may want "stdafx.h" files to be included in each source file and
			
 
				-give an error "unexpected end of file while looking for precompiled header".
			
 
				-This is not standard C++ and will not be added to the stock LodePNG. You can
			
 
				-disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
			
 
				-Precompiled Headers, and set it to Not Using Precompiled Headers there.
			
 
				-
			
 
				-NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
			
 
				-VS6, are not guaranteed to work.
			
 
				-
			
 
				-*) Compilers on Macintosh
			
 
				-
			
 
				-LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
			
 
				-C and C++.
			
 
				-
			
 
				-*) Other Compilers
			
 
				-
			
 
				-If you encounter problems on any compilers, feel free to let me know and I may
			
 
				-try to fix it if the compiler is modern and standards compliant.
			
 
				-
			
 
				-
			
 
				-10. examples
			
 
				-------------
			
 
				-
			
 
				-This decoder example shows the most basic usage of LodePNG. More complex
			
 
				-examples can be found on the LodePNG website.
			
 
				-
			
 
				-10.1. decoder C++ example
			
 
				--------------------------
			
 
				-
			
 
				-#include "lodepng.h"
			
 
				-#include <iostream>
			
 
				-
			
 
				-int main(int argc, char *argv[])
			
 
				-{
			
 
				-  const char* filename = argc > 1 ? argv[1] : "test.png";
			
 
				-
			
 
				-  //load and decode
			
 
				-  std::vector<unsigned char> image;
			
 
				-  unsigned width, height;
			
 
				-  unsigned error = lodepng::decode(image, width, height, filename);
			
 
				-
			
 
				-  //if there's an error, display it
			
 
				-  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
			
 
				-
			
 
				-  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
			
 
				-}
			
 
				-
			
 
				-10.2. decoder C example
			
 
				------------------------
			
 
				-
			
 
				-#include "lodepng.h"
			
 
				-
			
 
				-int main(int argc, char *argv[])
			
 
				-{
			
 
				-  unsigned error;
			
 
				-  unsigned char* image;
			
 
				-  size_t width, height;
			
 
				-  const char* filename = argc > 1 ? argv[1] : "test.png";
			
 
				-
			
 
				-  error = lodepng_decode32_file(&image, &width, &height, filename);
			
 
				-
			
 
				-  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
			
 
				-
			
 
				-  / * use image here * /
			
 
				-
			
 
				-  free(image);
			
 
				-  return 0;
			
 
				-}
			
 
				-
			
 
				-11. state settings reference
			
 
				-----------------------------
			
 
				-
			
 
				-A quick reference of some settings to set on the LodePNGState
			
 
				-
			
 
				-For decoding:
			
 
				-
			
 
				-state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
			
 
				-state.decoder.zlibsettings.custom_...: use custom inflate function
			
 
				-state.decoder.ignore_crc: ignore CRC checksums
			
 
				-state.decoder.color_convert: convert internal PNG color to chosen one
			
 
				-state.decoder.read_text_chunks: whether to read in text metadata chunks
			
 
				-state.decoder.remember_unknown_chunks: whether to read in unknown chunks
			
 
				-state.info_raw.colortype: desired color type for decoded image
			
 
				-state.info_raw.bitdepth: desired bit depth for decoded image
			
 
				-state.info_raw....: more color settings, see struct LodePNGColorMode
			
 
				-state.info_png....: no settings for decoder but output, see struct LodePNGInfo
			
 
				-
			
 
				-For encoding:
			
 
				-
			
 
				-state.encoder.zlibsettings.btype: disable compression by setting it to 0
			
 
				-state.encoder.zlibsettings.use_lz77: use LZ77 in compression
			
 
				-state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
			
 
				-state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
			
 
				-state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
			
 
				-state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
			
 
				-state.encoder.zlibsettings.custom_...: use custom deflate function
			
 
				-state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
			
 
				-state.encoder.filter_palette_zero: PNG filter strategy for palette
			
 
				-state.encoder.filter_strategy: PNG filter strategy to encode with
			
 
				-state.encoder.force_palette: add palette even if not encoding to one
			
 
				-state.encoder.add_id: add LodePNG identifier and version as a text chunk
			
 
				-state.encoder.text_compression: use compressed text chunks for metadata
			
 
				-state.info_raw.colortype: color type of raw input image you provide
			
 
				-state.info_raw.bitdepth: bit depth of raw input image you provide
			
 
				-state.info_raw: more color settings, see struct LodePNGColorMode
			
 
				-state.info_png.color.colortype: desired color type if auto_convert is false
			
 
				-state.info_png.color.bitdepth: desired bit depth if auto_convert is false
			
 
				-state.info_png.color....: more color settings, see struct LodePNGColorMode
			
 
				-state.info_png....: more PNG related settings, see struct LodePNGInfo
			
 
				-
			
 
				-
			
 
				-12. changes
			
 
				------------
			
 
				-
			
 
				-The version number of LodePNG is the date of the change given in the format
			
 
				-yyyymmdd.
			
 
				-
			
 
				-Some changes aren't backwards compatible. Those are indicated with a (!)
			
 
				-symbol.
			
 
				-
			
 
				-*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort).
			
 
				-*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within
			
 
				-   the limits of pure C90).
			
 
				-*) 08 dec 2015: Made load_file function return error if file can't be opened.
			
 
				-*) 24 okt 2015: Bugfix with decoding to palette output.
			
 
				-*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
			
 
				-*) 23 aug 2014: Reduced needless memory usage of decoder.
			
 
				-*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
			
 
				-    simplicity. Made ColorProfile public.
			
 
				-*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
			
 
				-*) 22 dec 2013: Power of two windowsize required for optimization.
			
 
				-*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
			
 
				-*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
			
 
				-*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
			
 
				-    prefix for the custom allocators and made it possible with a new #define to
			
 
				-    use custom ones in your project without needing to change lodepng's code.
			
 
				-*) 28 jan 2013: Bugfix with color key.
			
 
				-*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
			
 
				-*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
			
 
				-    (no palette). Better deflate tree encoding. New compression tweak settings.
			
 
				-    Faster color conversions while decoding. Some internal cleanups.
			
 
				-*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
			
 
				-*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
			
 
				-    and made it work with function pointers instead.
			
 
				-*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
			
 
				-    and free functions and toggle #defines from compiler flags. Small fixes.
			
 
				-*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
			
 
				-*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
			
 
				-    redundant C++ codec classes. Reduced amount of structs. Everything changed,
			
 
				-    but it is cleaner now imho and functionality remains the same. Also fixed
			
 
				-    several bugs and shrunk the implementation code. Made new samples.
			
 
				-*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
			
 
				-    PNG color model and bit depth, based on the amount and type of colors of the
			
 
				-    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
			
 
				-*) 9 okt 2011: simpler hash chain implementation for the encoder.
			
 
				-*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
			
 
				-*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
			
 
				-    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
			
 
				-    better ones (it's quite significant). A setting to do an experimental, slow,
			
 
				-    brute force search for PNG filter types is added.
			
 
				-*) 17 aug 2011 (!): changed some C zlib related function names.
			
 
				-*) 16 aug 2011: made the code less wide (max 120 characters per line).
			
 
				-*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
			
 
				-*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
			
 
				-*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
			
 
				-    to optimize long sequences of zeros.
			
 
				-*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
			
 
				-    LodePNG_InfoColor_canHaveAlpha functions for convenience.
			
 
				-*) 7 nov 2010: added LodePNG_error_text function to get error code description.
			
 
				-*) 30 okt 2010: made decoding slightly faster
			
 
				-*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
			
 
				-     Reorganized the documentation and the declaration order in the header.
			
 
				-*) 08 aug 2010: only changed some comments and external samples.
			
 
				-*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
			
 
				-*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
			
 
				-*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
			
 
				-    read by ignoring the problem but windows apps couldn't.
			
 
				-*) 06 jun 2008: added more error checks for out of memory cases.
			
 
				-*) 26 apr 2008: added a few more checks here and there to ensure more safety.
			
 
				-*) 06 mar 2008: crash with encoding of strings fixed
			
 
				-*) 02 feb 2008: support for international text chunks added (iTXt)
			
 
				-*) 23 jan 2008: small cleanups, and #defines to divide code in sections
			
 
				-*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
			
 
				-*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
			
 
				-*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
			
 
				-    Also various fixes, such as in the deflate and the padding bits code.
			
 
				-*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
			
 
				-    filtering code of encoder.
			
 
				-*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
			
 
				-    C++ wrapper around this provides an interface almost identical to before.
			
 
				-    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
			
 
				-    are together in these files but it works both for C and C++ compilers.
			
 
				-*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
			
 
				-*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
			
 
				-*) 09 aug 2007: some VS2005 warnings removed again
			
 
				-*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
			
 
				-*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
			
 
				-*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
			
 
				-    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
			
 
				-*) 02 jun 2007: made the encoder add a tag with version by default
			
 
				-*) 27 may 2007: zlib and png code separated (but still in the same file),
			
 
				-    simple encoder/decoder functions added for more simple usage cases
			
 
				-*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
			
 
				-    moved some examples from here to lodepng_examples.cpp
			
 
				-*) 12 may 2007: palette decoding bug fixed
			
 
				-*) 24 apr 2007: changed the license from BSD to the zlib license
			
 
				-*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
			
 
				-*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
			
 
				-    palettized PNG images. Plus little interface change with palette and texts.
			
 
				-*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
			
 
				-    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
			
 
				-*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
			
 
				-    and supported by the encoder, resulting in smaller PNGs at the output.
			
 
				-*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
			
 
				-*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
			
 
				-    greyscale type to 8-bit greyscale with or without alpha.
			
 
				-*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
			
 
				-    to convert to and is more uniform. See the manual for how it works now.
			
 
				-*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
			
 
				-    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
			
 
				-    at last made the decoder give errors for incorrect Adler32 or Crc.
			
 
				-*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
			
 
				-*) 29 dec 2006: Added support for encoding images without alpha channel, and
			
 
				-    cleaned out code as well as making certain parts faster.
			
 
				-*) 28 dec 2006: Added "Settings" to the encoder.
			
 
				-*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
			
 
				-    Removed some code duplication in the decoder. Fixed little bug in an example.
			
 
				-*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
			
 
				-    Fixed a bug of the decoder with 16-bit per color.
			
 
				-*) 15 okt 2006: Changed documentation structure
			
 
				-*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
			
 
				-    given image buffer, however for now it's not compressed.
			
 
				-*) 08 sep 2006: (!) Changed to interface with a Decoder class
			
 
				-*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different
			
 
				-    way. Renamed decodePNG to decodePNGGeneric.
			
 
				-*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
			
 
				-    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
			
 
				-*) 28 jul 2006: Cleaned the code and added new error checks.
			
 
				-    Corrected terminology "deflate" into "inflate".
			
 
				-*) 23 jun 2006: Added SDL example in the documentation in the header, this
			
 
				-    example allows easy debugging by displaying the PNG and its transparency.
			
 
				-*) 22 jun 2006: (!) Changed way to obtain error value. Added
			
 
				-    loadFile function for convenience. Made decodePNG32 faster.
			
 
				-*) 21 jun 2006: (!) Changed type of info vector to unsigned.
			
 
				-    Changed position of palette in info vector. Fixed an important bug that
			
 
				-    happened on PNGs with an uncompressed block.
			
 
				-*) 16 jun 2006: Internally changed unsigned into unsigned where
			
 
				-    needed, and performed some optimizations.
			
 
				-*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
			
 
				-    in LodePNG namespace. Changed the order of the parameters. Rewrote the
			
 
				-    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
			
 
				-*) 22 apr 2006: Optimized and improved some code
			
 
				-*) 07 sep 2005: (!) Changed to std::vector interface
			
 
				-*) 12 aug 2005: Initial release (C++, decoder only)
			
 
				-
			
 
				-
			
 
				-13. contact information
			
 
				------------------------
			
 
				-
			
 
				-Feel free to contact me with suggestions, problems, comments, ... concerning
			
 
				-LodePNG. If you encounter a PNG image that doesn't work properly with this
			
 
				-decoder, feel free to send it and I'll use it to find and fix the problem.
			
 
				-
			
 
				-My email address is (puzzle the account and domain together with an @ symbol):
			
 
				-Domain: gmail dot com.
			
 
				-Account: lode dot vandevenne.
			
 
				-
			
 
				-
			
 
				-Copyright (c) 2005-2016 Lode Vandevenne
			
 
				-*/
			
--- a/3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
+++ b/3rdparty/nvtt/NVIDIA_Texture_Tools_LICENSE.txt
@@ -1,24 +0,0 @@
 
				-NVIDIA Texture Tools 2.0 is licensed under the MIT license.

			
 
				-

			
 
				-Copyright (c) 2007 NVIDIA Corporation

			
 
				-

			
 
				-Permission is hereby granted, free of charge, to any person

			
 
				-obtaining a copy of this software and associated documentation

			
 
				-files (the "Software"), to deal in the Software without

			
 
				-restriction, including without limitation the rights to use,

			
 
				-copy, modify, merge, publish, distribute, sublicense, and/or sell

			
 
				-copies of the Software, and to permit persons to whom the

			
 
				-Software is furnished to do so, subject to the following

			
 
				-conditions:

			
 
				-

			
 
				-The above copyright notice and this permission notice shall be

			
 
				-included in all copies or substantial portions of the Software.

			
 
				-

			
 
				-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

			
 
				-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES

			
 
				-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

			
 
				-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

			
 
				-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

			
 
				-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

			
 
				-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

			
 
				-OTHER DEALINGS IN THE SOFTWARE.

			
--- a/3rdparty/nvtt/bc6h/bits.h
+++ b/3rdparty/nvtt/bc6h/bits.h
@@ -1,75 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-#ifndef _ZOH_BITS_H
			
 
				-#define _ZOH_BITS_H
			
 
				-
			
 
				-// read/write a bitstream
			
 
				-
			
 
				-#include "nvcore/debug.h"
			
 
				-
			
 
				-namespace ZOH {
			
 
				-
			
 
				-class Bits
			
 
				-{
			
 
				-public:
			
 
				-
			
 
				-	Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
			
 
				-	Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
			
 
				-
			
 
				-	void write(int value, int nbits) {
			
 
				-		nvAssert (nbits >= 0 && nbits < 32);
			
 
				-		nvAssert (sizeof(int)>= 4);
			
 
				-		for (int i=0; i<nbits; ++i)
			
 
				-			writeone(value>>i);
			
 
				-	}
			
 
				-	int read(int nbits) { 
			
 
				-		nvAssert (nbits >= 0 && nbits < 32);
			
 
				-		nvAssert (sizeof(int)>= 4);
			
 
				-		int out = 0;
			
 
				-		for (int i=0; i<nbits; ++i)
			
 
				-			out |= readone() << i;
			
 
				-		return out;
			
 
				-	}
			
 
				-	int getptr() { return bptr; }
			
 
				-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
			
 
				-	int getsize() { return bend; }
			
 
				-
			
 
				-private:
			
 
				-	int	bptr;		// next bit to read
			
 
				-	int bend;		// last written bit + 1
			
 
				-	char *bits;		// ptr to user bit stream
			
 
				-	const char *cbits;	// ptr to const user bit stream
			
 
				-	int maxbits;	// max size of user bit stream
			
 
				-	char readonly;	// 1 if this is a read-only stream
			
 
				-
			
 
				-	int readone() {
			
 
				-		nvAssert (bptr < bend);
			
 
				-		if (bptr >= bend) return 0;
			
 
				-		int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
			
 
				-		++bptr;
			
 
				-		return bit != 0;
			
 
				-	}
			
 
				-	void writeone(int bit) {
			
 
				-		nvAssert (!readonly); // "Writing a read-only bit stream"
			
 
				-		nvAssert (bptr < maxbits);
			
 
				-		if (bptr >= maxbits) return;
			
 
				-		if (bit&1)
			
 
				-			bits[bptr>>3] |= 1 << (bptr & 7);
			
 
				-		else
			
 
				-			bits[bptr>>3] &= ~(1 << (bptr & 7));
			
 
				-		if (bptr++ >= bend) bend = bptr;
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc6h/shapes_two.h
+++ b/3rdparty/nvtt/bc6h/shapes_two.h
@@ -1,133 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-#pragma once
			
 
				-#ifndef _ZOH_SHAPES_TWO_H
			
 
				-#define _ZOH_SHAPES_TWO_H
			
 
				-
			
 
				-// shapes for two regions
			
 
				-
			
 
				-#define NREGIONS 2
			
 
				-#define NSHAPES 64
			
 
				-#define SHAPEBITS 6
			
 
				-
			
 
				-static const int shapes[NSHAPES*16] = 
			
 
				-{
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
			
 
				-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
			
 
				-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
			
 
				-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
			
 
				-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
			
 
				-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
			
 
				-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
			
 
				-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
			
 
				-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
			
 
				-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
			
 
				-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
			
 
				-
			
 
				-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
			
 
				-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
			
 
				-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-
			
 
				-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
			
 
				-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
			
 
				-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
			
 
				-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
			
 
				-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
			
 
				-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
			
 
				-
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
			
 
				-
			
 
				-static const int shapeindex_to_compressed_indices[NSHAPES*2] = 
			
 
				-{
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-
			
 
				-	0,15,  0, 2,  0, 8,  0, 2,
			
 
				-	0, 2,  0, 8,  0, 8,  0,15,
			
 
				-	0, 2,  0, 8,  0, 2,  0, 2,
			
 
				-	0, 8,  0, 8,  0, 2,  0, 2,
			
 
				-
			
 
				-	0,15,  0,15,  0, 6,  0, 8,
			
 
				-	0, 2,  0, 8,  0,15,  0,15,
			
 
				-	0, 2,  0, 8,  0, 2,  0, 2,
			
 
				-	0, 2,  0,15,  0,15,  0, 6,
			
 
				-
			
 
				-	0, 6,  0, 2,  0, 6,  0, 8,
			
 
				-	0,15,  0,15,  0, 2,  0, 2,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0, 2,  0, 2,  0,15
			
 
				-
			
 
				-};
			
 
				-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc6h/tile.h
+++ b/3rdparty/nvtt/bc6h/tile.h
@@ -1,82 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-#ifndef _ZOH_TILE_H
			
 
				-#define _ZOH_TILE_H
			
 
				-
			
 
				-#include "zoh_utils.h"
			
 
				-#include "nvmath/vector.h"
			
 
				-#include <math.h>
			
 
				-
			
 
				-namespace ZOH {
			
 
				-
			
 
				-//#define	USE_IMPORTANCE_MAP	1		// define this if you want to increase importance of some pixels in tile
			
 
				-class Tile
			
 
				-{
			
 
				-public:
			
 
				-	// NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value
			
 
				-	static float half2float(uint16 h)
			
 
				-	{
			
 
				-		return (float) Utils::ushort_to_format(h);
			
 
				-	}
			
 
				-	// NOTE: this is the inverse of the above operation
			
 
				-	static uint16 float2half(float f)
			
 
				-	{
			
 
				-		return Utils::format_to_ushort((int)f);
			
 
				-	}
			
 
				-
			
 
				-	// look for adjacent pixels that are identical. if there are enough of them, increase their importance
			
 
				-	void generate_importance_map()
			
 
				-	{
			
 
				-		// initialize
			
 
				-		for (int y=0; y<size_y; ++y)
			
 
				-		for (int x=0; x<size_x; ++x)
			
 
				-		{
			
 
				-			// my importance is increased if I am identical to any of my 4-neighbors
			
 
				-			importance_map[y][x] = match_4_neighbor(x,y) ? 5.0f : 1.0f;
			
 
				-		}
			
 
				-	}
			
 
				-	bool is_equal(int x, int y, int xn, int yn)
			
 
				-	{
			
 
				-		if (xn < 0 || xn >= size_x || yn < 0 || yn >= size_y)
			
 
				-			return false;
			
 
				-		return( (data[y][x].x == data[yn][xn].x) &&
			
 
				-				(data[y][x].y == data[yn][xn].y) &&
			
 
				-				(data[y][x].z == data[yn][xn].z) );
			
 
				-	}
			
 
				-
			
 
				-#ifdef USE_IMPORTANCE_MAP
			
 
				-	bool match_4_neighbor(int x, int y)
			
 
				-	{
			
 
				-		return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1);
			
 
				-	}
			
 
				-#else
			
 
				-	bool match_4_neighbor(int, int)
			
 
				-	{
			
 
				-		return false;
			
 
				-	}
			
 
				-#endif
			
 
				-
			
 
				-	Tile() {};
			
 
				-	~Tile(){};
			
 
				-	Tile(int xs, int ys) {size_x = xs; size_y = ys;}
			
 
				-
			
 
				-	static const int TILE_H = 4;
			
 
				-	static const int TILE_W = 4;
			
 
				-	static const int TILE_TOTAL = TILE_H * TILE_W;
			
 
				-    nv::Vector3 data[TILE_H][TILE_W];
			
 
				-	float importance_map[TILE_H][TILE_W];
			
 
				-	int	size_x, size_y;			// actual size of tile
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif // _ZOH_TILE_H
			
--- a/3rdparty/nvtt/bc6h/zoh.cpp
+++ b/3rdparty/nvtt/bc6h/zoh.cpp
@@ -1,197 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// the zoh compressor and decompressor
			
 
				-
			
 
				-#include "tile.h"
			
 
				-#include "zoh.h"
			
 
				-
			
 
				-#include <string.h> // memcpy
			
 
				-
			
 
				-using namespace ZOH;
			
 
				-
			
 
				-
			
 
				-bool ZOH::isone(const char *block)
			
 
				-{
			
 
				-	char code = block[0] & 0x1F;
			
 
				-
			
 
				-	return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f);
			
 
				-}
			
 
				-
			
 
				-void ZOH::compress(const Tile &t, char *block)
			
 
				-{
			
 
				-	char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE];
			
 
				-
			
 
				-	float mseone = ZOH::compressone(t, oneblock);
			
 
				-	float msetwo = ZOH::compresstwo(t, twoblock);
			
 
				-
			
 
				-	if (mseone <= msetwo)
			
 
				-		memcpy(block, oneblock, ZOH::BLOCKSIZE);
			
 
				-	else
			
 
				-		memcpy(block, twoblock, ZOH::BLOCKSIZE);
			
 
				-}
			
 
				-
			
 
				-void ZOH::decompress(const char *block, Tile &t)
			
 
				-{
			
 
				-	if (ZOH::isone(block))
			
 
				-		ZOH::decompressone(block, t);
			
 
				-	else
			
 
				-		ZOH::decompresstwo(block, t);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-void ZOH::compress(string inf, string zohf)
			
 
				-{
			
 
				-	Array2D<Rgba> pixels;
			
 
				-	int w, h;
			
 
				-	char block[ZOH::BLOCKSIZE];
			
 
				-
			
 
				-	Exr::readRgba(inf, pixels, w, h);
			
 
				-	FILE *zohfile = fopen(zohf.c_str(), "wb");
			
 
				-	if (zohfile == NULL) throw "Unable to open .zoh file for write";
			
 
				-
			
 
				-	// stuff for progress bar O.o
			
 
				-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
			
 
				-	int tilecnt = 0;
			
 
				-	int ndots = 25;
			
 
				-	int dotcnt = 0;
			
 
				-	printf("Progress [");
			
 
				-	for (int i=0; i<ndots;++i) printf(" ");
			
 
				-	printf("]\rProgress ["); fflush(stdout);
			
 
				-
			
 
				-	// convert to tiles and compress each tile
			
 
				-	for (int y=0; y<h; y+=Tile::TILE_H)
			
 
				-	{
			
 
				-		int ysize = min(Tile::TILE_H, h-y);
			
 
				-		for (int x=0; x<w; x+=Tile::TILE_W)
			
 
				-		{
			
 
				-			int xsize = min(Tile::TILE_W, w-x);
			
 
				-			Tile t(xsize, ysize);
			
 
				-
			
 
				-			t.insert(pixels, x, y);
			
 
				-
			
 
				-			ZOH::compress(t, block);
			
 
				-			if (fwrite(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
			
 
				-				throw "File error on write";
			
 
				-
			
 
				-			// progress bar
			
 
				-			++tilecnt;
			
 
				-			if (tilecnt > (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; }
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	printf("]\n");		// advance to next line finally
			
 
				-
			
 
				-	if (fclose(zohfile)) throw "Close failed on .zoh file";
			
 
				-}
			
 
				-
			
 
				-static int str2int(std::string s)
			
 
				-{
			
 
				-	int thing;
			
 
				-	std::stringstream str (stringstream::in | stringstream::out);
			
 
				-	str << s;
			
 
				-	str >> thing;
			
 
				-	return thing;
			
 
				-}
			
 
				-
			
 
				-// zoh file name is ...-w-h.zoh, extract width and height
			
 
				-static void extract(string zohf, int &w, int &h)
			
 
				-{
			
 
				-	size_t n = zohf.rfind('.', zohf.length()-1);
			
 
				-	size_t n1 = zohf.rfind('-', n-1);
			
 
				-	size_t n2 = zohf.rfind('-', n1-1);
			
 
				-	string width = zohf.substr(n2+1, n1-n2-1);
			
 
				-	w = str2int(width);
			
 
				-	string height = zohf.substr(n1+1, n-n1-1);
			
 
				-	h = str2int(height);
			
 
				-}
			
 
				-
			
 
				-static int mode_to_prec[] = {
			
 
				-	10,7,11,10,
			
 
				-	10,7,11,11,
			
 
				-	10,7,11,12,
			
 
				-	10,7,9,16,
			
 
				-	10,7,8,-1,
			
 
				-	10,7,8,-1,
			
 
				-	10,7,8,-1,
			
 
				-	10,7,6,-1,
			
 
				-};
			
 
				-
			
 
				-static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions;
			
 
				-
			
 
				-static void stats(char block[ZOH::BLOCKSIZE])
			
 
				-{
			
 
				-	char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
			
 
				-	int prec = mode_to_prec[mode];
			
 
				-	nvAssert (prec != -1);
			
 
				-	if (!ZOH::isone(block))
			
 
				-	{
			
 
				-		tworegions++;
			
 
				-		prechisttwo[prec]++;
			
 
				-		int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3);
			
 
				-		shapeindexhist[shapeindex]++;
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		oneregion++;
			
 
				-		prechistone[prec]++;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void printstats()
			
 
				-{
			
 
				-	printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]);
			
 
				-	printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]);
			
 
				-	printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]);
			
 
				-	printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]);
			
 
				-	printf("\nOne region %5.2f%%  Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions));
			
 
				-	printf("\n");
			
 
				-}
			
 
				-
			
 
				-void ZOH::decompress(string zohf, string outf)
			
 
				-{
			
 
				-	Array2D<Rgba> pixels;
			
 
				-	int w, h;
			
 
				-	char block[ZOH::BLOCKSIZE];
			
 
				-
			
 
				-	extract(zohf, w, h);
			
 
				-	FILE *zohfile = fopen(zohf.c_str(), "rb");
			
 
				-	if (zohfile == NULL) throw "Unable to open .zoh file for read";
			
 
				-	pixels.resizeErase(h, w);
			
 
				-
			
 
				-	// convert to tiles and decompress each tile
			
 
				-	for (int y=0; y<h; y+=Tile::TILE_H)
			
 
				-	{
			
 
				-		int ysize = min(Tile::TILE_H, h-y);
			
 
				-		for (int x=0; x<w; x+=Tile::TILE_W)
			
 
				-		{
			
 
				-			int xsize = min(Tile::TILE_W, w-x);
			
 
				-			Tile t(xsize, ysize);
			
 
				-
			
 
				-			if (fread(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
			
 
				-				throw "File error on read";
			
 
				-
			
 
				-			stats(block);	// collect statistics
			
 
				-
			
 
				-			ZOH::decompress(block, t);
			
 
				-
			
 
				-			t.extract(pixels, x, y);
			
 
				-		}
			
 
				-	}
			
 
				-	if (fclose(zohfile)) throw "Close failed on .zoh file";
			
 
				-	Exr::writeRgba(outf, pixels, w, h);
			
 
				-
			
 
				-#ifndef EXTERNAL_RELEASE
			
 
				-	printstats();	// print statistics
			
 
				-#endif
			
 
				-}
			
 
				-*/
			
--- a/3rdparty/nvtt/bc6h/zoh.h
+++ b/3rdparty/nvtt/bc6h/zoh.h
@@ -1,65 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-#pragma once
			
 
				-#ifndef _ZOH_H
			
 
				-#define _ZOH_H
			
 
				-
			
 
				-#include "tile.h"
			
 
				-
			
 
				-namespace ZOH {
			
 
				-
			
 
				-// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
			
 
				-
			
 
				-static const int NREGIONS_TWO	= 2;
			
 
				-static const int NREGIONS_ONE	= 1;
			
 
				-static const int NCHANNELS		= 3;
			
 
				-
			
 
				-struct FltEndpts
			
 
				-{
			
 
				-    nv::Vector3 A;
			
 
				-    nv::Vector3 B;
			
 
				-};
			
 
				-
			
 
				-struct IntEndpts
			
 
				-{
			
 
				-	int A[NCHANNELS];
			
 
				-	int B[NCHANNELS];
			
 
				-};
			
 
				-
			
 
				-struct ComprEndpts
			
 
				-{
			
 
				-	uint A[NCHANNELS];
			
 
				-	uint B[NCHANNELS];
			
 
				-};
			
 
				-
			
 
				-static const int BLOCKSIZE=16;
			
 
				-static const int BITSIZE=128;
			
 
				-
			
 
				-void compress(const Tile &t, char *block);
			
 
				-void decompress(const char *block, Tile &t);
			
 
				-
			
 
				-float compressone(const Tile &t, char *block);
			
 
				-float compresstwo(const Tile &t, char *block);
			
 
				-void decompressone(const char *block, Tile &t);
			
 
				-void decompresstwo(const char *block, Tile &t);
			
 
				-
			
 
				-float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
			
 
				-float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
			
 
				-
			
 
				-float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
			
 
				-float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
			
 
				-
			
 
				-bool isone(const char *block);
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif // _ZOH_H
			
--- a/3rdparty/nvtt/bc6h/zoh_utils.cpp
+++ b/3rdparty/nvtt/bc6h/zoh_utils.cpp
@@ -1,324 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Utility and common routines
			
 
				-
			
 
				-#include "zoh_utils.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include <math.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace ZOH;
			
 
				-
			
 
				-static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
			
 
				-static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
			
 
				-
			
 
				-/*static*/ Format Utils::FORMAT;
			
 
				-
			
 
				-int Utils::lerp(int a, int b, int i, int denom)
			
 
				-{
			
 
				-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
			
 
				-	nvDebugCheck (i >= 0 && i <= denom);
			
 
				-
			
 
				-	int round = 32, shift = 6;
			
 
				-	const int *weights;
			
 
				-
			
 
				-	switch(denom)
			
 
				-	{
			
 
				-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
			
 
				-	case 15:	weights = denom15_weights_64; break;
			
 
				-	case 7:		weights = denom7_weights_64; break;
			
 
				-	default:	nvDebugCheck(0);
			
 
				-	}
			
 
				-
			
 
				-	return (a*weights[denom-i] +b*weights[i] + round) >> shift;
			
 
				-}
			
 
				-
			
 
				-Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
			
 
				-{
			
 
				-	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
			
 
				-	nvDebugCheck (i >= 0 && i <= denom);
			
 
				-
			
 
				-	int shift = 6;
			
 
				-	const int *weights;
			
 
				-
			
 
				-	switch(denom)
			
 
				-	{
			
 
				-	case 3:		denom *= 5; i *= 5;	// fall through to case 15
			
 
				-	case 15:	weights = denom15_weights_64; break;
			
 
				-	case 7:		weights = denom7_weights_64; break;
			
 
				-	default:	nvUnreachable();
			
 
				-	}
			
 
				-
			
 
				-	// no need to round these as this is an exact division
			
 
				-	return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*
			
 
				-	For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
			
 
				-	For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
			
 
				-
			
 
				-	The conversions proceed as follows:
			
 
				-
			
 
				-	unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
			
 
				-	signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
			
 
				-	unsigned int: get bits. return as a positive value.
			
 
				-	signed int. get bits. return as a value in -32768..32767.
			
 
				-
			
 
				-	The inverse conversions are just the inverse of the above.
			
 
				-*/
			
 
				-
			
 
				-// clamp the 3 channels of the input vector to the allowable range based on FORMAT
			
 
				-// note that each channel is a float storing the allowable range as a bit pattern converted to float
			
 
				-// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
			
 
				-
			
 
				-void Utils::clamp(Vector3 &v)
			
 
				-{
			
 
				-	for (int i=0; i<3; ++i)
			
 
				-	{
			
 
				-		switch(Utils::FORMAT)
			
 
				-		{
			
 
				-		case UNSIGNED_F16:
			
 
				-			if (v.component[i] < 0.0) v.component[i] = 0;
			
 
				-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
			
 
				-			break;
			
 
				-
			
 
				-		case SIGNED_F16:
			
 
				-			if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
			
 
				-			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
			
 
				-			break;
			
 
				-
			
 
				-		default:
			
 
				-			nvUnreachable();
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// convert a u16 value to s17 (represented as an int) based on the format expected
			
 
				-int Utils::ushort_to_format(unsigned short input)
			
 
				-{
			
 
				-	int out, s;
			
 
				-
			
 
				-	// clamp to the valid range we are expecting
			
 
				-	switch (Utils::FORMAT)
			
 
				-	{
			
 
				-	case UNSIGNED_F16:
			
 
				-		if (input & F16S_MASK) out = 0;
			
 
				-		else if (input > F16MAX) out = F16MAX;
			
 
				-		else out = input;
			
 
				-		break;
			
 
				-
			
 
				-	case SIGNED_F16:
			
 
				-		s = input & F16S_MASK;
			
 
				-		input &= F16EM_MASK;
			
 
				-		if (input > F16MAX) out = F16MAX;
			
 
				-		else out = input;
			
 
				-		out = s ? -out : out;
			
 
				-		break;
			
 
				-	}
			
 
				-	return out;
			
 
				-}
			
 
				-
			
 
				-// convert a s17 value to u16 based on the format expected
			
 
				-unsigned short Utils::format_to_ushort(int input)
			
 
				-{
			
 
				-	unsigned short out;
			
 
				-
			
 
				-	// clamp to the valid range we are expecting
			
 
				-	switch (Utils::FORMAT)
			
 
				-	{
			
 
				-	case UNSIGNED_F16:
			
 
				-		nvDebugCheck (input >= 0 && input <= F16MAX);
			
 
				-		out = input;
			
 
				-		break;
			
 
				-
			
 
				-	case SIGNED_F16:
			
 
				-		nvDebugCheck (input >= -F16MAX && input <= F16MAX);
			
 
				-		// convert to sign-magnitude
			
 
				-		int s;
			
 
				-		if (input < 0) { s = F16S_MASK; input = -input; }
			
 
				-		else           { s = 0; }
			
 
				-		out = s | input;
			
 
				-		break;
			
 
				-	}
			
 
				-	return out;
			
 
				-}
			
 
				-
			
 
				-// quantize the input range into equal-sized bins
			
 
				-int Utils::quantize(float value, int prec)
			
 
				-{
			
 
				-	int q, ivalue, s;
			
 
				-
			
 
				-	nvDebugCheck (prec > 1);	// didn't bother to make it work for 1
			
 
				-
			
 
				-	value = (float)floor(value + 0.5);
			
 
				-
			
 
				-	int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0;	// bias precisions 11..16 to get a more accurate quantization
			
 
				-
			
 
				-	switch (Utils::FORMAT)
			
 
				-	{
			
 
				-	case UNSIGNED_F16:
			
 
				-		nvDebugCheck (value >= 0 && value <= F16MAX);
			
 
				-		ivalue = (int)value;
			
 
				-		q = ((ivalue << prec) + bias) / (F16MAX+1);
			
 
				-		nvDebugCheck (q >= 0 && q < (1 << prec));
			
 
				-		break;
			
 
				-
			
 
				-	case SIGNED_F16:
			
 
				-		nvDebugCheck (value >= -F16MAX && value <= F16MAX);
			
 
				-		// convert to sign-magnitude
			
 
				-		ivalue = (int)value;
			
 
				-		if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
			
 
				-
			
 
				-		q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
			
 
				-		if (s)
			
 
				-			q = -q;
			
 
				-		nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
			
 
				-		break;
			
 
				-	}
			
 
				-
			
 
				-	return q;
			
 
				-}
			
 
				-
			
 
				-int Utils::finish_unquantize(int q, int prec)
			
 
				-{
			
 
				-	if (Utils::FORMAT == UNSIGNED_F16)
			
 
				-		return (q * 31) >> 6;										// scale the magnitude by 31/64
			
 
				-	else if (Utils::FORMAT == SIGNED_F16)
			
 
				-		return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5;		// scale the magnitude by 31/32
			
 
				-	else
			
 
				-		return q;
			
 
				-}
			
 
				-
			
 
				-// unquantize each bin to midpoint of original bin range, except
			
 
				-// for the end bins which we push to an endpoint of the bin range.
			
 
				-// we do this to ensure we can represent all possible original values.
			
 
				-// the asymmetric end bins do not affect PSNR for the test images.
			
 
				-//
			
 
				-// code this function assuming an arbitrary bit pattern as the encoded block
			
 
				-int Utils::unquantize(int q, int prec)
			
 
				-{
			
 
				-	int unq, s;
			
 
				-
			
 
				-	nvDebugCheck (prec > 1);	// not implemented for prec 1
			
 
				-
			
 
				-	switch (Utils::FORMAT)
			
 
				-	{
			
 
				-	// modify this case to move the multiplication by 31 after interpolation.
			
 
				-	// Need to use finish_unquantize.
			
 
				-
			
 
				-	// since we have 16 bits available, let's unquantize this to 16 bits unsigned
			
 
				-	// thus the scale factor is [0-7c00)/[0-10000) = 31/64
			
 
				-	case UNSIGNED_F16:
			
 
				-		if (prec >= 15) 
			
 
				-			unq = q;
			
 
				-		else if (q == 0) 
			
 
				-			unq = 0;
			
 
				-		else if (q == ((1<<prec)-1)) 
			
 
				-			unq = U16MAX;
			
 
				-		else
			
 
				-			unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
			
 
				-		break;
			
 
				-
			
 
				-	// here, let's stick with S16 (no apparent quality benefit from going to S17)
			
 
				-	// range is (-7c00..7c00)/(-8000..8000) = 31/32
			
 
				-	case SIGNED_F16:
			
 
				-		// don't remove this test even though it appears equivalent to the code below
			
 
				-		// as it isn't -- the code below can overflow for prec = 16
			
 
				-		if (prec >= 16)
			
 
				-			unq = q;
			
 
				-		else
			
 
				-		{
			
 
				-			if (q < 0) { s = 1; q = -q; } else s = 0;
			
 
				-
			
 
				-			if (q == 0)
			
 
				-				unq = 0;
			
 
				-			else if (q >= ((1<<(prec-1))-1))
			
 
				-				unq = s ? -S16MAX : S16MAX;
			
 
				-			else
			
 
				-			{
			
 
				-				unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
			
 
				-				if (s)
			
 
				-					unq = -unq;
			
 
				-			}
			
 
				-		}
			
 
				-		break;
			
 
				-	}
			
 
				-	return unq;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-
			
 
				-// pick a norm!
			
 
				-#define	NORM_EUCLIDEAN 1
			
 
				-
			
 
				-float Utils::norm(const Vector3 &a, const Vector3 &b)
			
 
				-{
			
 
				-#ifdef	NORM_EUCLIDEAN
			
 
				-	return lengthSquared(a - b);
			
 
				-#endif
			
 
				-#ifdef	NORM_ABS
			
 
				-	Vector3 err = a - b;
			
 
				-	return fabs(err.x) + fabs(err.y) + fabs(err.z);
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-// parse <name>[<start>{:<end>}]{,}	
			
 
				-// the pointer starts here         ^
			
 
				-// name is 1 or 2 chars and matches field names. start and end are decimal numbers
			
 
				-void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
			
 
				-{
			
 
				-	if (ptr <= 0) return;
			
 
				-	--ptr;
			
 
				-	if (encoding[ptr] == ',') --ptr;
			
 
				-	nvDebugCheck (encoding[ptr] == ']');
			
 
				-	--ptr;
			
 
				-	endbit = 0;
			
 
				-	int scale = 1;
			
 
				-	while (encoding[ptr] != ':' && encoding[ptr] != '[')
			
 
				-	{
			
 
				-		nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
			
 
				-		endbit += (encoding[ptr--] - '0') * scale;
			
 
				-		scale *= 10;
			
 
				-	}
			
 
				-	int startbit = 0; scale = 1;
			
 
				-	if (encoding[ptr] == '[')
			
 
				-		startbit = endbit;
			
 
				-	else  
			
 
				-	{
			
 
				-		ptr--;
			
 
				-		while (encoding[ptr] != '[')
			
 
				-		{
			
 
				-			nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
			
 
				-			startbit += (encoding[ptr--] - '0') * scale;
			
 
				-			scale *= 10;
			
 
				-		}
			
 
				-	}
			
 
				-	len = startbit - endbit + 1;	// startbit>=endbit note
			
 
				-	--ptr;
			
 
				-	if (encoding[ptr] == 'm')		field = FIELD_M;
			
 
				-	else if (encoding[ptr] == 'd')	field = FIELD_D;
			
 
				-	else {
			
 
				-		// it's wxyz
			
 
				-		nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
			
 
				-		int foo = encoding[ptr--] - 'w';
			
 
				-		// now it is r g or b
			
 
				-		if (encoding[ptr] == 'r')		foo += 10;
			
 
				-		else if (encoding[ptr] == 'g')	foo += 20;
			
 
				-		else if (encoding[ptr] == 'b')	foo += 30;
			
 
				-		else nvDebugCheck(0);
			
 
				-		field = (Field) foo;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-
			
--- a/3rdparty/nvtt/bc6h/zoh_utils.h
+++ b/3rdparty/nvtt/bc6h/zoh_utils.h
@@ -1,72 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// utility class holding common routines
			
 
				-#ifndef _ZOH_UTILS_H
			
 
				-#define _ZOH_UTILS_H
			
 
				-
			
 
				-#include "nvmath/vector.h"
			
 
				-
			
 
				-namespace ZOH {
			
 
				-
			
 
				-inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
			
 
				-
			
 
				-enum Field {
			
 
				-    FIELD_M = 1,	// mode
			
 
				-    FIELD_D = 2,	// distribution/shape
			
 
				-    FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3,	// red channel endpoints or deltas
			
 
				-    FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3,	// green channel endpoints or deltas
			
 
				-    FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3,	// blue channel endpoints or deltas
			
 
				-};
			
 
				-
			
 
				-// some constants
			
 
				-static const int F16S_MASK	=  0x8000;		// f16 sign mask
			
 
				-static const int F16EM_MASK	=  0x7fff;		// f16 exp & mantissa mask
			
 
				-static const int U16MAX		=  0xffff;
			
 
				-static const int S16MIN		= -0x8000;
			
 
				-static const int S16MAX		=  0x7fff;
			
 
				-static const int INT16_MASK	=  0xffff;
			
 
				-static const int F16MAX		=  0x7bff;		// MAXFLT bit pattern for halfs
			
 
				-
			
 
				-enum Format { UNSIGNED_F16, SIGNED_F16 };
			
 
				-
			
 
				-class Utils
			
 
				-{
			
 
				-public:
			
 
				-    static Format FORMAT;     // this is a global -- we're either handling unsigned or unsigned half values
			
 
				-
			
 
				-    // error metrics
			
 
				-    static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
			
 
				-    static float mpsnr_norm(const nv::Vector3 &a, int exposure, const nv::Vector3 &b);
			
 
				-
			
 
				-    // conversion & clamp
			
 
				-    static int ushort_to_format(unsigned short input);
			
 
				-    static unsigned short format_to_ushort(int input);
			
 
				-
			
 
				-    // clamp to format
			
 
				-    static void clamp(nv::Vector3 &v);
			
 
				-
			
 
				-    // quantization and unquantization
			
 
				-    static int finish_unquantize(int q, int prec);
			
 
				-    static int unquantize(int q, int prec);
			
 
				-    static int quantize(float value, int prec);
			
 
				-
			
 
				-    static void parse(const char *encoding, int &ptr, Field & field, int &endbit, int &len);
			
 
				-
			
 
				-    // lerping
			
 
				-    static int lerp(int a, int b, int i, int denom);
			
 
				-    static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif // _ZOH_UTILS_H
			
--- a/3rdparty/nvtt/bc6h/zohone.cpp
+++ b/3rdparty/nvtt/bc6h/zohone.cpp
@@ -1,799 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// one region zoh compress/decompress code
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "zoh.h"
			
 
				-#include "zoh_utils.h"
			
 
				-
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-
			
 
				-#include <string.h> // strlen
			
 
				-#include <float.h> // FLT_MAX
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace ZOH;
			
 
				-
			
 
				-#define NINDICES	16
			
 
				-#define	INDEXBITS	4
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-
			
 
				-#define	NSHAPES	1
			
 
				-
			
 
				-static const int shapes[NSHAPES] =
			
 
				-{
			
 
				-    0x0000
			
 
				-};	// only 1 shape
			
 
				-
			
 
				-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NDELTA	2
			
 
				-
			
 
				-struct Chanpat
			
 
				-{
			
 
				-    int prec[NDELTA];		// precision pattern for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-    Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
			
 
				-    int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-    int mode;				// associated mode value
			
 
				-    int modebits;			// number of mode bits
			
 
				-    const char *encoding;	// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define MAXMODEBITS	5
			
 
				-#define	MAXMODES (1<<MAXMODEBITS)
			
 
				-
			
 
				-#define	NPATTERNS 4
			
 
				-
			
 
				-static const Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-    16,4,  16,4,  16,4,   1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    12,8,  12,8,  12,8,   1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    11,9,  11,9,  11,9,   1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    10,10, 10,10, 10,10,  0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-};
			
 
				-
			
 
				-// mapping of mode to the corresponding index in pattern
			
 
				-static const int mode_to_pat[MAXMODES] = {
			
 
				-    -1,-1,-1,
			
 
				-    3,	// 0x03
			
 
				-    -1,-1,-1,
			
 
				-    2,	// 0x07
			
 
				-    -1,-1,-1,
			
 
				-    1,	// 0x0b
			
 
				-    -1,-1,-1,
			
 
				-    0,	// 0x0f
			
 
				-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
			
 
				-};
			
 
				-
			
 
				-#define	R_0(ep)	(ep)[0].A[i]
			
 
				-#define	R_1(ep)	(ep)[0].B[i]
			
 
				-#define	MASK(n)	((1<<(n))-1)
			
 
				-
			
 
				-// compress endpoints
			
 
				-static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
			
 
				-{
			
 
				-    if (p.transformed)
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
			
 
				-        }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// decompress endpoints
			
 
				-static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
			
 
				-{
			
 
				-    bool issigned = Utils::FORMAT == SIGNED_F16;
			
 
				-
			
 
				-    if (p.transformed)
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
			
 
				-            int t;
			
 
				-            t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
			
 
				-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
			
 
				-        }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
			
 
				-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
			
 
				-{
			
 
				-    for (int region = 0; region < NREGIONS_ONE; ++region)
			
 
				-    {
			
 
				-        q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
			
 
				-        q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
			
 
				-        q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
			
 
				-        q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
			
 
				-        q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
			
 
				-        q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_one and index_one have a 0 high-order bit
			
 
				-// index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
			
 
				-static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-    int index_positions[NREGIONS_ONE];
			
 
				-
			
 
				-    index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
			
 
				-
			
 
				-    for (int region = 0; region < NREGIONS_ONE; ++region)
			
 
				-    {
			
 
				-        int x = index_positions[region] & 3;
			
 
				-        int y = (index_positions[region] >> 2) & 3;
			
 
				-        nvDebugCheck(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-        if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-        {
			
 
				-            // high bit is set, swap the endpts and indices for this region
			
 
				-            int t;
			
 
				-            for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-            for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-                for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-                    if (REGION(x,y,shapeindex) == region)
			
 
				-                        indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// endpoints fit only if the compression was lossless
			
 
				-static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
			
 
				-{
			
 
				-    IntEndpts uncompressed[NREGIONS_ONE];
			
 
				-
			
 
				-    decompress_endpts(compressed, uncompressed, p);
			
 
				-
			
 
				-    for (int j=0; j<NREGIONS_ONE; ++j)
			
 
				-	for (int i=0; i<NCHANNELS; ++i)
			
 
				-	{
			
 
				-        if (orig[j].A[i] != uncompressed[j].A[i]) return false;
			
 
				-        if (orig[j].B[i] != uncompressed[j].B[i]) return false;
			
 
				-    }
			
 
				-    return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
			
 
				-{
			
 
				-    // interpret the verilog backwards and process it
			
 
				-    int m = p.mode;
			
 
				-    int rw = endpts[0].A[0], rx = endpts[0].B[0];
			
 
				-    int gw = endpts[0].A[1], gx = endpts[0].B[1];
			
 
				-    int bw = endpts[0].A[2], bx = endpts[0].B[2];
			
 
				-    int ptr = int(strlen(p.encoding));
			
 
				-    while (ptr)
			
 
				-    {
			
 
				-        Field field;
			
 
				-        int endbit, len;
			
 
				-
			
 
				-		// !!!UNDONE: get rid of string parsing!!!
			
 
				-        Utils::parse(p.encoding, ptr, field, endbit, len);
			
 
				-        switch(field)
			
 
				-        {
			
 
				-        case FIELD_M:	out.write( m >> endbit, len); break;
			
 
				-        case FIELD_RW:	out.write(rw >> endbit, len); break;
			
 
				-        case FIELD_RX:	out.write(rx >> endbit, len); break;
			
 
				-        case FIELD_GW:	out.write(gw >> endbit, len); break;
			
 
				-        case FIELD_GX:	out.write(gx >> endbit, len); break;
			
 
				-        case FIELD_BW:	out.write(bw >> endbit, len); break;
			
 
				-        case FIELD_BX:	out.write(bx >> endbit, len); break;
			
 
				-
			
 
				-        case FIELD_D:
			
 
				-        case FIELD_RY:
			
 
				-        case FIELD_RZ:
			
 
				-        case FIELD_GY:
			
 
				-        case FIELD_GZ:
			
 
				-        case FIELD_BY:
			
 
				-        case FIELD_BZ:
			
 
				-        default: nvUnreachable();
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
			
 
				-{
			
 
				-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
			
 
				-    int mode = in.read(2);
			
 
				-    if (mode != 0x00 && mode != 0x01)
			
 
				-        mode = (in.read(3) << 2) | mode;
			
 
				-
			
 
				-    int pat_index = mode_to_pat[mode];
			
 
				-
			
 
				-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-    p = patterns[pat_index];
			
 
				-
			
 
				-    int d;
			
 
				-    int rw, rx;
			
 
				-    int gw, gx;
			
 
				-    int bw, bx;
			
 
				-
			
 
				-    d = 0;
			
 
				-    rw = rx = 0;
			
 
				-    gw = gx = 0;
			
 
				-    bw = bx = 0;
			
 
				-
			
 
				-    int ptr = int(strlen(p.encoding));
			
 
				-
			
 
				-    while (ptr)
			
 
				-    {
			
 
				-        Field field;
			
 
				-        int endbit, len;
			
 
				-
			
 
				-		// !!!UNDONE: get rid of string parsing!!!
			
 
				-        Utils::parse(p.encoding, ptr, field, endbit, len);
			
 
				-
			
 
				-        switch(field)
			
 
				-        {
			
 
				-        case FIELD_M:	break;	// already processed so ignore
			
 
				-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
			
 
				-
			
 
				-        case FIELD_D:
			
 
				-        case FIELD_RY:
			
 
				-        case FIELD_RZ:
			
 
				-        case FIELD_GY:
			
 
				-        case FIELD_GZ:
			
 
				-        case FIELD_BY:
			
 
				-        case FIELD_BZ:
			
 
				-        default: nvUnreachable();
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    nvDebugCheck (in.getptr() == 128 - 63);
			
 
				-
			
 
				-    endpts[0].A[0] = rw; endpts[0].B[0] = rx;
			
 
				-    endpts[0].A[1] = gw; endpts[0].B[1] = gx;
			
 
				-    endpts[0].A[2] = bw; endpts[0].B[2] = bx;
			
 
				-}
			
 
				-
			
 
				-// compress index 0
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-    {
			
 
				-        int x = POS_TO_X(pos);
			
 
				-        int y = POS_TO_Y(pos);
			
 
				-
			
 
				-        out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-    Bits out(block, ZOH::BITSIZE);
			
 
				-
			
 
				-    write_header(endpts, p, out);
			
 
				-
			
 
				-    write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-    nvDebugCheck(out.getptr() == ZOH::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
			
 
				-{
			
 
				-    // scale endpoints
			
 
				-    int a, b;			// really need a IntVector3...
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[0], prec);
			
 
				-    b = Utils::unquantize(endpts.B[0], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[1], prec);
			
 
				-    b = Utils::unquantize(endpts.B[1], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[2], prec);
			
 
				-    b = Utils::unquantize(endpts.B[2], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-}
			
 
				-
			
 
				-// position 0 was compressed
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-    {
			
 
				-        int x = POS_TO_X(pos);
			
 
				-        int y = POS_TO_Y(pos);
			
 
				-
			
 
				-        indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-void ZOH::decompressone(const char *block, Tile &t)
			
 
				-{
			
 
				-    Bits in(block, ZOH::BITSIZE);
			
 
				-
			
 
				-    Pattern p;
			
 
				-    IntEndpts endpts[NREGIONS_ONE];
			
 
				-    ComprEndpts compr_endpts[NREGIONS_ONE];
			
 
				-
			
 
				-    read_header(in, compr_endpts, p);
			
 
				-    int shapeindex = 0;		// only one shape
			
 
				-
			
 
				-    decompress_endpts(compr_endpts, endpts, p);
			
 
				-
			
 
				-    Vector3 palette[NREGIONS_ONE][NINDICES];
			
 
				-    for (int r = 0; r < NREGIONS_ONE; ++r)
			
 
				-        generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
			
 
				-
			
 
				-    // read indices
			
 
				-    int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-    read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
			
 
				-
			
 
				-    // lookup
			
 
				-    for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-            t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
			
 
				-{
			
 
				-    Vector3 palette[NINDICES];
			
 
				-    float toterr = 0;
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    generate_palette_quantized(endpts, prec, palette);
			
 
				-
			
 
				-    for (int i = 0; i < np; ++i)
			
 
				-    {
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(colors[i], palette[0]) * importance[i];
			
 
				-
			
 
				-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
			
 
				-        {
			
 
				-            err = Utils::norm(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-                besterr = err;
			
 
				-        }
			
 
				-        toterr += besterr;
			
 
				-    }
			
 
				-    return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, 
			
 
				-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
			
 
				-{
			
 
				-    // build list of possibles
			
 
				-    Vector3 palette[NREGIONS_ONE][NINDICES];
			
 
				-
			
 
				-    for (int region = 0; region < NREGIONS_ONE; ++region)
			
 
				-    {
			
 
				-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
			
 
				-        toterr[region] = 0;
			
 
				-    }
			
 
				-
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-        int region = REGION(x,y,shapeindex);
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(tile.data[y][x], palette[region][0]);
			
 
				-        indices[y][x] = 0;
			
 
				-
			
 
				-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
			
 
				-        {
			
 
				-            err = Utils::norm(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-            {
			
 
				-                besterr = err;
			
 
				-                indices[y][x] = i;
			
 
				-            }
			
 
				-        }
			
 
				-        toterr[region] += besterr;
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
			
 
				-                          float old_err, int do_b)
			
 
				-{
			
 
				-    // we have the old endpoints: old_endpts
			
 
				-    // we have the perturbed endpoints: new_endpts
			
 
				-    // we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-    IntEndpts temp_endpts;
			
 
				-    float min_err = old_err;		// start with the best current error
			
 
				-    int beststep;
			
 
				-
			
 
				-    // copy real endpoints so we can perturb them
			
 
				-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
			
 
				-
			
 
				-    // do a logarithmic search for the best error for this endpoint (which)
			
 
				-    for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-    {
			
 
				-        bool improved = false;
			
 
				-        for (int sign = -1; sign <= 1; sign += 2)
			
 
				-        {
			
 
				-            if (do_b == 0)
			
 
				-            {
			
 
				-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-                    continue;
			
 
				-            }
			
 
				-            else
			
 
				-            {
			
 
				-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-                    continue;
			
 
				-            }
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, prec);
			
 
				-
			
 
				-            if (err < min_err)
			
 
				-            {
			
 
				-                improved = true;
			
 
				-                min_err = err;
			
 
				-                beststep = sign * step;
			
 
				-            }
			
 
				-        }
			
 
				-        // if this was an improvement, move the endpoint and continue search from there
			
 
				-        if (improved)
			
 
				-        {
			
 
				-            if (do_b == 0)
			
 
				-                new_endpts.A[ch] += beststep;
			
 
				-            else
			
 
				-                new_endpts.B[ch] += beststep;
			
 
				-        }
			
 
				-    }
			
 
				-    return min_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
			
 
				-{
			
 
				-    float opt_err = orig_err;
			
 
				-    for (int ch = 0; ch < NCHANNELS; ++ch)
			
 
				-    {
			
 
				-        opt_endpts.A[ch] = orig_endpts.A[ch];
			
 
				-        opt_endpts.B[ch] = orig_endpts.B[ch];
			
 
				-    }
			
 
				-    /*
			
 
				-        err0 = perturb(rgb0, delta0)
			
 
				-        err1 = perturb(rgb1, delta1)
			
 
				-        if (err0 < err1)
			
 
				-            if (err0 >= initial_error) break
			
 
				-            rgb0 += delta0
			
 
				-            next = 1
			
 
				-        else
			
 
				-            if (err1 >= initial_error) break
			
 
				-            rgb1 += delta1
			
 
				-            next = 0
			
 
				-        initial_err = map()
			
 
				-        for (;;)
			
 
				-            err = perturb(next ? rgb1:rgb0, delta)
			
 
				-            if (err >= initial_err) break
			
 
				-            next? rgb1 : rgb0 += delta
			
 
				-            initial_err = err
			
 
				-	*/
			
 
				-    IntEndpts new_a, new_b;
			
 
				-    IntEndpts new_endpt;
			
 
				-    int do_b;
			
 
				-
			
 
				-    // now optimize each channel separately
			
 
				-    for (int ch = 0; ch < NCHANNELS; ++ch)
			
 
				-    {
			
 
				-        // figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-        // if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B
			
 
				-
			
 
				-        if (err0 < err1)
			
 
				-        {
			
 
				-            if (err0 >= opt_err)
			
 
				-                continue;
			
 
				-
			
 
				-            opt_endpts.A[ch] = new_a.A[ch];
			
 
				-            opt_err = err0;
			
 
				-            do_b = 1;		// do B next
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-            if (err1 >= opt_err)
			
 
				-                continue;
			
 
				-            opt_endpts.B[ch] = new_b.B[ch];
			
 
				-            opt_err = err1;
			
 
				-            do_b = 0;		// do A next
			
 
				-        }
			
 
				-
			
 
				-        // now alternate endpoints and keep trying until there is no improvement
			
 
				-        for (;;)
			
 
				-        {
			
 
				-            float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
			
 
				-            if (err >= opt_err)
			
 
				-                break;
			
 
				-            if (do_b == 0)
			
 
				-                opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-            else
			
 
				-                opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-            opt_err = err;
			
 
				-            do_b = 1 - do_b;	// now move the other endpoint
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE], 
			
 
				-                            const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
			
 
				-{
			
 
				-    Vector3 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-    float err = 0;
			
 
				-
			
 
				-    for (int region=0; region<NREGIONS_ONE; ++region)
			
 
				-    {
			
 
				-        // collect the pixels in the region
			
 
				-        int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    ++np;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-    for each pattern
			
 
				-        convert endpoints using pattern precision
			
 
				-        assign indices and get initial error
			
 
				-        compress indices (and possibly reorder endpoints)
			
 
				-        transform endpoints
			
 
				-        if transformed endpoints fit pattern
			
 
				-            get original endpoints back
			
 
				-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-            compress new indices
			
 
				-            transform new endpoints
			
 
				-            if new endpoints fit pattern AND if error is improved
			
 
				-                emit compressed block with new data
			
 
				-            else
			
 
				-                emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
			
 
				-{
			
 
				-    float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
			
 
				-    IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
			
 
				-    ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
			
 
				-    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-    for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-    {
			
 
				-        // precisions for all channels need to be the same
			
 
				-        for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
			
 
				-
			
 
				-        quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
			
 
				-        assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
			
 
				-        swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-        compress_endpts(orig_endpts, compr_orig, patterns[sp]);
			
 
				-        if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
			
 
				-        {
			
 
				-            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
			
 
				-            assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
			
 
				-            swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-            compress_endpts(opt_endpts, compr_opt, patterns[sp]);
			
 
				-            orig_toterr = opt_toterr = 0;
			
 
				-            for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-
			
 
				-            if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-            {
			
 
				-                emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-                return opt_toterr;
			
 
				-            }
			
 
				-            else
			
 
				-            {
			
 
				-                // either it stopped fitting when we optimized it, or there was no improvement
			
 
				-                // so go back to the unoptimized endpoints which we know will fit
			
 
				-                emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-                return orig_toterr;
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-	nvAssert (false); // "No candidate found, should never happen (refineone.)";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
			
 
				-{
			
 
				-    for (int region = 0; region < NREGIONS_ONE; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-            palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
			
 
				-{
			
 
				-    // build list of possibles
			
 
				-    Vector3 palette[NREGIONS_ONE][NINDICES];
			
 
				-
			
 
				-    generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-    float toterr = 0;
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-        int region = REGION(x,y,shapeindex);
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
			
 
				-
			
 
				-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
			
 
				-        {
			
 
				-            err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-                besterr = err;
			
 
				-        }
			
 
				-        toterr += besterr;
			
 
				-    }
			
 
				-    return toterr;
			
 
				-}
			
 
				-
			
 
				-float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
			
 
				-{
			
 
				-    for (int region=0; region<NREGIONS_ONE; ++region)
			
 
				-    {
			
 
				-        int np = 0;
			
 
				-        Vector3 colors[Tile::TILE_TOTAL];
			
 
				-        Vector3 mean(0,0,0);
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x,y,shapeindex) == region)
			
 
				-                {
			
 
				-                    colors[np] = tile.data[y][x];
			
 
				-                    mean += tile.data[y][x];
			
 
				-                    ++np;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-        // handle simple cases
			
 
				-        if (np == 0)
			
 
				-        {
			
 
				-            Vector3 zero(0,0,0);
			
 
				-            endpts[region].A = zero;
			
 
				-            endpts[region].B = zero;
			
 
				-            continue;
			
 
				-        }
			
 
				-        else if (np == 1)
			
 
				-        {
			
 
				-            endpts[region].A = colors[0];
			
 
				-            endpts[region].B = colors[0];
			
 
				-            continue;
			
 
				-        }
			
 
				-        else if (np == 2)
			
 
				-        {
			
 
				-            endpts[region].A = colors[0];
			
 
				-            endpts[region].B = colors[1];
			
 
				-            continue;
			
 
				-        }
			
 
				-
			
 
				-        mean /= float(np);
			
 
				-
			
 
				-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-        // project each pixel value along the principal direction
			
 
				-        float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-        for (int i = 0; i < np; i++)
			
 
				-        {
			
 
				-            float dp = dot(colors[i]-mean, direction);
			
 
				-            if (dp < minp) minp = dp;
			
 
				-            if (dp > maxp) maxp = dp;
			
 
				-        }
			
 
				-
			
 
				-        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-        endpts[region].A = mean + minp*direction;
			
 
				-        endpts[region].B = mean + maxp*direction;
			
 
				-
			
 
				-        // clamp endpoints
			
 
				-        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-        // shape based on endpoints being clamped
			
 
				-        Utils::clamp(endpts[region].A);
			
 
				-        Utils::clamp(endpts[region].B);
			
 
				-    }
			
 
				-
			
 
				-    return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-float ZOH::compressone(const Tile &t, char *block)
			
 
				-{
			
 
				-    int shapeindex_best = 0;
			
 
				-    FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
			
 
				-    float msebest = FLT_MAX;
			
 
				-
			
 
				-    /*
			
 
				-		collect the mse values that are within 5% of the best values
			
 
				-		optimize each one and choose the best
			
 
				-	*/
			
 
				-    // hack for now -- just use the best value WORK
			
 
				-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
			
 
				-    {
			
 
				-        float mse = roughone(t, i, tempendpts);
			
 
				-        if (mse < msebest)
			
 
				-        {
			
 
				-            msebest = mse;
			
 
				-            shapeindex_best = i;
			
 
				-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
			
 
				-        }
			
 
				-
			
 
				-    }
			
 
				-    return refineone(t, shapeindex_best, endptsbest, block);
			
 
				-}
			
--- a/3rdparty/nvtt/bc6h/zohtwo.cpp
+++ b/3rdparty/nvtt/bc6h/zohtwo.cpp
@@ -1,883 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// two regions zoh compress/decompress code
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-
			
 
				-	get initial float endpoints
			
 
				-	convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
			
 
				-		note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
			
 
				-	for each EC candidate in order from max precision to smaller precision
			
 
				-		convert endpoints using the appropriate precision.
			
 
				-		optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
			
 
				-			(thus the endpoints and indices are in final form.)
			
 
				-		transform and get bit delta.
			
 
				-		if the bit delta fits, exit
			
 
				-	if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever.
			
 
				-		add a state variable to nvDebugCheck we only do this once.
			
 
				-	convert to bit stream.
			
 
				-	return the error.
			
 
				-
			
 
				-	Global optimization
			
 
				-		order all tiles based on their errors
			
 
				-		do something special for high-error tiles
			
 
				-			the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
			
 
				-
			
 
				-	display an image that shows partitioning and precision selected for each tile
			
 
				-*/
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "zoh.h"
			
 
				-#include "zoh_utils.h"
			
 
				-
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-
			
 
				-#include <string.h> // strlen
			
 
				-#include <float.h> // FLT_MAX
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace ZOH;
			
 
				-
			
 
				-#define NINDICES	8
			
 
				-#define	INDEXBITS	3
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#include "shapes_two.h"
			
 
				-// use only the first 32 available shapes
			
 
				-#undef NSHAPES
			
 
				-#undef SHAPEBITS
			
 
				-#define NSHAPES 32
			
 
				-#define SHAPEBITS 5
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NDELTA	4
			
 
				-
			
 
				-struct Chanpat
			
 
				-{
			
 
				-    int prec[NDELTA];		// precision pattern for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-    Chanpat chan[NCHANNELS];    // allow different bit patterns per channel -- but we still want constant precision per channel
			
 
				-    int transformed;            // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-    int mode;                   // associated mode value
			
 
				-    int modebits;               // number of mode bits
			
 
				-    const char *encoding;       // verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define MAXMODEBITS	5
			
 
				-#define	MAXMODES (1<<MAXMODEBITS)
			
 
				-
			
 
				-#define	NPATTERNS 10
			
 
				-
			
 
				-static const Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-    11,5,5,5,	11,4,4,4,	11,4,4,4,	1,	0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    11,4,4,4,	11,5,5,5,	11,4,4,4,	1,	0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    11,4,4,4,	11,4,4,4,	11,5,5,5,	1,	0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
			
 
				-    10,5,5,5,	10,5,5,5,	10,5,5,5,	1,	0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]",
			
 
				-    9,5,5,5,	9,5,5,5,	9,5,5,5,	1,	0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]",
			
 
				-    8,6,6,6,	8,5,5,5,	8,5,5,5,	1,	0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]",
			
 
				-    8,5,5,5,	8,6,6,6,	8,5,5,5,	1,	0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]",
			
 
				-    8,5,5,5,	8,5,5,5,	8,6,6,6,	1,	0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]",
			
 
				-    7,6,6,6,	7,6,6,6,	7,6,6,6,	1,	0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]",
			
 
				-    6,6,6,6,	6,6,6,6,	6,6,6,6,	0,	0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]",
			
 
				-};
			
 
				-
			
 
				-// mapping of mode to the corresponding index in pattern
			
 
				-// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these
			
 
				-static const int mode_to_pat[MAXMODES] = {	
			
 
				-    3,	// 0x00
			
 
				-    8,	// 0x01
			
 
				-    0,	// 0x02
			
 
				-    -1,-1,-1,
			
 
				-    1,	// 0x06
			
 
				-    -1,-1,-1,
			
 
				-    2,	// 0x0a
			
 
				-    -1,-1,-1,
			
 
				-    4,	// 0x0e
			
 
				-    -1,-1,-1,
			
 
				-    5,	// 0x12
			
 
				-    -2,-1,-1,
			
 
				-    6,	// 0x16
			
 
				-    -2,-1,-1,
			
 
				-    7,	// 0x1a
			
 
				-    -2,-1,-1,
			
 
				-    9,	// 0x1e
			
 
				-    -2
			
 
				-};
			
 
				-
			
 
				-#define	R_0(ep)	(ep)[0].A[i]
			
 
				-#define	R_1(ep)	(ep)[0].B[i]
			
 
				-#define	R_2(ep)	(ep)[1].A[i]
			
 
				-#define	R_3(ep)	(ep)[1].B[i]
			
 
				-#define	MASK(n)	((1<<(n))-1)
			
 
				-
			
 
				-// compress endpoints
			
 
				-static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
			
 
				-{
			
 
				-    if (p.transformed)
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
			
 
				-            R_2(out) = (R_2(in) - R_0(in)) & MASK(p.chan[i].prec[2]);
			
 
				-            R_3(out) = (R_3(in) - R_0(in)) & MASK(p.chan[i].prec[3]);
			
 
				-        }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
			
 
				-            R_2(out) = R_2(in) & MASK(p.chan[i].prec[2]);
			
 
				-            R_3(out) = R_3(in) & MASK(p.chan[i].prec[3]);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// decompress endpoints
			
 
				-static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
			
 
				-{
			
 
				-    bool issigned = Utils::FORMAT == SIGNED_F16;
			
 
				-
			
 
				-    if (p.transformed)
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
			
 
				-            int t;
			
 
				-            t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
			
 
				-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
			
 
				-            R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
			
 
				-            t = SIGN_EXTEND(R_2(in), p.chan[i].prec[2]);
			
 
				-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
			
 
				-            R_2(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
			
 
				-            t = SIGN_EXTEND(R_3(in), p.chan[i].prec[3]);
			
 
				-            t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
			
 
				-            R_3(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
			
 
				-        }
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        for (int i=0; i<NCHANNELS; ++i)
			
 
				-        {
			
 
				-            R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
			
 
				-            R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
			
 
				-            R_2(out) = issigned ? SIGN_EXTEND(R_2(in),p.chan[i].prec[2]) : R_2(in);
			
 
				-            R_3(out) = issigned ? SIGN_EXTEND(R_3(in),p.chan[i].prec[3]) : R_3(in);
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
			
 
				-{
			
 
				-    for (int region = 0; region < NREGIONS_TWO; ++region)
			
 
				-    {
			
 
				-        q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
			
 
				-        q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
			
 
				-        q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
			
 
				-        q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
			
 
				-        q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
			
 
				-        q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
			
 
				-static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-    for (int region = 0; region < NREGIONS_TWO; ++region)
			
 
				-    {
			
 
				-        int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-        int x = POS_TO_X(position);
			
 
				-        int y = POS_TO_Y(position);
			
 
				-        nvDebugCheck(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-        if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-        {
			
 
				-            // high bit is set, swap the endpts and indices for this region
			
 
				-            int t;
			
 
				-            for (int i=0; i<NCHANNELS; ++i)
			
 
				-            {
			
 
				-                t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
			
 
				-            }
			
 
				-
			
 
				-            for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-                for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-                    if (REGION(x,y,shapeindex) == region)
			
 
				-                        indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-// endpoints fit only if the compression was lossless
			
 
				-static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
			
 
				-{
			
 
				-    IntEndpts uncompressed[NREGIONS_TWO];
			
 
				-
			
 
				-    decompress_endpts(compressed, uncompressed, p);
			
 
				-
			
 
				-    for (int j=0; j<NREGIONS_TWO; ++j)
			
 
				-    {
			
 
				-	for (int i=0; i<NCHANNELS; ++i)
			
 
				-	{
			
 
				-            if (orig[j].A[i] != uncompressed[j].A[i]) return false;
			
 
				-            if (orig[j].B[i] != uncompressed[j].B[i]) return false;
			
 
				-        }
			
 
				-    }
			
 
				-    return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-    // interpret the verilog backwards and process it
			
 
				-    int m = p.mode;
			
 
				-    int d = shapeindex;
			
 
				-    int rw = endpts[0].A[0], rx = endpts[0].B[0], ry = endpts[1].A[0], rz = endpts[1].B[0];
			
 
				-    int gw = endpts[0].A[1], gx = endpts[0].B[1], gy = endpts[1].A[1], gz = endpts[1].B[1];
			
 
				-    int bw = endpts[0].A[2], bx = endpts[0].B[2], by = endpts[1].A[2], bz = endpts[1].B[2];
			
 
				-    int ptr = int(strlen(p.encoding));
			
 
				-    while (ptr)
			
 
				-    {
			
 
				-        Field field;
			
 
				-        int endbit, len;
			
 
				-
			
 
				-		// !!!UNDONE: get rid of string parsing!!!
			
 
				-        Utils::parse(p.encoding, ptr, field, endbit, len);
			
 
				-        switch(field)
			
 
				-        {
			
 
				-        case FIELD_M:	out.write( m >> endbit, len); break;
			
 
				-        case FIELD_D:	out.write( d >> endbit, len); break;
			
 
				-        case FIELD_RW:	out.write(rw >> endbit, len); break;
			
 
				-        case FIELD_RX:	out.write(rx >> endbit, len); break;
			
 
				-        case FIELD_RY:	out.write(ry >> endbit, len); break;
			
 
				-        case FIELD_RZ:	out.write(rz >> endbit, len); break;
			
 
				-        case FIELD_GW:	out.write(gw >> endbit, len); break;
			
 
				-        case FIELD_GX:	out.write(gx >> endbit, len); break;
			
 
				-        case FIELD_GY:	out.write(gy >> endbit, len); break;
			
 
				-        case FIELD_GZ:	out.write(gz >> endbit, len); break;
			
 
				-        case FIELD_BW:	out.write(bw >> endbit, len); break;
			
 
				-        case FIELD_BX:	out.write(bx >> endbit, len); break;
			
 
				-        case FIELD_BY:	out.write(by >> endbit, len); break;
			
 
				-        case FIELD_BZ:	out.write(bz >> endbit, len); break;
			
 
				-        default: nvUnreachable();
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
			
 
				-{
			
 
				-    // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
			
 
				-    int mode = in.read(2);
			
 
				-    if (mode != 0x00 && mode != 0x01)
			
 
				-        mode = (in.read(3) << 2) | mode;
			
 
				-
			
 
				-    int pat_index = mode_to_pat[mode];
			
 
				-
			
 
				-    if (pat_index == -2)
			
 
				-        return false;		// reserved mode found
			
 
				-
			
 
				-    nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-    nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-    p = patterns[pat_index];
			
 
				-
			
 
				-    int d;
			
 
				-    int rw, rx, ry, rz;
			
 
				-    int gw, gx, gy, gz;
			
 
				-    int bw, bx, by, bz;
			
 
				-
			
 
				-    d = 0;
			
 
				-    rw = rx = ry = rz = 0;
			
 
				-    gw = gx = gy = gz = 0;
			
 
				-    bw = bx = by = bz = 0;
			
 
				-
			
 
				-    int ptr = int(strlen(p.encoding));
			
 
				-
			
 
				-    while (ptr)
			
 
				-    {
			
 
				-        Field field;
			
 
				-        int endbit, len;
			
 
				-
			
 
				-		// !!!UNDONE: get rid of string parsing!!!
			
 
				-        Utils::parse(p.encoding, ptr, field, endbit, len);
			
 
				-
			
 
				-        switch(field)
			
 
				-        {
			
 
				-        case FIELD_M:	break;	// already processed so ignore
			
 
				-        case FIELD_D:	 d |= in.read(len) << endbit; break;
			
 
				-        case FIELD_RW:	rw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_RX:	rx |= in.read(len) << endbit; break;
			
 
				-        case FIELD_RY:	ry |= in.read(len) << endbit; break;
			
 
				-        case FIELD_RZ:	rz |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GW:	gw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GX:	gx |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GY:	gy |= in.read(len) << endbit; break;
			
 
				-        case FIELD_GZ:	gz |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BW:	bw |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BX:	bx |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BY:	by |= in.read(len) << endbit; break;
			
 
				-        case FIELD_BZ:	bz |= in.read(len) << endbit; break;
			
 
				-        default: nvUnreachable();
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    nvDebugCheck (in.getptr() == 128 - 46);
			
 
				-
			
 
				-    shapeindex = d;
			
 
				-    endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
			
 
				-    endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
			
 
				-    endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
			
 
				-
			
 
				-    return true;
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-    int positions[NREGIONS_TWO];
			
 
				-
			
 
				-    for (int r = 0; r < NREGIONS_TWO; ++r)
			
 
				-        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-    {
			
 
				-        int x = POS_TO_X(pos);
			
 
				-        int y = POS_TO_Y(pos);
			
 
				-
			
 
				-        bool match = false;
			
 
				-
			
 
				-        for (int r = 0; r < NREGIONS_TWO; ++r)
			
 
				-            if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-        out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-    Bits out(block, ZOH::BITSIZE);
			
 
				-
			
 
				-    write_header(compr_endpts, shapeindex, p, out);
			
 
				-
			
 
				-    write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-    nvDebugCheck(out.getptr() == ZOH::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
			
 
				-{
			
 
				-    // scale endpoints
			
 
				-    int a, b;			// really need a IntVector3...
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[0], prec);
			
 
				-    b = Utils::unquantize(endpts.B[0], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[1], prec);
			
 
				-    b = Utils::unquantize(endpts.B[1], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-
			
 
				-    a = Utils::unquantize(endpts.A[2], prec);
			
 
				-    b = Utils::unquantize(endpts.B[2], prec);
			
 
				-
			
 
				-    // interpolate
			
 
				-    for (int i = 0; i < NINDICES; ++i)
			
 
				-        palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-    int positions[NREGIONS_TWO];
			
 
				-
			
 
				-    for (int r = 0; r < NREGIONS_TWO; ++r)
			
 
				-        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-    {
			
 
				-        int x = POS_TO_X(pos);
			
 
				-        int y = POS_TO_Y(pos);
			
 
				-
			
 
				-        bool match = false;
			
 
				-
			
 
				-        for (int r = 0; r < NREGIONS_TWO; ++r)
			
 
				-            if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-        indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-void ZOH::decompresstwo(const char *block, Tile &t)
			
 
				-{
			
 
				-    Bits in(block, ZOH::BITSIZE);
			
 
				-
			
 
				-    Pattern p;
			
 
				-    IntEndpts endpts[NREGIONS_TWO];
			
 
				-    ComprEndpts compr_endpts[NREGIONS_TWO];
			
 
				-    int shapeindex;
			
 
				-
			
 
				-    if (!read_header(in, compr_endpts, shapeindex, p))
			
 
				-    {
			
 
				-        // reserved mode, return all zeroes
			
 
				-        for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-            for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-                t.data[y][x] = Vector3(0.0f);
			
 
				-
			
 
				-        return;
			
 
				-    }
			
 
				-
			
 
				-    decompress_endpts(compr_endpts, endpts, p);
			
 
				-
			
 
				-    Vector3 palette[NREGIONS_TWO][NINDICES];
			
 
				-    for (int r = 0; r < NREGIONS_TWO; ++r)
			
 
				-        generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
			
 
				-
			
 
				-    int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-    read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-    nvDebugCheck(in.getptr() == ZOH::BITSIZE);
			
 
				-
			
 
				-    // lookup
			
 
				-    for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-        t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
			
 
				-{
			
 
				-    Vector3 palette[NINDICES];
			
 
				-    float toterr = 0;
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    generate_palette_quantized(endpts, prec, palette);
			
 
				-
			
 
				-    for (int i = 0; i < np; ++i)
			
 
				-    {
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(colors[i], palette[0]) * importance[i];
			
 
				-
			
 
				-        for (int j = 1; j < NINDICES && besterr > 0; ++j)
			
 
				-        {
			
 
				-            err = Utils::norm(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-                besterr = err;
			
 
				-        }
			
 
				-        toterr += besterr;
			
 
				-    }
			
 
				-    return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec, 
			
 
				-                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
			
 
				-{
			
 
				-    // build list of possibles
			
 
				-    Vector3 palette[NREGIONS_TWO][NINDICES];
			
 
				-
			
 
				-    for (int region = 0; region < NREGIONS_TWO; ++region)
			
 
				-    {
			
 
				-        generate_palette_quantized(endpts[region], prec, &palette[region][0]);
			
 
				-        toterr[region] = 0;
			
 
				-    }
			
 
				-
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-        int region = REGION(x,y,shapeindex);
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(tile.data[y][x], palette[region][0]);
			
 
				-        indices[y][x] = 0;
			
 
				-
			
 
				-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
			
 
				-        {
			
 
				-            err = Utils::norm(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-            {
			
 
				-                besterr = err;
			
 
				-                indices[y][x] = i;
			
 
				-            }
			
 
				-        }
			
 
				-        toterr[region] += besterr;
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
			
 
				-                          float old_err, int do_b)
			
 
				-{
			
 
				-    // we have the old endpoints: old_endpts
			
 
				-    // we have the perturbed endpoints: new_endpts
			
 
				-    // we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-    IntEndpts temp_endpts;
			
 
				-    float min_err = old_err;		// start with the best current error
			
 
				-    int beststep;
			
 
				-
			
 
				-    // copy real endpoints so we can perturb them
			
 
				-    for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
			
 
				-
			
 
				-    // do a logarithmic search for the best error for this endpoint (which)
			
 
				-    for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-    {
			
 
				-        bool improved = false;
			
 
				-        for (int sign = -1; sign <= 1; sign += 2)
			
 
				-        {
			
 
				-            if (do_b == 0)
			
 
				-            {
			
 
				-                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-                    continue;
			
 
				-            }
			
 
				-            else
			
 
				-            {
			
 
				-                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-                    continue;
			
 
				-            }
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, prec);
			
 
				-
			
 
				-            if (err < min_err)
			
 
				-            {
			
 
				-                improved = true;
			
 
				-                min_err = err;
			
 
				-                beststep = sign * step;
			
 
				-            }
			
 
				-        }
			
 
				-        // if this was an improvement, move the endpoint and continue search from there
			
 
				-        if (improved)
			
 
				-        {
			
 
				-            if (do_b == 0)
			
 
				-                new_endpts.A[ch] += beststep;
			
 
				-            else
			
 
				-                new_endpts.B[ch] += beststep;
			
 
				-        }
			
 
				-    }
			
 
				-    return min_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
			
 
				-{
			
 
				-    float opt_err = orig_err;
			
 
				-    for (int ch = 0; ch < NCHANNELS; ++ch)
			
 
				-    {
			
 
				-        opt_endpts.A[ch] = orig_endpts.A[ch];
			
 
				-        opt_endpts.B[ch] = orig_endpts.B[ch];
			
 
				-    }
			
 
				-    /*
			
 
				-        err0 = perturb(rgb0, delta0)
			
 
				-        err1 = perturb(rgb1, delta1)
			
 
				-        if (err0 < err1)
			
 
				-            if (err0 >= initial_error) break
			
 
				-            rgb0 += delta0
			
 
				-            next = 1
			
 
				-        else
			
 
				-            if (err1 >= initial_error) break
			
 
				-            rgb1 += delta1
			
 
				-            next = 0
			
 
				-        initial_err = map()
			
 
				-        for (;;)
			
 
				-            err = perturb(next ? rgb1:rgb0, delta)
			
 
				-            if (err >= initial_err) break
			
 
				-            next? rgb1 : rgb0 += delta
			
 
				-            initial_err = err
			
 
				-    */
			
 
				-    IntEndpts new_a, new_b;
			
 
				-    IntEndpts new_endpt;
			
 
				-    int do_b;
			
 
				-
			
 
				-    // now optimize each channel separately
			
 
				-    for (int ch = 0; ch < NCHANNELS; ++ch)
			
 
				-    {
			
 
				-        // figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-        // if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B
			
 
				-
			
 
				-        if (err0 < err1)
			
 
				-        {
			
 
				-            if (err0 >= opt_err)
			
 
				-                continue;
			
 
				-
			
 
				-            opt_endpts.A[ch] = new_a.A[ch];
			
 
				-            opt_err = err0;
			
 
				-            do_b = 1;		// do B next
			
 
				-        }
			
 
				-        else
			
 
				-        {
			
 
				-            if (err1 >= opt_err)
			
 
				-                continue;
			
 
				-            opt_endpts.B[ch] = new_b.B[ch];
			
 
				-            opt_err = err1;
			
 
				-            do_b = 0;		// do A next
			
 
				-        }
			
 
				-
			
 
				-        // now alternate endpoints and keep trying until there is no improvement
			
 
				-        for (;;)
			
 
				-        {
			
 
				-            float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
			
 
				-            if (err >= opt_err)
			
 
				-                break;
			
 
				-            if (do_b == 0)
			
 
				-                opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-            else
			
 
				-                opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-            opt_err = err;
			
 
				-            do_b = 1 - do_b;	// now move the other endpoint
			
 
				-        }
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO], 
			
 
				-                            const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
			
 
				-{
			
 
				-    Vector3 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-    float err = 0;
			
 
				-
			
 
				-    for (int region=0; region<NREGIONS_TWO; ++region)
			
 
				-    {
			
 
				-        // collect the pixels in the region
			
 
				-        int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++)
			
 
				-            for (int x = 0; x < tile.size_x; x++)
			
 
				-                if (REGION(x,y,shapeindex) == region)
			
 
				-                {
			
 
				-            pixels[np] = tile.data[y][x];
			
 
				-            importance[np] = tile.importance_map[y][x];
			
 
				-            ++np;
			
 
				-        }
			
 
				-
			
 
				-        optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-    for each pattern
			
 
				-        convert endpoints using pattern precision
			
 
				-        assign indices and get initial error
			
 
				-        compress indices (and possibly reorder endpoints)
			
 
				-        transform endpoints
			
 
				-        if transformed endpoints fit pattern
			
 
				-            get original endpoints back
			
 
				-            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-            compress new indices
			
 
				-            transform new endpoints
			
 
				-            if new endpoints fit pattern AND if error is improved
			
 
				-                emit compressed block with new data
			
 
				-            else
			
 
				-                emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
			
 
				-{
			
 
				-    float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
			
 
				-    IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
			
 
				-    ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
			
 
				-    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-    for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-    {
			
 
				-        // precisions for all channels need to be the same
			
 
				-        for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
			
 
				-
			
 
				-        quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
			
 
				-        assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
			
 
				-        swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-        compress_endpts(orig_endpts, compr_orig, patterns[sp]);
			
 
				-        if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
			
 
				-        {
			
 
				-            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
			
 
				-            assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
			
 
				-            swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-            compress_endpts(opt_endpts, compr_opt, patterns[sp]);
			
 
				-            orig_toterr = opt_toterr = 0;
			
 
				-            for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-            if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-            {
			
 
				-                emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-                return opt_toterr;
			
 
				-            }
			
 
				-            else
			
 
				-            {
			
 
				-                // either it stopped fitting when we optimized it, or there was no improvement
			
 
				-                // so go back to the unoptimized endpoints which we know will fit
			
 
				-                emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-                return orig_toterr;
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				-    nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
			
 
				-{
			
 
				-    for (int region = 0; region < NREGIONS_TWO; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-            palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
			
 
				-{
			
 
				-    // build list of possibles
			
 
				-    Vector3 palette[NREGIONS_TWO][NINDICES];
			
 
				-
			
 
				-    generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-    float toterr = 0;
			
 
				-    Vector3 err;
			
 
				-
			
 
				-    for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-        int region = REGION(x,y,shapeindex);
			
 
				-        float err, besterr;
			
 
				-
			
 
				-        besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
			
 
				-
			
 
				-        for (int i = 1; i < NINDICES && besterr > 0; ++i)
			
 
				-        {
			
 
				-            err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
			
 
				-
			
 
				-            if (err > besterr)	// error increased, so we're done searching
			
 
				-                break;
			
 
				-            if (err < besterr)
			
 
				-                besterr = err;
			
 
				-        }
			
 
				-        toterr += besterr;
			
 
				-    }
			
 
				-    return toterr;
			
 
				-}
			
 
				-
			
 
				-float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
			
 
				-{
			
 
				-    for (int region=0; region<NREGIONS_TWO; ++region)
			
 
				-    {
			
 
				-        int np = 0;
			
 
				-        Vector3 colors[Tile::TILE_TOTAL];
			
 
				-        Vector3 mean(0,0,0);
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++)
			
 
				-            for (int x = 0; x < tile.size_x; x++)
			
 
				-                if (REGION(x,y,shapeindex) == region)
			
 
				-                {
			
 
				-            colors[np] = tile.data[y][x];
			
 
				-            mean += tile.data[y][x];
			
 
				-            ++np;
			
 
				-        }
			
 
				-
			
 
				-        // handle simple cases
			
 
				-        if (np == 0)
			
 
				-        {
			
 
				-            Vector3 zero(0,0,0);
			
 
				-            endpts[region].A = zero;
			
 
				-            endpts[region].B = zero;
			
 
				-            continue;
			
 
				-        }
			
 
				-        else if (np == 1)
			
 
				-        {
			
 
				-            endpts[region].A = colors[0];
			
 
				-            endpts[region].B = colors[0];
			
 
				-            continue;
			
 
				-        }
			
 
				-        else if (np == 2)
			
 
				-        {
			
 
				-            endpts[region].A = colors[0];
			
 
				-            endpts[region].B = colors[1];
			
 
				-            continue;
			
 
				-        }
			
 
				-
			
 
				-        mean /= float(np);
			
 
				-
			
 
				-        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-        // project each pixel value along the principal direction
			
 
				-        float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-        for (int i = 0; i < np; i++)
			
 
				-        {
			
 
				-            float dp = dot(colors[i]-mean, direction);
			
 
				-            if (dp < minp) minp = dp;
			
 
				-            if (dp > maxp) maxp = dp;
			
 
				-        }
			
 
				-
			
 
				-        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-        endpts[region].A = mean + minp*direction;
			
 
				-        endpts[region].B = mean + maxp*direction;
			
 
				-
			
 
				-        // clamp endpoints
			
 
				-        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-        // shape based on endpoints being clamped
			
 
				-        Utils::clamp(endpts[region].A);
			
 
				-        Utils::clamp(endpts[region].B);
			
 
				-    }
			
 
				-
			
 
				-    return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-float ZOH::compresstwo(const Tile &t, char *block)
			
 
				-{
			
 
				-    int shapeindex_best = 0;
			
 
				-    FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
			
 
				-    float msebest = FLT_MAX;
			
 
				-
			
 
				-    /*
			
 
				-    collect the mse values that are within 5% of the best values
			
 
				-    optimize each one and choose the best
			
 
				-    */
			
 
				-    // hack for now -- just use the best value WORK
			
 
				-    for (int i=0; i<NSHAPES && msebest>0.0; ++i)
			
 
				-    {
			
 
				-        float mse = roughtwo(t, i, tempendpts);
			
 
				-        if (mse < msebest)
			
 
				-        {
			
 
				-            msebest = mse;
			
 
				-            shapeindex_best = i;
			
 
				-            memcpy(endptsbest, tempendpts, sizeof(endptsbest));
			
 
				-        }
			
 
				-
			
 
				-    }
			
 
				-    return refinetwo(t, shapeindex_best, endptsbest, block);
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl.cpp
+++ b/3rdparty/nvtt/bc7/avpcl.cpp
@@ -1,264 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// the avpcl compressor and decompressor
			
 
				-
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-// global flags
			
 
				-bool AVPCL::flag_premult = false;
			
 
				-bool AVPCL::flag_nonuniform = false;
			
 
				-bool AVPCL::flag_nonuniform_ati = false;
			
 
				-
			
 
				-// global mode
			
 
				-bool AVPCL::mode_rgb = false;		// true if image had constant alpha = 255
			
 
				-
			
 
				-void AVPCL::compress(const Tile &t, char *block)
			
 
				-{
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	float mse_mode0 = AVPCL::compress_mode0(t, tempblock);		if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode1 = AVPCL::compress_mode1(t, tempblock);		if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode2 = AVPCL::compress_mode2(t, tempblock);		if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode3 = AVPCL::compress_mode3(t, tempblock);		if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode4 = AVPCL::compress_mode4(t, tempblock);		if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode5 = AVPCL::compress_mode5(t, tempblock);		if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode6 = AVPCL::compress_mode6(t, tempblock);		if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-	float mse_mode7 = AVPCL::compress_mode7(t, tempblock);		if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
			
 
				-		
			
 
				-	/*if (errfile)
			
 
				-	{
			
 
				-		float errs[21];
			
 
				-		int nerrs = 8;
			
 
				-		errs[0] = mse_mode0; 
			
 
				-		errs[1] = mse_mode1; 
			
 
				-		errs[2] = mse_mode2; 
			
 
				-		errs[3] = mse_mode3; 
			
 
				-		errs[4] = mse_mode4; 
			
 
				-		errs[5] = mse_mode5; 
			
 
				-		errs[6] = mse_mode6; 
			
 
				-		errs[7] = mse_mode7;
			
 
				-		if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs)
			
 
				-			throw "Write error on error file";
			
 
				-	}*/
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-static int getbit(char *b, int start)
			
 
				-{
			
 
				-	if (start < 0 || start >= 128) return 0; // out of range
			
 
				-
			
 
				-	int ix = start >> 3;
			
 
				-	return (b[ix] & (1 << (start & 7))) != 0;
			
 
				-}
			
 
				-
			
 
				-static int getbits(char *b, int start, int len)
			
 
				-{
			
 
				-	int out = 0;
			
 
				-	for (int i=0; i<len; ++i)
			
 
				-		out |= getbit(b, start+i) << i;
			
 
				-	return out;
			
 
				-}
			
 
				-
			
 
				-static void setbit(char *b, int start, int bit)
			
 
				-{
			
 
				-	if (start < 0 || start >= 128) return; // out of range
			
 
				-
			
 
				-	int ix = start >> 3;
			
 
				-
			
 
				-	if (bit & 1)
			
 
				-		b[ix] |= (1 << (start & 7));
			
 
				-	else
			
 
				-		b[ix] &= ~(1 << (start & 7));
			
 
				-}
			
 
				-
			
 
				-static void setbits(char *b, int start, int len, int bits)
			
 
				-{
			
 
				-	for (int i=0; i<len; ++i)
			
 
				-		setbit(b, start+i, bits >> i);
			
 
				-}
			
 
				-*/
			
 
				-
			
 
				-void AVPCL::decompress(const char *cblock, Tile &t)
			
 
				-{
			
 
				-	char block[AVPCL::BLOCKSIZE];
			
 
				-	memcpy(block, cblock, AVPCL::BLOCKSIZE);
			
 
				-
			
 
				-	switch(getmode(block))
			
 
				-	{
			
 
				-	case 0:	AVPCL::decompress_mode0(block, t);	break;
			
 
				-	case 1:	AVPCL::decompress_mode1(block, t);	break;
			
 
				-	case 2:	AVPCL::decompress_mode2(block, t);	break;
			
 
				-	case 3:	AVPCL::decompress_mode3(block, t);	break;
			
 
				-	case 4:	AVPCL::decompress_mode4(block, t);	break;
			
 
				-	case 5:	AVPCL::decompress_mode5(block, t);	break;
			
 
				-	case 6:	AVPCL::decompress_mode6(block, t);	break;
			
 
				-	case 7:	AVPCL::decompress_mode7(block, t);	break;
			
 
				-	case 8: // return a black tile if you get a reserved mode
			
 
				-		for (int y=0; y<Tile::TILE_H; ++y)
			
 
				-			for (int x=0; x<Tile::TILE_W; ++x)
			
 
				-				t.data[y][x].set(0, 0, 0, 0);
			
 
				-		break;
			
 
				-	default: nvUnreachable();
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-void AVPCL::compress(string inf, string avpclf, string errf)
			
 
				-{
			
 
				-	Array2D<RGBA> pixels;
			
 
				-	int w, h;
			
 
				-	char block[AVPCL::BLOCKSIZE];
			
 
				-
			
 
				-	Targa::read(inf, pixels, w, h);
			
 
				-	FILE *avpclfile = fopen(avpclf.c_str(), "wb");
			
 
				-	if (avpclfile == NULL) throw "Unable to open .avpcl file for write";
			
 
				-	FILE *errfile = NULL;
			
 
				-	if (errf != "")
			
 
				-	{
			
 
				-		errfile = fopen(errf.c_str(), "wb");
			
 
				-		if (errfile == NULL) throw "Unable to open error file for write";
			
 
				-	}
			
 
				-
			
 
				-	// Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set)
			
 
				-	if (AVPCL::flag_premult)
			
 
				-	{
			
 
				-		if (AVPCL::mode_rgb)
			
 
				-		{
			
 
				-			AVPCL::flag_premult = false;
			
 
				-			cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// stuff for progress bar O.o
			
 
				-	int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
			
 
				-	int tilecnt = 0;
			
 
				-	clock_t start, prev, cur;
			
 
				-
			
 
				-	start = prev = clock();
			
 
				-
			
 
				-	// convert to tiles and compress each tile
			
 
				-	for (int y=0; y<h; y+=Tile::TILE_H)
			
 
				-	{
			
 
				-		int ysize = min(Tile::TILE_H, h-y);
			
 
				-		for (int x=0; x<w; x+=Tile::TILE_W)
			
 
				-		{
			
 
				-			if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
			
 
				-
			
 
				-			int xsize = min(Tile::TILE_W, w-x);
			
 
				-			Tile t(xsize, ysize);
			
 
				-
			
 
				-			t.insert(pixels, x, y);
			
 
				-
			
 
				-			AVPCL::compress(t, block, errfile);
			
 
				-			if (fwrite(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
			
 
				-				throw "File error on write";
			
 
				-
			
 
				-			// progress bar
			
 
				-			++tilecnt;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	cur = clock();
			
 
				-	printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC);		// advance to next line finally
			
 
				-
			
 
				-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
			
 
				-	if (errfile && fclose(errfile)) throw "Close failed on error file";
			
 
				-}
			
 
				-
			
 
				-static int str2int(std::string s) 
			
 
				-{
			
 
				-	int thing;
			
 
				-	std::stringstream str (stringstream::in | stringstream::out);
			
 
				-	str << s;
			
 
				-	str >> thing;
			
 
				-	return thing;
			
 
				-}
			
 
				-
			
 
				-// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height
			
 
				-static void extract(string avpclf, int &w, int &h, bool &mode_rgb)
			
 
				-{
			
 
				-	size_t n = avpclf.rfind('.', avpclf.length()-1);
			
 
				-	size_t n1 = avpclf.rfind('-', n-1);
			
 
				-	size_t n2 = avpclf.rfind('-', n1-1);
			
 
				-	size_t n3 = avpclf.rfind('-', n2-1);
			
 
				-	//	...-wwww-hhhh-RGB[A].avpcl
			
 
				-	//     ^    ^    ^      ^
			
 
				-	//     n3   n2   n1     n n3<n2<n1<n
			
 
				-	string width = avpclf.substr(n3+1, n2-n3-1);
			
 
				-	w = str2int(width);
			
 
				-	string height = avpclf.substr(n2+1, n1-n2-1);
			
 
				-	h = str2int(height);
			
 
				-	string mode = avpclf.substr(n1+1, n-n1-1);
			
 
				-	mode_rgb = mode == "RGB";
			
 
				-}
			
 
				-
			
 
				-static int modehist[8];
			
 
				-
			
 
				-static void stats(char block[AVPCL::BLOCKSIZE])
			
 
				-{
			
 
				-	int m = AVPCL::getmode(block);
			
 
				-	modehist[m]++;
			
 
				-}
			
 
				-
			
 
				-static void printstats()
			
 
				-{
			
 
				-	printf("\nMode histogram: "); for (int i=0; i<8; ++i) { printf("%d,", modehist[i]); }
			
 
				-	printf("\n");
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress(string avpclf, string outf)
			
 
				-{
			
 
				-	Array2D<RGBA> pixels;
			
 
				-	int w, h;
			
 
				-	char block[AVPCL::BLOCKSIZE];
			
 
				-
			
 
				-	extract(avpclf, w, h, AVPCL::mode_rgb);
			
 
				-	FILE *avpclfile = fopen(avpclf.c_str(), "rb");
			
 
				-	if (avpclfile == NULL) throw "Unable to open .avpcl file for read";
			
 
				-	pixels.resizeErase(h, w);
			
 
				-
			
 
				-	// convert to tiles and decompress each tile
			
 
				-	for (int y=0; y<h; y+=Tile::TILE_H)
			
 
				-	{
			
 
				-		int ysize = min(Tile::TILE_H, h-y);
			
 
				-		for (int x=0; x<w; x+=Tile::TILE_W)
			
 
				-		{
			
 
				-			int xsize = min(Tile::TILE_W, w-x);
			
 
				-			Tile t(xsize, ysize);
			
 
				-
			
 
				-			if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
			
 
				-				throw "File error on read";
			
 
				-
			
 
				-			stats(block);	// collect statistics
			
 
				-		
			
 
				-			AVPCL::decompress(block, t);
			
 
				-
			
 
				-			t.extract(pixels, x, y);
			
 
				-		}
			
 
				-	}
			
 
				-	if (fclose(avpclfile)) throw "Close failed on .avpcl file";
			
 
				-
			
 
				-	Targa::write(outf, pixels, w, h);
			
 
				-
			
 
				-	printstats();	// print statistics
			
 
				-}
			
 
				-*/
			
--- a/3rdparty/nvtt/bc7/avpcl.h
+++ b/3rdparty/nvtt/bc7/avpcl.h
@@ -1,99 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef _AVPCL_H
			
 
				-#define _AVPCL_H
			
 
				-
			
 
				-#include "tile.h"
			
 
				-#include "bits.h"
			
 
				-
			
 
				-#define	DISABLE_EXHAUSTIVE	1	// define this if you don't want to spend a lot of time on exhaustive compression
			
 
				-#define	USE_ZOH_INTERP		1	// use zoh interpolator, otherwise use exact avpcl interpolators
			
 
				-#define	USE_ZOH_INTERP_ROUNDED 1	// use the rounded versions!
			
 
				-
			
 
				-namespace AVPCL {
			
 
				-
			
 
				-static const int NREGIONS_TWO	= 2;
			
 
				-static const int NREGIONS_THREE	= 3;
			
 
				-
			
 
				-static const int BLOCKSIZE=16;
			
 
				-static const int BITSIZE=128;
			
 
				-
			
 
				-// global flags
			
 
				-extern bool flag_premult;
			
 
				-extern bool flag_nonuniform;
			
 
				-extern bool flag_nonuniform_ati;
			
 
				-
			
 
				-// global mode
			
 
				-extern bool mode_rgb;		// true if image had constant alpha = 255
			
 
				-
			
 
				-void compress(const Tile &t, char *block);
			
 
				-void decompress(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode0(const Tile &t, char *block);
			
 
				-void decompress_mode0(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode1(const Tile &t, char *block);
			
 
				-void decompress_mode1(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode2(const Tile &t, char *block);
			
 
				-void decompress_mode2(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode3(const Tile &t, char *block);
			
 
				-void decompress_mode3(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode4(const Tile &t, char *block);
			
 
				-void decompress_mode4(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode5(const Tile &t, char *block);
			
 
				-void decompress_mode5(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode6(const Tile &t, char *block);
			
 
				-void decompress_mode6(const char *block, Tile &t);
			
 
				-
			
 
				-float compress_mode7(const Tile &t, char *block);
			
 
				-void decompress_mode7(const char *block, Tile &t);
			
 
				-
			
 
				-inline int getmode(Bits &in)
			
 
				-{
			
 
				-	int mode = 0;
			
 
				-
			
 
				-	if (in.read(1))			mode = 0;
			
 
				-	else if (in.read(1))	mode = 1;
			
 
				-	else if (in.read(1))	mode = 2;
			
 
				-	else if (in.read(1))	mode = 3;
			
 
				-	else if (in.read(1))	mode = 4;
			
 
				-	else if (in.read(1))	mode = 5;
			
 
				-	else if (in.read(1))	mode = 6;
			
 
				-	else if (in.read(1))	mode = 7;
			
 
				-	else mode = 8;	// reserved
			
 
				-	return mode;
			
 
				-}
			
 
				-inline int getmode(const char *block)
			
 
				-{
			
 
				-	int bits = block[0], mode = 0;
			
 
				-
			
 
				-	if (bits & 1) mode = 0;
			
 
				-	else if ((bits&3) == 2) mode = 1;
			
 
				-	else if ((bits&7) == 4) mode = 2;
			
 
				-	else if ((bits & 0xF) == 8) mode = 3;
			
 
				-	else if ((bits & 0x1F) == 16) mode = 4;
			
 
				-	else if ((bits & 0x3F) == 32) mode = 5;
			
 
				-	else if ((bits & 0x7F) == 64) mode = 6;
			
 
				-	else if ((bits & 0xFF) == 128) mode = 7;
			
 
				-	else mode = 8;	// reserved
			
 
				-	return mode;
			
 
				-}
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/avpcl_mode0.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode0.cpp
@@ -1,1066 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-//  x1		444.1x6 16p 45b (3bi)
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-#include "shapes_three.h"
			
 
				-
			
 
				-// use only the first 16 available shapes
			
 
				-#undef NSHAPES
			
 
				-#undef SHAPEBITS
			
 
				-#define NSHAPES 16
			
 
				-#define SHAPEBITS 4
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
			
 
				-
			
 
				-#define NINDICES	8
			
 
				-#define	INDEXBITS	3
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NBITSIZES	(NREGIONS*2)
			
 
				-#define	ABITINDEX(region)	(2*(region)+0)
			
 
				-#define	BBITINDEX(region)	(2*(region)+1)
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
			
 
				-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-    const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red			green			blue			xfm	mode  mb
			
 
				-	4,4,4,4,4,4,	4,4,4,4,4,4,	4,4,4,4,4,4,	0,	0x1, 1, "",	// really 444.1 x 6
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGB];
			
 
				-	int endpt_b_prec[NCHANNELS_RGB];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits
			
 
				-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
			
 
				-{
			
 
				-	int onescnt;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		onescnt += endpts.A[j] & 1;
			
 
				-		compr_endpts.A[j] = endpts.A[j] >> 1;
			
 
				-		nvAssert (compr_endpts.A[j] < 16);
			
 
				-	}
			
 
				-	compr_endpts.a_lsb = onescnt >= 2;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		onescnt += endpts.B[j] & 1;
			
 
				-		compr_endpts.B[j] = endpts.B[j] >> 1;
			
 
				-		nvAssert (compr_endpts.B[j] < 16);
			
 
				-	}
			
 
				-	compr_endpts.b_lsb = onescnt >= 2;
			
 
				-}
			
 
				-
			
 
				-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
			
 
				-{
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
			
 
				-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		uncompress_one(compr_endpts[i], endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		compress_one(endpts[i], compr_endpts[i]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
			
 
				-{
			
 
				-	IntEndptsRGB full_endpts[NREGIONS];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
			
 
				-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
			
 
				-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
			
 
				-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
			
 
				-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
			
 
				-		compress_one(full_endpts[region], q_endpts[region]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
			
 
				-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-		int x = POS_TO_X(position);
			
 
				-		int y = POS_TO_Y(position);
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGB; ++i) 
			
 
				-			{
			
 
				-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
			
 
				-			}
			
 
				-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(shapeindex, SHAPEBITS);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		out.write(endpts[i].a_lsb, 1);
			
 
				-		out.write(endpts[i].b_lsb, 1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (out.getptr() == 83);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	shapeindex = in.read(SHAPEBITS);
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-	
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		endpts[i].a_lsb  = in.read(1);
			
 
				-		endpts[i].b_lsb  = in.read(1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (in.getptr() == 83);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	IntEndptsRGB endpts;
			
 
				-
			
 
				-	uncompress_one(endpts_2, endpts);
			
 
				-
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	// constant alpha
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode0(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGB_2 endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	if (p.transformed)
			
 
				-	{
			
 
				-		sign_extend(p, endpts);
			
 
				-		transform_inverse(endpts);
			
 
				-	}
			
 
				-
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGB_2 temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGB_2 temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGB_2 new_a, new_b;
			
 
				-	IntEndptsRGB_2 new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
			
 
				-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGB_2 temp_in, temp_out;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
			
 
				-		{
			
 
				-			temp_in.a_lsb = lsbmode & 1;
			
 
				-			temp_in.b_lsb = (lsbmode >> 1) & 1;
			
 
				-
			
 
				-			// make sure we have a valid error for temp_in
			
 
				-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			
 
				-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			
 
				-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
			
 
				-
			
 
				-			// now try to optimize these endpoints
			
 
				-			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-			// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-			if (temp_out_err < best_err)
			
 
				-			{
			
 
				-				best_err = temp_out_err;
			
 
				-				opt_err[region] = temp_out_err;
			
 
				-				opt_endpts[region] = temp_out;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-		if (patterns[sp].transformed)
			
 
				-			transform_forward(orig_endpts);
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_inverse(orig_endpts);
			
 
				-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_forward(opt_endpts);
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transformed)
			
 
				-					transform_forward(orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-    nvAssert(false); // throw "No candidate found, should never happen (mode avpcl 0).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// for this mode, we assume alpha = 255 constant and compress only the RGB portion.
			
 
				-// however, we do the error check against the actual alpha values supplied for the tile.
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[2];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				if (np < 2) alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*Vector4(direction, 0);
			
 
				-		endpts[region].B = mean + maxp*Vector4(direction, 0);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode0(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=NSHAPES/4;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_mode1.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode1.cpp
@@ -1,1047 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x10	(666x2).1 (666x2).1 64p 3bi
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-#include "shapes_two.h"
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define	NLSBMODES	2		// number of different lsb modes per region. since we have one .1 per region, that can have 2 values
			
 
				-
			
 
				-#define NINDICES	8
			
 
				-#define	INDEXBITS	3
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NBITSIZES	(NREGIONS*2)
			
 
				-#define	ABITINDEX(region)	(2*(region)+0)
			
 
				-#define	BBITINDEX(region)	(2*(region)+1)
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
			
 
				-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red		green		blue		xfm	mode  mb
			
 
				-	6,6,6,6,	6,6,6,6,	6,6,6,6,	0,	0x2, 2, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGB];
			
 
				-	int endpt_b_prec[NCHANNELS_RGB];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	6,6,6, 6,6,6, 6,6,6, 6,6,6,	
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void transform_forward(IntEndptsRGB_1 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits
			
 
				-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts)
			
 
				-{
			
 
				-	int onescnt;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		onescnt += endpts.A[j] & 1;
			
 
				-		compr_endpts.A[j] = endpts.A[j] >> 1;
			
 
				-		onescnt += endpts.B[j] & 1;
			
 
				-		compr_endpts.B[j] = endpts.B[j] >> 1;
			
 
				-		nvAssert (compr_endpts.A[j] < 64);
			
 
				-		nvAssert (compr_endpts.B[j] < 64);
			
 
				-	}
			
 
				-	compr_endpts.lsb = onescnt >= 3;
			
 
				-}
			
 
				-
			
 
				-static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts)
			
 
				-{
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.lsb;
			
 
				-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.lsb;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void uncompress_endpoints(const IntEndptsRGB_1 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		uncompress_one(compr_endpts[i], endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_1 compr_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		compress_one(endpts[i], compr_endpts[i]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_1 q_endpts[NREGIONS])
			
 
				-{
			
 
				-	IntEndptsRGB full_endpts[NREGIONS];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
			
 
				-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
			
 
				-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
			
 
				-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
			
 
				-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
			
 
				-		compress_one(full_endpts[region], q_endpts[region]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
			
 
				-static void swap_indices(IntEndptsRGB_1 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-		int x = POS_TO_X(position);
			
 
				-		int y = POS_TO_Y(position);
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGB_1 endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void write_header(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(shapeindex, SHAPEBITS);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		out.write(endpts[i].lsb, 1);
			
 
				-
			
 
				-	nvAssert (out.getptr() == 82);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	shapeindex = in.read(SHAPEBITS);
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		endpts[i].lsb  = in.read(1);
			
 
				-	
			
 
				-	nvAssert (in.getptr() == 82);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	IntEndptsRGB endpts;
			
 
				-
			
 
				-	uncompress_one(endpts_1, endpts);
			
 
				-
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
			
 
				-
			
 
				-	// note: don't simplify to a + ((b-a)*i + BIAS)/DENOM as that doesn't work due to the way C handles integer division of negatives
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	// constant alpha
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-// sign extend but only if it was transformed
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGB_1 endpts[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode1(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGB_1 endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	if (p.transformed)
			
 
				-	{
			
 
				-		sign_extend(p, endpts);
			
 
				-		transform_inverse(endpts);
			
 
				-	}
			
 
				-
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, 
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGB_1 temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGB_1 temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGB_1 new_a, new_b;
			
 
				-	IntEndptsRGB_1 new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-		float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
			
 
				-							IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGB_1 temp_in, temp_out;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
			
 
				-		{
			
 
				-			temp_in.lsb = lsbmode;
			
 
				-
			
 
				-			// make sure we have a valid error for temp_in
			
 
				-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			
 
				-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			
 
				-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
			
 
				-
			
 
				-			// now try to optimize these endpoints
			
 
				-			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-			// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-			if (temp_out_err < best_err)
			
 
				-			{
			
 
				-				best_err = temp_out_err;
			
 
				-				opt_err[region] = temp_out_err;
			
 
				-				opt_endpts[region] = temp_out;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGB_1 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-		if (patterns[sp].transformed)
			
 
				-			transform_forward(orig_endpts);
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_inverse(orig_endpts);
			
 
				-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_forward(opt_endpts);
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			//nvAssert(opt_toterr <= orig_toterr);
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transformed)
			
 
				-					transform_forward(orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 1).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[2];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				if (np < 2) alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*Vector4(direction, 0);
			
 
				-		endpts[region].B = mean + maxp*Vector4(direction, 0);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode1(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=NSHAPES/4;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_mode2.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode2.cpp
@@ -1,1004 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x100 555x6 64p 2bi
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-#include "shapes_three.h"
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define NINDICES	4
			
 
				-#define	INDEXBITS	2
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NBITSIZES	6
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
			
 
				-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red			green			blue			xfm	mode  mb
			
 
				-	5,5,5,5,5,5,	5,5,5,5,5,5,	5,5,5,5,5,5,	0,	0x4, 3, "",
			
 
				-};
			
 
				-
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGB];
			
 
				-	int endpt_b_prec[NCHANNELS_RGB];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS_THREE];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-#define	R_0	ep[0].A[i]
			
 
				-#define	R_1 ep[0].B[i]
			
 
				-#define	R_2 ep[1].A[i]
			
 
				-#define	R_3	ep[1].B[i]
			
 
				-
			
 
				-static void transform_forward(IntEndptsRGB ep[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NCHANNELS_RGB; ++i)
			
 
				-	{
			
 
				-		R_1 -= R_3; R_2 -= R_3; R_0 -= R_3;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(IntEndptsRGB ep[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NCHANNELS_RGB; ++i)
			
 
				-	{
			
 
				-		R_0 += R_3; R_2 += R_3; R_1 += R_3;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, IntEndptsRGB q_endpts[NREGIONS_THREE])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS_THREE; ++region)
			
 
				-	{
			
 
				-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
			
 
				-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
			
 
				-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
			
 
				-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
			
 
				-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
			
 
				-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
			
 
				-static void swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS_THREE; ++region)
			
 
				-	{
			
 
				-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-		int x = POS_TO_X(position);
			
 
				-		int y = POS_TO_Y(position);
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGB; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGB endpts[NREGIONS_THREE], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(shapeindex, SHAPEBITS);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS_THREE; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]);
			
 
				-		}
			
 
				-	nvAssert (out.getptr() == 99);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	shapeindex = in.read(SHAPEBITS);
			
 
				-
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS_THREE; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]);
			
 
				-		}
			
 
				-	nvAssert (in.getptr() == 99);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-// WORK PLACEHOLDER -- keep it simple for now
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	int positions[NREGIONS_THREE];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS_THREE; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS_THREE; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int positions[NREGIONS_THREE];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS_THREE; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS_THREE; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	// constant alpha
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-// sign extend but only if it was transformed
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE])
			
 
				-{
			
 
				-	nvAssert (p.transformed != 0);
			
 
				-
			
 
				-	for (int i=0; i<NCHANNELS_RGB; ++i)
			
 
				-	{
			
 
				-		// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
			
 
				-		endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
			
 
				-		endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
			
 
				-		endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
			
 
				-		endpts[2].A[i] = SIGN_EXTEND(endpts[2].A[i], p.chan[i].nbitsizes[4]);
			
 
				-		endpts[2].B[i] = SIGN_EXTEND(endpts[2].B[i], p.chan[i].nbitsizes[5]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode2(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGB endpts[NREGIONS_THREE];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	if (p.transformed)
			
 
				-	{
			
 
				-		sign_extend(p, endpts);
			
 
				-		transform_inverse(endpts);
			
 
				-	}
			
 
				-
			
 
				-	Vector4 palette[NREGIONS_THREE][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS_THREE; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-			float err = Utils::metric4(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS_THREE][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS_THREE; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, 
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGB temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-			float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGB temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGB new_a, new_b;
			
 
				-	IntEndptsRGB new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_THREE], 
			
 
				-							const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGB temp_in, temp_out;
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS_THREE; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		// make sure we have a valid error for temp_in
			
 
				-		// we didn't change temp_in, so orig_err[region] is still valid
			
 
				-		float temp_in_err = orig_err[region];
			
 
				-
			
 
				-		// now try to optimize these endpoints
			
 
				-		float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-		// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-		if (temp_out_err < best_err)
			
 
				-		{
			
 
				-			best_err = temp_out_err;
			
 
				-			opt_err[region] = temp_out_err;
			
 
				-			opt_endpts[region] = temp_out;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-		if (patterns[sp].transformed)
			
 
				-			transform_forward(orig_endpts);
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_inverse(orig_endpts);
			
 
				-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_forward(opt_endpts);
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS_THREE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transformed)
			
 
				-					transform_forward(orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
			
 
				-	return FLT_MAX;
			
 
				-
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS_THREE; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS_THREE][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS_THREE; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[2];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				if (np < 2) alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*Vector4(direction, 0);
			
 
				-		endpts[region].B = mean + maxp*Vector4(direction, 0);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode2(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=NSHAPES/4;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS_THREE];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_mode3.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode3.cpp
@@ -1,1059 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x1000 777.1x4 64p 2bi (30b)
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-#include "shapes_two.h"
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
			
 
				-
			
 
				-#define NINDICES	4
			
 
				-#define	INDEXBITS	2
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NBITSIZES	(NREGIONS*2)
			
 
				-#define	ABITINDEX(region)	(2*(region)+0)
			
 
				-#define	BBITINDEX(region)	(2*(region)+1)
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
			
 
				-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-#define	NREGIONS  2
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red		green		blue		xfm	mode  mb
			
 
				-	7,7,7,7,	7,7,7,7,	7,7,7,7,	0,	0x8, 4, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGB];
			
 
				-	int endpt_b_prec[NCHANNELS_RGB];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	7,7,7, 7,7,7, 7,7,7, 7,7,7,
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits
			
 
				-static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
			
 
				-{
			
 
				-	int onescnt;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		onescnt += endpts.A[j] & 1;
			
 
				-		compr_endpts.A[j] = endpts.A[j] >> 1;
			
 
				-		nvAssert (compr_endpts.A[j] < 128);
			
 
				-	}
			
 
				-	compr_endpts.a_lsb = onescnt >= 2;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		onescnt += endpts.B[j] & 1;
			
 
				-		compr_endpts.B[j] = endpts.B[j] >> 1;
			
 
				-		nvAssert (compr_endpts.B[j] < 128);
			
 
				-	}
			
 
				-	compr_endpts.b_lsb = onescnt >= 2;
			
 
				-}
			
 
				-
			
 
				-static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
			
 
				-{
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-	{
			
 
				-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
			
 
				-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		uncompress_one(compr_endpts[i], endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		compress_one(endpts[i], compr_endpts[i]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
			
 
				-{
			
 
				-	IntEndptsRGB full_endpts[NREGIONS];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
			
 
				-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
			
 
				-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
			
 
				-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
			
 
				-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
			
 
				-		compress_one(full_endpts[region], q_endpts[region]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
			
 
				-static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-		int x = POS_TO_X(position);
			
 
				-		int y = POS_TO_Y(position);
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGB; ++i) 
			
 
				-			{
			
 
				-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
			
 
				-			}
			
 
				-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(shapeindex, SHAPEBITS);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		out.write(endpts[i].a_lsb, 1);
			
 
				-		out.write(endpts[i].b_lsb, 1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (out.getptr() == 98);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	shapeindex = in.read(SHAPEBITS);
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGB; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-	
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		endpts[i].a_lsb  = in.read(1);
			
 
				-		endpts[i].b_lsb  = in.read(1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (in.getptr() == 98);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	IntEndptsRGB endpts;
			
 
				-
			
 
				-	uncompress_one(endpts_2, endpts);
			
 
				-
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	// constant alpha
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode3(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGB_2 endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	if (p.transformed)
			
 
				-	{
			
 
				-		sign_extend(p, endpts);
			
 
				-		transform_inverse(endpts);
			
 
				-	}
			
 
				-
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-            float err = Utils::metric4(colors[i], palette[j]) * importance[i];
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, 
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGB_2 temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGB_2 temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGB_2 new_a, new_b;
			
 
				-	IntEndptsRGB_2 new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
			
 
				-							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGB_2 temp_in, temp_out;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
			
 
				-		{
			
 
				-			temp_in.a_lsb = lsbmode & 1;
			
 
				-			temp_in.b_lsb = (lsbmode >> 1) & 1;
			
 
				-
			
 
				-			// make sure we have a valid error for temp_in
			
 
				-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			
 
				-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			
 
				-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
			
 
				-
			
 
				-			// now try to optimize these endpoints
			
 
				-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-			// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-			if (temp_out_err < best_err)
			
 
				-			{
			
 
				-				best_err = temp_out_err;
			
 
				-				opt_err[region] = temp_out_err;
			
 
				-				opt_endpts[region] = temp_out;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-		if (patterns[sp].transformed)
			
 
				-			transform_forward(orig_endpts);
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_inverse(orig_endpts);
			
 
				-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_forward(opt_endpts);
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transformed)
			
 
				-					transform_forward(orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[2];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				if (np < 2) alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*Vector4(direction, 0);
			
 
				-		endpts[region].B = mean + maxp*Vector4(direction, 0);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode3(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=NSHAPES/4;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_mode4.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode4.cpp
@@ -1,1214 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x10000 2r 1i 555x2 6x2 2bi 3bi
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
			
 
				-// array 0 is always the RGB array and array 1 is always the A array
			
 
				-#define	NINDEXARRAYS	2
			
 
				-#define	INDEXARRAY_RGB	0
			
 
				-#define INDEXARRAY_A	1
			
 
				-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
			
 
				-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
			
 
				-
			
 
				-#define NINDICES3	8
			
 
				-#define	INDEXBITS3	3
			
 
				-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
			
 
				-#define	DENOM3		(NINDICES3-1)
			
 
				-#define	BIAS3		(DENOM3/2)
			
 
				-
			
 
				-#define NINDICES2	4
			
 
				-#define	INDEXBITS2	2
			
 
				-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
			
 
				-#define	DENOM2		(NINDICES2-1)
			
 
				-#define	BIAS2		(DENOM2/2)
			
 
				-
			
 
				-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
			
 
				-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
			
 
				-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
			
 
				-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
			
 
				-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
			
 
				-
			
 
				-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
			
 
				-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
			
 
				-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
			
 
				-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
			
 
				-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
			
 
				-
			
 
				-#define	NSHAPES	1
			
 
				-
			
 
				-static int shapes[NSHAPES] =
			
 
				-{
			
 
				-	0x0000,
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
			
 
				-
			
 
				-#define NREGIONS	1			// keep the region stuff in just in case...
			
 
				-
			
 
				-// encoded index compression location: region 0 is always at 0,0.
			
 
				-
			
 
				-#define	NBITSIZES	2			// one endpoint pair
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
			
 
				-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	TRANSFORM_MODE_ALPHA	1
			
 
				-#define	TRANSFORM_MODE_RGB	2
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red		green		blue		alpha	xfm	mode  mb encoding
			
 
				-	5,5,		5,5,		5,5,		6,6,	0x0, 0x10, 5, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGBA];
			
 
				-	int endpt_b_prec[NCHANNELS_RGBA];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	5,5,5,6,	5,5,5,6,
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-#define	R_0	ep[0].A[i]
			
 
				-#define	R_1 ep[0].B[i]
			
 
				-
			
 
				-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	if (transform_mode & TRANSFORM_MODE_RGB)
			
 
				-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			
 
				-			R_1 -= R_0;
			
 
				-	if (transform_mode & TRANSFORM_MODE_ALPHA)
			
 
				-	{
			
 
				-		i = CHANNEL_A;
			
 
				-		R_1 -= R_0;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	if (transform_mode & TRANSFORM_MODE_RGB)
			
 
				-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			
 
				-			R_1 += R_0;
			
 
				-	if (transform_mode & TRANSFORM_MODE_ALPHA)
			
 
				-	{
			
 
				-		i = CHANNEL_A;
			
 
				-		R_1 += R_0;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
			
 
				-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
			
 
				-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
			
 
				-		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
			
 
				-
			
 
				-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
			
 
				-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
			
 
				-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
			
 
				-		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
			
 
				-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
			
 
				-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int index_positions[NREGIONS];
			
 
				-
			
 
				-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int x = index_positions[region] & 3;
			
 
				-		int y = (index_positions[region] >> 2) & 3;
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-
			
 
				-		// swap RGB
			
 
				-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
			
 
				-		}
			
 
				-
			
 
				-		// swap A
			
 
				-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
			
 
				-{
			
 
				-	// ignore shapeindex
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(rotatemode, ROTATEMODE_BITS);
			
 
				-	out.write(indexmode, INDEXMODE_BITS);
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
			
 
				-		}
			
 
				-	nvAssert (out.getptr() == 50);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	shapeindex = 0;		// we don't have any
			
 
				-
			
 
				-	rotatemode = in.read(ROTATEMODE_BITS);
			
 
				-	indexmode = in.read(INDEXMODE_BITS);
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
			
 
				-		}
			
 
				-	nvAssert (in.getptr() == 50);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
			
 
				-{
			
 
				-	// the indices we shorten is always index 0
			
 
				-
			
 
				-	// do the 2 bit indices first
			
 
				-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
			
 
				-
			
 
				-	// then the 3 bit indices
			
 
				-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	// the indices we shorten is always index 0
			
 
				-
			
 
				-	// do the 2 bit indices first
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
			
 
				-
			
 
				-	// then the 3 bit indices
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, indexmode, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
			
 
				-{
			
 
				-	// scale endpoints for RGB
			
 
				-	int a, b;
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
			
 
				-
			
 
				-	// interpolate R
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
			
 
				-
			
 
				-	// interpolate G
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
			
 
				-
			
 
				-	// interpolate B
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
			
 
				-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
			
 
				-
			
 
				-	// interpolate A
			
 
				-	for (int i = 0; i < NINDICES_A(indexmode); ++i)
			
 
				-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
			
 
				-
			
 
				-}
			
 
				-
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NCHANNELS_RGBA; ++i)
			
 
				-	{
			
 
				-		if (p.transform_mode)
			
 
				-		{
			
 
				-			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
			
 
				-			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
			
 
				-			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
			
 
				-			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
			
 
				-{
			
 
				-	out.size_x = in.size_x;
			
 
				-	out.size_y = in.size_y;
			
 
				-
			
 
				-	for (int y=0; y<in.size_y; ++y)
			
 
				-	for (int x=0; x<in.size_x; ++x)
			
 
				-	{
			
 
				-		float t;
			
 
				-		out.data[y][x] = in.data[y][x];
			
 
				-
			
 
				-		switch(rotatemode)
			
 
				-		{
			
 
				-		case ROTATEMODE_RGBA_RGBA: break;
			
 
				-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		default: nvUnreachable();
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode4(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGBA endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index, rotatemode, indexmode;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
			
 
				-	
			
 
				-	sign_extend(p, endpts);
			
 
				-
			
 
				-	if (p.transform_mode)
			
 
				-		transform_inverse(p.transform_mode, endpts);
			
 
				-
			
 
				-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
			
 
				-
			
 
				-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indexmode, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	Tile temp(t.size_x, t.size_y);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
			
 
				-
			
 
				-	rotate_tile(temp, rotatemode, t);
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
			
 
				-// exceeds what we already have
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NINDICES3];	// could be nindices2
			
 
				-	float toterr = 0;
			
 
				-
			
 
				-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
			
 
				-
			
 
				-	Vector3 rgb;
			
 
				-	float a;
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float err, besterr;
			
 
				-		float palette_alpha = 0, tile_alpha = 0;
			
 
				-
			
 
				-		if(AVPCL::flag_premult)
			
 
				-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
			
 
				-
			
 
				-		rgb.x = (colors[i]).x;
			
 
				-		rgb.y = (colors[i]).y;
			
 
				-		rgb.z = (colors[i]).z;
			
 
				-		a = (colors[i]).w;
			
 
				-
			
 
				-		// compute the two indices separately
			
 
				-		// if we're doing premultiplied alpha, we need to choose first the index that
			
 
				-		// determines the alpha value, and then do the other index
			
 
				-
			
 
				-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
			
 
				-		{
			
 
				-			// do A index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = Utils::metric1(a, palette_a[j], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					palette_alpha = palette_a[j];
			
 
				-					indices[INDEXARRAY_A][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
			
 
				-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;
			
 
				-			if (toterr > current_besterr)
			
 
				-			{
			
 
				-				// fill out bogus index values so it's initialized at least
			
 
				-				for (int k = i; k < np; ++k)
			
 
				-				{
			
 
				-					indices[INDEXARRAY_RGB][k] = -1;
			
 
				-					indices[INDEXARRAY_A][k] = -1;
			
 
				-				}
			
 
				-				return FLT_MAX;
			
 
				-			}
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			int bestindex;
			
 
				-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
			
 
				-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					bestindex = j;
			
 
				-					indices[INDEXARRAY_RGB][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
			
 
				-			toterr += besterr;
			
 
				-
			
 
				-			// do A index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
			
 
				-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-			if (toterr > current_besterr)
			
 
				-			{
			
 
				-				// fill out bogus index values so it's initialized at least
			
 
				-				for (int k = i; k < np; ++k)
			
 
				-				{
			
 
				-					indices[INDEXARRAY_RGB][k] = -1;
			
 
				-					indices[INDEXARRAY_A][k] = -1;
			
 
				-				}
			
 
				-				return FLT_MAX;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector3 rgb;
			
 
				-	float a;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr;
			
 
				-		float palette_alpha = 0, tile_alpha = 0;
			
 
				-
			
 
				-		rgb.x = (tile.data[y][x]).x;
			
 
				-		rgb.y = (tile.data[y][x]).y;
			
 
				-		rgb.z = (tile.data[y][x]).z;
			
 
				-		a = (tile.data[y][x]).w;
			
 
				-
			
 
				-		if(AVPCL::flag_premult)
			
 
				-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
			
 
				-
			
 
				-		// compute the two indices separately
			
 
				-		// if we're doing premultiplied alpha, we need to choose first the index that
			
 
				-		// determines the alpha value, and then do the other index
			
 
				-
			
 
				-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
			
 
				-		{
			
 
				-			// do A index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][y][x] = i;
			
 
				-					palette_alpha = palette_a[region][i];
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
			
 
				-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][y][x] = i;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			// do RGB index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			int bestindex;
			
 
				-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
			
 
				-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][y][x] = i;
			
 
				-					bestindex = i;
			
 
				-				}
			
 
				-			}
			
 
				-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
			
 
				-			toterr[region] += besterr;
			
 
				-
			
 
				-			// do A index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
			
 
				-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][y][x] = i;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, 
			
 
				-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGBA temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[j][i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGBA temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[j][i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[j][i] = good_indices[j][i];
			
 
				-	}
			
 
				-
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGBA new_a, new_b;
			
 
				-	IntEndptsRGBA new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = temp_indices0[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[j][i] = temp_indices0[j][i];
			
 
				-					nvAssert (orig_indices[j][i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
			
 
				-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGBA temp_in, temp_out;
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		// make sure we have a valid error for temp_in
			
 
				-		// we didn't change temp_in, so orig_err[region] is still valid
			
 
				-		float temp_in_err = orig_err[region];
			
 
				-
			
 
				-		// now try to optimize these endpoints
			
 
				-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-		// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-		if (temp_out_err < best_err)
			
 
				-		{
			
 
				-			best_err = temp_out_err;
			
 
				-			opt_err[region] = temp_out_err;
			
 
				-			opt_endpts[region] = temp_out;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-
			
 
				-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
			
 
				-
			
 
				-		if (patterns[sp].transform_mode)
			
 
				-			transform_forward(patterns[sp].transform_mode, orig_endpts);
			
 
				-
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transform_mode)
			
 
				-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
			
 
				-
			
 
				-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-
			
 
				-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
			
 
				-
			
 
				-			if (patterns[sp].transform_mode)
			
 
				-				transform_forward(patterns[sp].transform_mode, opt_endpts);
			
 
				-
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transform_mode)
			
 
				-					transform_forward(patterns[sp].transform_mode, orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 4).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	if (v.w < 0.0f) v.w = 0.0f;
			
 
				-	if (v.w > 255.0f) v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-// compute initial endpoints for the "RGB" portion and the "A" portion. 
			
 
				-// Note these channels may have been rotated.
			
 
				-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[Tile::TILE_TOTAL];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		float mina = FLT_MAX, maxa = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-
			
 
				-			dp = alphas[i] - mean.w;
			
 
				-			if (dp < mina) mina = dp;
			
 
				-			if (dp > maxa) maxa = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + Vector4(minp*direction, mina);
			
 
				-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode4(const Tile &t, char *block)
			
 
				-{
			
 
				-	FltEndpts endpts[NREGIONS];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-	int shape = 0;
			
 
				-	Tile t1;
			
 
				-
			
 
				-	// try all rotations. refine tries the 2 different indexings.
			
 
				-	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
			
 
				-	{
			
 
				-		rotate_tile(t, r, t1);
			
 
				-		rough(t1, shape, endpts);
			
 
				-		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
			
 
				-		{
			
 
				-			float mse = refine(t1, shape, r, i, endpts, tempblock);
			
 
				-			if (mse < msebest)
			
 
				-			{
			
 
				-				memcpy(block, tempblock, sizeof(tempblock));
			
 
				-				msebest = mse;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
--- a/3rdparty/nvtt/bc7/avpcl_mode5.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode5.cpp
@@ -1,1216 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x100000 2r 777x2 8x2 2bi 2bi
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
			
 
				-// array 0 is always the RGB array and array 1 is always the A array
			
 
				-#define	NINDEXARRAYS	2
			
 
				-#define	INDEXARRAY_RGB	0
			
 
				-#define INDEXARRAY_A	1
			
 
				-#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
			
 
				-#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
			
 
				-
			
 
				-#define NINDICES3	4
			
 
				-#define	INDEXBITS3	2
			
 
				-#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
			
 
				-#define	DENOM3		(NINDICES3-1)
			
 
				-#define	BIAS3		(DENOM3/2)
			
 
				-
			
 
				-#define NINDICES2	4
			
 
				-#define	INDEXBITS2	2
			
 
				-#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
			
 
				-#define	DENOM2		(NINDICES2-1)
			
 
				-#define	BIAS2		(DENOM2/2)
			
 
				-
			
 
				-#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
			
 
				-#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
			
 
				-#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
			
 
				-#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
			
 
				-#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)
			
 
				-
			
 
				-#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
			
 
				-#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
			
 
				-#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
			
 
				-#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
			
 
				-#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
			
 
				-
			
 
				-#define	NSHAPES	1
			
 
				-
			
 
				-static int shapes[NSHAPES] =
			
 
				-{
			
 
				-	0x0000,
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
			
 
				-
			
 
				-#define NREGIONS	1			// keep the region stuff in just in case...
			
 
				-
			
 
				-// encoded index compression location: region 0 is always at 0,0.
			
 
				-
			
 
				-#define	NBITSIZES	2			// one endpoint pair
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
			
 
				-	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	TRANSFORM_MODE_ALPHA	1
			
 
				-#define	TRANSFORM_MODE_RGB	2
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red		green		blue		alpha	xfm	mode  mb encoding
			
 
				-	7,7,		7,7,		7,7,		8,8,	0x0, 0x20, 6, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGBA];
			
 
				-	int endpt_b_prec[NCHANNELS_RGBA];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	7,7,7,8,	7,7,7,8,
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-#define	R_0	ep[0].A[i]
			
 
				-#define	R_1 ep[0].B[i]
			
 
				-
			
 
				-static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	if (transform_mode & TRANSFORM_MODE_RGB)
			
 
				-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			
 
				-			R_1 -= R_0;
			
 
				-	if (transform_mode & TRANSFORM_MODE_ALPHA)
			
 
				-	{
			
 
				-		i = CHANNEL_A;
			
 
				-		R_1 -= R_0;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
			
 
				-{
			
 
				-	int i;
			
 
				-
			
 
				-	if (transform_mode & TRANSFORM_MODE_RGB)
			
 
				-		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			
 
				-			R_1 += R_0;
			
 
				-	if (transform_mode & TRANSFORM_MODE_ALPHA)
			
 
				-	{
			
 
				-		i = CHANNEL_A;
			
 
				-		R_1 += R_0;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
			
 
				-		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
			
 
				-		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
			
 
				-		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
			
 
				-
			
 
				-		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
			
 
				-		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
			
 
				-		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
			
 
				-		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
			
 
				-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
			
 
				-static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int index_positions[NREGIONS];
			
 
				-
			
 
				-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int x = index_positions[region] & 3;
			
 
				-		int y = (index_positions[region] >> 2) & 3;
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-
			
 
				-		// swap RGB
			
 
				-		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
			
 
				-		}
			
 
				-
			
 
				-		// swap A
			
 
				-		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
			
 
				-{
			
 
				-	// ignore shapeindex
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(rotatemode, ROTATEMODE_BITS);
			
 
				-//	out.write(indexmode, INDEXMODE_BITS);
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
			
 
				-		}
			
 
				-	nvAssert (out.getptr() == 66);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	shapeindex = 0;		// we don't have any
			
 
				-
			
 
				-	rotatemode = in.read(ROTATEMODE_BITS);
			
 
				-
			
 
				-	indexmode = 0;		// we don't have any
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
			
 
				-		}
			
 
				-	nvAssert (in.getptr() == 66);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
			
 
				-{
			
 
				-	// the indices we shorten is always index 0
			
 
				-
			
 
				-	// do the 2 bit indices first
			
 
				-	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]
			
 
				-
			
 
				-	// then the 3 bit indices
			
 
				-	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	// the indices we shorten is always index 0
			
 
				-
			
 
				-	// do the 2 bit indices first
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]
			
 
				-
			
 
				-	// then the 3 bit indices
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, indexmode, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
			
 
				-{
			
 
				-	// scale endpoints for RGB
			
 
				-	int a, b;
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); 
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
			
 
				-
			
 
				-	// interpolate R
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
			
 
				-
			
 
				-	// interpolate G
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
			
 
				-
			
 
				-	// interpolate B
			
 
				-	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
			
 
				-		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); 
			
 
				-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
			
 
				-
			
 
				-	// interpolate A
			
 
				-	for (int i = 0; i < NINDICES_A(indexmode); ++i)
			
 
				-		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
			
 
				-}
			
 
				-
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NCHANNELS_RGBA; ++i)
			
 
				-	{
			
 
				-		if (p.transform_mode)
			
 
				-		{
			
 
				-			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
			
 
				-			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
			
 
				-			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
			
 
				-			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
			
 
				-{
			
 
				-	out.size_x = in.size_x;
			
 
				-	out.size_y = in.size_y;
			
 
				-
			
 
				-	for (int y=0; y<in.size_y; ++y)
			
 
				-	for (int x=0; x<in.size_x; ++x)
			
 
				-	{
			
 
				-		float t;
			
 
				-		out.data[y][x] = in.data[y][x];
			
 
				-
			
 
				-		switch(rotatemode)
			
 
				-		{
			
 
				-		case ROTATEMODE_RGBA_RGBA: break;
			
 
				-		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
			
 
				-		default: nvUnreachable();
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode5(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGBA endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index, rotatemode, indexmode;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);
			
 
				-	
			
 
				-	sign_extend(p, endpts);
			
 
				-
			
 
				-	if (p.transform_mode)
			
 
				-		transform_inverse(p.transform_mode, endpts);
			
 
				-
			
 
				-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
			
 
				-
			
 
				-	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indexmode, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	Tile temp(t.size_x, t.size_y);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
			
 
				-
			
 
				-	rotate_tile(temp, rotatemode, t);
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
			
 
				-// exceeds what we already have
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector3 palette_rgb[NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NINDICES3];	// could be nindices2
			
 
				-	float toterr = 0;
			
 
				-
			
 
				-	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
			
 
				-
			
 
				-	Vector3 rgb;
			
 
				-	float a;
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float err, besterr;
			
 
				-		float palette_alpha = 0, tile_alpha = 0;
			
 
				-
			
 
				-		if(AVPCL::flag_premult)
			
 
				-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
			
 
				-
			
 
				-		rgb.x = (colors[i]).x;
			
 
				-		rgb.y = (colors[i]).y;
			
 
				-		rgb.z = (colors[i]).z;
			
 
				-		a = (colors[i]).w;
			
 
				-
			
 
				-		// compute the two indices separately
			
 
				-		// if we're doing premultiplied alpha, we need to choose first the index that
			
 
				-		// determines the alpha value, and then do the other index
			
 
				-
			
 
				-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
			
 
				-		{
			
 
				-			// do A index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = Utils::metric1(a, palette_a[j], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					palette_alpha = palette_a[j];
			
 
				-					indices[INDEXARRAY_A][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
			
 
				-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;
			
 
				-			if (toterr > current_besterr)
			
 
				-			{
			
 
				-				// fill out bogus index values so it's initialized at least
			
 
				-				for (int k = i; k < np; ++k)
			
 
				-				{
			
 
				-					indices[INDEXARRAY_RGB][k] = -1;
			
 
				-					indices[INDEXARRAY_A][k] = -1;
			
 
				-				}
			
 
				-				return FLT_MAX;
			
 
				-			}
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			int bestindex;
			
 
				-			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
			
 
				-											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					bestindex = j;
			
 
				-					indices[INDEXARRAY_RGB][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
			
 
				-			toterr += besterr;
			
 
				-
			
 
				-			// do A index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
			
 
				-											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][i] = j;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-			if (toterr > current_besterr)
			
 
				-			{
			
 
				-				// fill out bogus index values so it's initialized at least
			
 
				-				for (int k = i; k < np; ++k)
			
 
				-				{
			
 
				-					indices[INDEXARRAY_RGB][k] = -1;
			
 
				-					indices[INDEXARRAY_A][k] = -1;
			
 
				-				}
			
 
				-				return FLT_MAX;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-	float palette_a[NREGIONS][NINDICES3];	// could be nindices2
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector3 rgb;
			
 
				-	float a;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr;
			
 
				-		float palette_alpha = 0, tile_alpha = 0;
			
 
				-
			
 
				-		rgb.x = (tile.data[y][x]).x;
			
 
				-		rgb.y = (tile.data[y][x]).y;
			
 
				-		rgb.z = (tile.data[y][x]).z;
			
 
				-		a = (tile.data[y][x]).w;
			
 
				-
			
 
				-		if(AVPCL::flag_premult)
			
 
				-				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
			
 
				-							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
			
 
				-
			
 
				-		// compute the two indices separately
			
 
				-		// if we're doing premultiplied alpha, we need to choose first the index that
			
 
				-		// determines the alpha value, and then do the other index
			
 
				-
			
 
				-		if (rotatemode == ROTATEMODE_RGBA_RGBA)
			
 
				-		{
			
 
				-			// do A index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = Utils::metric1(a, palette_a[region][i], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][y][x] = i;
			
 
				-					palette_alpha = palette_a[region][i];
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-
			
 
				-			// do RGB index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
			
 
				-											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][y][x] = i;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			// do RGB index first as it has the alpha
			
 
				-			besterr = FLT_MAX;
			
 
				-			int bestindex;
			
 
				-			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
			
 
				-											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_RGB][y][x] = i;
			
 
				-					bestindex = i;
			
 
				-				}
			
 
				-			}
			
 
				-			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
			
 
				-							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
			
 
				-			toterr[region] += besterr;
			
 
				-
			
 
				-			// do A index
			
 
				-			besterr = FLT_MAX;
			
 
				-			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			
 
				-			{
			
 
				-				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
			
 
				-											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
			
 
				-
			
 
				-				if (err > besterr)	// error increased, so we're done searching
			
 
				-					break;
			
 
				-				if (err < besterr)
			
 
				-				{
			
 
				-					besterr = err;
			
 
				-					indices[INDEXARRAY_A][y][x] = i;
			
 
				-				}
			
 
				-			}
			
 
				-			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
			
 
				-						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGBA temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[j][i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGBA temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[j][i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[j][i] = temp_indices[j][i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[j][i] = good_indices[j][i];
			
 
				-	}
			
 
				-
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGBA new_a, new_b;
			
 
				-	IntEndptsRGBA new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[j][i] = temp_indices0[j][i];
			
 
				-				nvAssert (orig_indices[j][i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int j=0; j<NINDEXARRAYS; ++j)
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[j][i] = temp_indices0[j][i];
			
 
				-					nvAssert (orig_indices[j][i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS], 
			
 
				-							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGBA temp_in, temp_out;
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		// make sure we have a valid error for temp_in
			
 
				-		// we didn't change temp_in, so orig_err[region] is still valid
			
 
				-		float temp_in_err = orig_err[region];
			
 
				-
			
 
				-		// now try to optimize these endpoints
			
 
				-        float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-		// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-		if (temp_out_err < best_err)
			
 
				-		{
			
 
				-			best_err = temp_out_err;
			
 
				-			opt_err[region] = temp_out_err;
			
 
				-			opt_endpts[region] = temp_out;
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-
			
 
				-		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);
			
 
				-
			
 
				-		if (patterns[sp].transform_mode)
			
 
				-			transform_forward(patterns[sp].transform_mode, orig_endpts);
			
 
				-
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transform_mode)
			
 
				-				transform_inverse(patterns[sp].transform_mode, orig_endpts);
			
 
				-
			
 
				-			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-
			
 
				-			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
			
 
				-
			
 
				-			if (patterns[sp].transform_mode)
			
 
				-				transform_forward(patterns[sp].transform_mode, opt_endpts);
			
 
				-
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transform_mode)
			
 
				-					transform_forward(patterns[sp].transform_mode, orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	if (v.w < 0.0f) v.w = 0.0f;
			
 
				-	if (v.w > 255.0f) v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-// compute initial endpoints for the "RGB" portion and the "A" portion. 
			
 
				-// Note these channels may have been rotated.
			
 
				-static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector3 colors[Tile::TILE_TOTAL];
			
 
				-		float alphas[Tile::TILE_TOTAL];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x].xyz();
			
 
				-				alphas[np] = tile.data[y][x].w;
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[0], alphas[0]);
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = Vector4(colors[0], alphas[0]);
			
 
				-			endpts[region].B = Vector4(colors[1], alphas[1]);
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		float mina = FLT_MAX, maxa = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean.xyz(), direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-
			
 
				-			dp = alphas[i] - mean.w;
			
 
				-			if (dp < mina) mina = dp;
			
 
				-			if (dp > maxa) maxa = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + Vector4(minp*direction, mina);
			
 
				-		endpts[region].B = mean + Vector4(maxp*direction, maxa);
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode5(const Tile &t, char *block)
			
 
				-{
			
 
				-	FltEndpts endpts[NREGIONS];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-	int shape = 0;
			
 
				-	Tile t1;
			
 
				-
			
 
				-	// try all rotations. refine tries the 2 different indexings.
			
 
				-	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
			
 
				-	{
			
 
				-		rotate_tile(t, r, t1);
			
 
				-		rough(t1, shape, endpts);
			
 
				-//		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
			
 
				-		for (int i = 0; i < 1 && msebest > 0; ++i)
			
 
				-		{
			
 
				-			float mse = refine(t1, shape, r, i, endpts, tempblock);
			
 
				-			if (mse < msebest)
			
 
				-			{
			
 
				-				memcpy(block, tempblock, sizeof(tempblock));
			
 
				-				msebest = mse;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
--- a/3rdparty/nvtt/bc7/avpcl_mode6.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode6.cpp
@@ -1,1055 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x1000000 7777.1x2 4bi
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
			
 
				-
			
 
				-#define NINDICES	16
			
 
				-#define	INDEXBITS	4
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-#define	NSHAPES	1
			
 
				-
			
 
				-static int shapes[NSHAPES] =
			
 
				-{
			
 
				-	0x0000,
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
			
 
				-
			
 
				-#define	NREGIONS	1
			
 
				-
			
 
				-#define	NBITSIZES	(NREGIONS*2)
			
 
				-#define	ABITINDEX(region)	(2*(region)+0)
			
 
				-#define	BBITINDEX(region)	(2*(region)+1)
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red	green	blue	alpha	mode  mb verilog
			
 
				-	7,7,	7,7,	7,7,	7,7,	0x40, 7, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGBA];
			
 
				-	int endpt_b_prec[NCHANNELS_RGBA];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	7,7,7,7,	7,7,7,7,
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-we're using this table to assign lsbs
			
 
				-abgr	>=2	correct
			
 
				-0000	0	0
			
 
				-0001	0	0
			
 
				-0010	0	0
			
 
				-0011	1	x1
			
 
				-0100	0	0
			
 
				-0101	1	x1
			
 
				-0110	1	x1
			
 
				-0111	1	1
			
 
				-1000	0	0
			
 
				-1001	1	x0
			
 
				-1010	1	x0
			
 
				-1011	1	1
			
 
				-1100	1	x0
			
 
				-1101	1	1
			
 
				-1110	1	1
			
 
				-1111	1	1
			
 
				-
			
 
				-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
			
 
				-I choose to assign the lsbs so that the rgb channels are as good as possible.
			
 
				-*/
			
 
				-
			
 
				-// 8888 ->7777.1, use the "correct" column above to assign the lsb
			
 
				-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
			
 
				-{
			
 
				-	int onescnt;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		// ignore the alpha channel in the count
			
 
				-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
			
 
				-		compr_endpts.A[j] = endpts.A[j] >> 1;
			
 
				-		nvAssert (compr_endpts.A[j] < 128);
			
 
				-	}
			
 
				-	compr_endpts.a_lsb = onescnt >= 2;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
			
 
				-		compr_endpts.B[j] = endpts.B[j] >> 1;
			
 
				-		nvAssert (compr_endpts.B[j] < 128);
			
 
				-	}
			
 
				-	compr_endpts.b_lsb = onescnt >= 2;
			
 
				-}
			
 
				-
			
 
				-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
			
 
				-{
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
			
 
				-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		uncompress_one(compr_endpts[i], endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		compress_one(endpts[i], compr_endpts[i]);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
			
 
				-{
			
 
				-	IntEndptsRGBA full_endpts[NREGIONS];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
			
 
				-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
			
 
				-		full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
			
 
				-
			
 
				-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
			
 
				-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
			
 
				-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
			
 
				-		full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
			
 
				-
			
 
				-		compress_one(full_endpts[region], q_endpts[region]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
			
 
				-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
			
 
				-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	int index_positions[NREGIONS];
			
 
				-
			
 
				-	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int x = index_positions[region] & 3;
			
 
				-		int y = (index_positions[region] >> 2) & 3;
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGBA; ++i) 
			
 
				-			{
			
 
				-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
			
 
				-			}
			
 
				-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		out.write(endpts[i].a_lsb, 1);
			
 
				-		out.write(endpts[i].b_lsb, 1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (out.getptr() == 65);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	shapeindex = 0;		// we don't have any
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-	
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		endpts[i].a_lsb  = in.read(1);
			
 
				-		endpts[i].b_lsb  = in.read(1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (in.getptr() == 65);
			
 
				-}
			
 
				-
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	nvAssert ((indices[0][0] & HIGH_INDEXBIT) == 0);
			
 
				-
			
 
				-	// the index we shorten is always index 0
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-	{
			
 
				-		if (i==0)
			
 
				-			out.write(indices[i>>2][i&3], INDEXBITS-1);	// write i..[2:0]
			
 
				-		else
			
 
				-			out.write(indices[i>>2][i&3], INDEXBITS);	// write i..[3:0]
			
 
				-	}
			
 
				-
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	// the index we shorten is always index 0
			
 
				-	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
			
 
				-	{
			
 
				-		if (i==0)
			
 
				-			indices[i>>2][i&3] = in.read(INDEXBITS-1);	// read i..[1:0]
			
 
				-		else
			
 
				-			indices[i>>2][i&3] = in.read(INDEXBITS);	// read i..[2:0]
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	IntEndptsRGBA endpts;
			
 
				-
			
 
				-	uncompress_one(endpts_2, endpts);
			
 
				-
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space 
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode6(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGBA_2 endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-			err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
			
 
				-									     Utils::metric4premult(colors[i], palette[j]) ;
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
			
 
				-										 Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGBA_2 temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGBA_2 temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGBA_2 new_a, new_b;
			
 
				-	IntEndptsRGBA_2 new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
			
 
				-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGBA_2 temp_in, temp_out;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		// try all lsb modes as we search for better endpoints
			
 
				-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
			
 
				-		{
			
 
				-			temp_in.a_lsb = lsbmode & 1;
			
 
				-			temp_in.b_lsb = (lsbmode >> 1) & 1;
			
 
				-
			
 
				-			// make sure we have a valid error for temp_in
			
 
				-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			
 
				-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			
 
				-            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
			
 
				-
			
 
				-			// now try to optimize these endpoints
			
 
				-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-			// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-			if (temp_out_err < best_err)
			
 
				-			{
			
 
				-				best_err = temp_out_err;
			
 
				-				opt_err[region] = temp_out_err;
			
 
				-				opt_endpts[region] = temp_out;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-
			
 
				-     simplify the above given that there is no transform now and that endpoints will always fit
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-
			
 
				-		optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-
			
 
				-		assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-		// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-		//for (int i=0; i<NREGIONS; ++i)
			
 
				-		//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-		swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-
			
 
				-		orig_toterr = opt_toterr = 0;
			
 
				-		for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-		//nvAssert(opt_toterr <= orig_toterr);
			
 
				-
			
 
				-		if (opt_toterr < orig_toterr)
			
 
				-		{
			
 
				-			emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-			return opt_toterr;
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-			return orig_toterr;
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 6).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	if (v.w < 0.0f) v.w = 0.0f;
			
 
				-	if (v.w > 255.0f) v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr;
			
 
				-
			
 
				-		besterr = Utils::metric4(tile.data[y][x], palette[region][0]);
			
 
				-
			
 
				-		for (int i = 1; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector4 colors[Tile::TILE_TOTAL];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x];
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = colors[0];
			
 
				-			endpts[region].B = colors[0];
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = colors[0];
			
 
				-			endpts[region].B = colors[1];
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean, direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*direction;
			
 
				-		endpts[region].B = mean + maxp*direction;
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode6(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=1;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_mode7.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_mode7.cpp
@@ -1,1094 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Thanks to Jacob Munkberg ([email protected]) for the shortcut of using SVD to do the equivalent of principal components analysis
			
 
				-
			
 
				-// x10000000 5555.1x4 64p 2bi (30b)
			
 
				-
			
 
				-#include "bits.h"
			
 
				-#include "tile.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvcore/debug.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include "nvmath/matrix.inl"
			
 
				-#include "nvmath/fitting.h"
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "endpts.h"
			
 
				-#include <string.h>
			
 
				-#include <float.h>
			
 
				-
			
 
				-#include "shapes_two.h"
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values
			
 
				-
			
 
				-#define NINDICES	4
			
 
				-#define	INDEXBITS	2
			
 
				-#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))
			
 
				-#define	DENOM		(NINDICES-1)
			
 
				-#define	BIAS		(DENOM/2)
			
 
				-
			
 
				-// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
			
 
				-// i.e. can we search shapes in a particular order so we can see the global error minima easily and
			
 
				-// stop without having to touch all shapes?
			
 
				-
			
 
				-#define	POS_TO_X(pos)	((pos)&3)
			
 
				-#define	POS_TO_Y(pos)	(((pos)>>2)&3)
			
 
				-
			
 
				-#define	NBITSIZES	(NREGIONS*2)
			
 
				-#define	ABITINDEX(region)	(2*(region)+0)
			
 
				-#define	BBITINDEX(region)	(2*(region)+1)
			
 
				-
			
 
				-struct ChanBits
			
 
				-{
			
 
				-	int nbitsizes[NBITSIZES];	// bitsizes for one channel
			
 
				-};
			
 
				-
			
 
				-struct Pattern
			
 
				-{
			
 
				-	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
			
 
				-	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
			
 
				-	int mode;				// associated mode value
			
 
				-	int modebits;			// number of mode bits
			
 
				-	const char *encoding;			// verilog description of encoding for this mode
			
 
				-};
			
 
				-
			
 
				-#define	NPATTERNS 1
			
 
				-#define	NREGIONS  2
			
 
				-
			
 
				-static Pattern patterns[NPATTERNS] =
			
 
				-{
			
 
				-	// red		green		blue		alpha		xfm	mode  mb
			
 
				-	5,5,5,5,	5,5,5,5,	5,5,5,5,	5,5,5,5,	0,	0x80, 8, "",
			
 
				-};
			
 
				-
			
 
				-struct RegionPrec
			
 
				-{
			
 
				-	int	endpt_a_prec[NCHANNELS_RGBA];
			
 
				-	int endpt_b_prec[NCHANNELS_RGBA];
			
 
				-};
			
 
				-
			
 
				-struct PatternPrec
			
 
				-{
			
 
				-	RegionPrec region_precs[NREGIONS];
			
 
				-};
			
 
				-
			
 
				-
			
 
				-// this is the precision for each channel and region
			
 
				-// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
			
 
				-static PatternPrec pattern_precs[NPATTERNS] =
			
 
				-{
			
 
				-	5,5,5,5,  5,5,5,5,  5,5,5,5,  5,5,5,5,
			
 
				-};
			
 
				-
			
 
				-// return # of bits needed to store n. handle signed or unsigned cases properly
			
 
				-static int nbits(int n, bool issigned)
			
 
				-{
			
 
				-	int nb;
			
 
				-	if (n==0)
			
 
				-		return 0;	// no bits needed for 0, signed or not
			
 
				-	else if (n > 0)
			
 
				-	{
			
 
				-		for (nb=0; n; ++nb, n>>=1) ;
			
 
				-		return nb + (issigned?1:0);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		nvAssert (issigned);
			
 
				-		for (nb=0; n<-1; ++nb, n>>=1) ;
			
 
				-		return nb + 1;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-we're using this table to assign lsbs
			
 
				-abgr	>=2	correct
			
 
				-0000	0	0
			
 
				-0001	0	0
			
 
				-0010	0	0
			
 
				-0011	1	x1
			
 
				-0100	0	0
			
 
				-0101	1	x1
			
 
				-0110	1	x1
			
 
				-0111	1	1
			
 
				-1000	0	0
			
 
				-1001	1	x0
			
 
				-1010	1	x0
			
 
				-1011	1	1
			
 
				-1100	1	x0
			
 
				-1101	1	1
			
 
				-1110	1	1
			
 
				-1111	1	1
			
 
				-
			
 
				-we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8.
			
 
				-I choose to assign the lsbs so that the rgb channels are as good as possible.
			
 
				-*/
			
 
				-
			
 
				-// 6666 ->5555.1, use the "correct" column above to assign the lsb
			
 
				-static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts)
			
 
				-{
			
 
				-	int onescnt;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		// ignore the alpha channel in the count
			
 
				-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
			
 
				-		compr_endpts.A[j] = endpts.A[j] >> 1;
			
 
				-		nvAssert (compr_endpts.A[j] < 32);
			
 
				-	}
			
 
				-	compr_endpts.a_lsb = onescnt >= 2;
			
 
				-
			
 
				-	onescnt = 0;
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
			
 
				-		compr_endpts.B[j] = endpts.B[j] >> 1;
			
 
				-		nvAssert (compr_endpts.B[j] < 32);
			
 
				-	}
			
 
				-	compr_endpts.b_lsb = onescnt >= 2;
			
 
				-}
			
 
				-
			
 
				-static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts)
			
 
				-{
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-	{
			
 
				-		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
			
 
				-		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
			
 
				-	}
			
 
				-}
			
 
				-static void uncompress_endpoints(const IntEndptsRGBA_2 compr_endpts[NREGIONS], IntEndptsRGBA endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		uncompress_one(compr_endpts[i], endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void compress_endpoints(const IntEndptsRGBA endpts[NREGIONS], IntEndptsRGBA_2 compr_endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-		compress_one(endpts[i], compr_endpts[i]);
			
 
				-}
			
 
				-
			
 
				-static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA_2 q_endpts[NREGIONS])
			
 
				-{
			
 
				-	IntEndptsRGBA full_endpts[NREGIONS];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
			
 
				-		full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
			
 
				-		full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
			
 
				-		full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
			
 
				-
			
 
				-		full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
			
 
				-		full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
			
 
				-		full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
			
 
				-		full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
			
 
				-
			
 
				-		compress_one(full_endpts[region], q_endpts[region]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
			
 
				-// index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
			
 
				-static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
			
 
				-
			
 
				-		int x = POS_TO_X(position);
			
 
				-		int y = POS_TO_Y(position);
			
 
				-		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
			
 
				-		if (indices[y][x] & HIGH_INDEXBIT)
			
 
				-		{
			
 
				-			// high bit is set, swap the endpts and indices for this region
			
 
				-			int t;
			
 
				-			for (int i=0; i<NCHANNELS_RGBA; ++i) 
			
 
				-			{
			
 
				-				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
			
 
				-			}
			
 
				-			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
			
 
				-
			
 
				-			for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-			for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-				if (REGION(x,y,shapeindex) == region)
			
 
				-					indices[y][x] = NINDICES - 1 - indices[y][x];
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static bool endpts_fit(IntEndptsRGBA_2 endpts[NREGIONS], const Pattern &p)
			
 
				-{
			
 
				-	return true;
			
 
				-}
			
 
				-
			
 
				-static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
			
 
				-{
			
 
				-	out.write(p.mode, p.modebits);
			
 
				-	out.write(shapeindex, SHAPEBITS);
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		out.write(endpts[i].a_lsb, 1);
			
 
				-		out.write(endpts[i].b_lsb, 1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (out.getptr() == 98);
			
 
				-}
			
 
				-
			
 
				-static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
			
 
				-{
			
 
				-	int mode = AVPCL::getmode(in);
			
 
				-
			
 
				-	pat_index = 0;
			
 
				-	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
			
 
				-	nvAssert (in.getptr() == patterns[pat_index].modebits);
			
 
				-
			
 
				-	shapeindex = in.read(SHAPEBITS);
			
 
				-	p = patterns[pat_index];
			
 
				-
			
 
				-	for (int j=0; j<NCHANNELS_RGBA; ++j)
			
 
				-		for (int i=0; i<NREGIONS; ++i)
			
 
				-		{
			
 
				-			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			
 
				-			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
			
 
				-		}
			
 
				-	
			
 
				-	for (int i=0; i<NREGIONS; ++i)
			
 
				-	{
			
 
				-		endpts[i].a_lsb  = in.read(1);
			
 
				-		endpts[i].b_lsb  = in.read(1);
			
 
				-	}
			
 
				-
			
 
				-	nvAssert (in.getptr() == 98);
			
 
				-}
			
 
				-
			
 
				-// WORK PLACEHOLDER -- keep it simple for now
			
 
				-static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
			
 
				-{
			
 
				-	int positions[NREGIONS];
			
 
				-
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
			
 
				-
			
 
				-	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
			
 
				-	{
			
 
				-		int x = POS_TO_X(pos);
			
 
				-		int y = POS_TO_Y(pos);
			
 
				-
			
 
				-		bool match = false;
			
 
				-
			
 
				-		for (int r = 0; r < NREGIONS; ++r)
			
 
				-			if (positions[r] == pos) { match = true; break; }
			
 
				-
			
 
				-		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
			
 
				-{
			
 
				-	Bits out(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	write_header(endpts, shapeindex, p, out);
			
 
				-
			
 
				-	write_indices(indices, shapeindex, out);
			
 
				-
			
 
				-	nvAssert(out.getptr() == AVPCL::BITSIZE);
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
			
 
				-{
			
 
				-	IntEndptsRGBA endpts;
			
 
				-
			
 
				-	uncompress_one(endpts_2, endpts);
			
 
				-
			
 
				-	// scale endpoints
			
 
				-	int a, b;			// really need a IntVec4...
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space 
			
 
				-	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-
			
 
				-	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); 
			
 
				-	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
			
 
				-
			
 
				-	// interpolate
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
			
 
				-}
			
 
				-
			
 
				-// sign extend but only if it was transformed
			
 
				-static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS])
			
 
				-{
			
 
				-	nvUnreachable();
			
 
				-}
			
 
				-
			
 
				-void AVPCL::decompress_mode7(const char *block, Tile &t)
			
 
				-{
			
 
				-	Bits in(block, AVPCL::BITSIZE);
			
 
				-
			
 
				-	Pattern p;
			
 
				-	IntEndptsRGBA_2 endpts[NREGIONS];
			
 
				-	int shapeindex, pat_index;
			
 
				-
			
 
				-	read_header(in, endpts, shapeindex, p, pat_index);
			
 
				-	
			
 
				-	if (p.transformed)
			
 
				-	{
			
 
				-		sign_extend(p, endpts);
			
 
				-		transform_inverse(endpts);
			
 
				-	}
			
 
				-
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-	for (int r = 0; r < NREGIONS; ++r)
			
 
				-		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
			
 
				-
			
 
				-	int indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	read_indices(in, shapeindex, indices);
			
 
				-
			
 
				-	nvAssert(in.getptr() == AVPCL::BITSIZE);
			
 
				-
			
 
				-	// lookup
			
 
				-	for (int y = 0; y < Tile::TILE_H; y++)
			
 
				-	for (int x = 0; x < Tile::TILE_W; x++)
			
 
				-		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
			
 
				-}
			
 
				-
			
 
				-// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
			
 
				-static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	Vector4 palette[NINDICES];
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	generate_palette_quantized(endpts, region_prec, palette);
			
 
				-
			
 
				-	for (int i = 0; i < np; ++i)
			
 
				-	{
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int j = 0; j < NINDICES && besterr > 0; ++j)
			
 
				-		{
			
 
				-			err = !AVPCL::flag_premult ? Utils::metric4(colors[i], palette[j]) :
			
 
				-									     Utils::metric4premult(colors[i], palette[j]) ;
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[i] = j;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-
			
 
				-		// check for early exit
			
 
				-		if (toterr > current_err)
			
 
				-		{
			
 
				-			// fill out bogus index values so it's initialized at least
			
 
				-			for (int k = i; k < np; ++k)
			
 
				-				indices[k] = -1;
			
 
				-
			
 
				-			return FLT_MAX;
			
 
				-		}
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
			
 
				-static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, 
			
 
				-						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	{
			
 
				-		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
			
 
				-		toterr[region] = 0;
			
 
				-	}
			
 
				-
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) :
			
 
				-										 Utils::metric4premult(tile.data[y][x], palette[region][i]) ;
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-			{
			
 
				-				besterr = err;
			
 
				-				indices[y][x] = i;
			
 
				-			}
			
 
				-		}
			
 
				-		toterr[region] += besterr;
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
			
 
				-// this function returns either old_err or a value smaller (if it was successful in improving the error)
			
 
				-static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
			
 
				-						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	// we have the old endpoints: old_endpts
			
 
				-	// we have the perturbed endpoints: new_endpts
			
 
				-	// we have the temporary endpoints: temp_endpts
			
 
				-
			
 
				-	IntEndptsRGBA_2 temp_endpts;
			
 
				-	float min_err = old_err;		// start with the best current error
			
 
				-	int beststep;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	// copy real endpoints so we can perturb them
			
 
				-	temp_endpts = new_endpts = old_endpts;
			
 
				-
			
 
				-	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
			
 
				-
			
 
				-	// do a logarithmic search for the best error for this endpoint (which)
			
 
				-	for (int step = 1 << (prec-1); step; step >>= 1)
			
 
				-	{
			
 
				-		bool improved = false;
			
 
				-		for (int sign = -1; sign <= 1; sign += 2)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-			{
			
 
				-				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
			
 
				-				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
			
 
				-				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
			
 
				-					continue;
			
 
				-			}
			
 
				-
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
			
 
				-
			
 
				-			if (err < min_err)
			
 
				-			{
			
 
				-				improved = true;
			
 
				-				min_err = err;
			
 
				-				beststep = sign * step;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-		// if this was an improvement, move the endpoint and continue search from there
			
 
				-		if (improved)
			
 
				-		{
			
 
				-			if (do_b == 0)
			
 
				-				new_endpts.A[ch] += beststep;
			
 
				-			else
			
 
				-				new_endpts.B[ch] += beststep;
			
 
				-		}
			
 
				-	}
			
 
				-	return min_err;
			
 
				-}
			
 
				-
			
 
				-// the larger the error the more time it is worth spending on an exhaustive search.
			
 
				-// perturb the endpoints at least -3 to 3.
			
 
				-// if err > 5000 perturb endpoints 50% of precision
			
 
				-// if err > 1000 25%
			
 
				-// if err > 200 12.5%
			
 
				-// if err > 40  6.25%
			
 
				-// for np = 16 -- adjust error thresholds as a function of np
			
 
				-// always ensure endpoint ordering is preserved (no need to overlap the scan)
			
 
				-// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
			
 
				-static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
			
 
				-{
			
 
				-	IntEndptsRGBA_2 temp_endpts;
			
 
				-	float best_err = orig_err;
			
 
				-	int aprec = region_prec.endpt_a_prec[ch];
			
 
				-	int bprec = region_prec.endpt_b_prec[ch];
			
 
				-	int good_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int i=0; i<np; ++i)
			
 
				-		indices[i] = -1;
			
 
				-
			
 
				-	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
			
 
				-
			
 
				-	if (orig_err == 0) return orig_err;
			
 
				-
			
 
				-	int adelta = 0, bdelta = 0;
			
 
				-	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
			
 
				-	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
			
 
				-	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
			
 
				-	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
			
 
				-	adelta = max(adelta, 3);
			
 
				-	bdelta = max(bdelta, 3);
			
 
				-
			
 
				-#ifdef	DISABLE_EXHAUSTIVE
			
 
				-	adelta = bdelta = 3;
			
 
				-#endif
			
 
				-
			
 
				-	temp_endpts = opt_endpts;
			
 
				-
			
 
				-	// ok figure out the range of A and B
			
 
				-	int alow = max(0, opt_endpts.A[ch] - adelta);
			
 
				-	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
			
 
				-	int blow = max(0, opt_endpts.B[ch] - bdelta);
			
 
				-	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
			
 
				-
			
 
				-	// now there's no need to swap the ordering of A and B
			
 
				-	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
			
 
				-
			
 
				-	int amin, bmin;
			
 
				-
			
 
				-	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
			
 
				-	{
			
 
				-		// keep a <= b
			
 
				-		for (int a = alow; a <= ahigh; ++a)
			
 
				-		for (int b = max(a, blow); b < bhigh; ++b)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err;
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// keep b <= a
			
 
				-		for (int b = blow; b < bhigh; ++b)
			
 
				-		for (int a = max(b, alow); a <= ahigh; ++a)
			
 
				-		{
			
 
				-			temp_endpts.A[ch] = a;
			
 
				-			temp_endpts.B[ch] = b;
			
 
				-		
			
 
				-            float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
			
 
				-			if (err < best_err) 
			
 
				-			{ 
			
 
				-				amin = a; 
			
 
				-				bmin = b; 
			
 
				-				best_err = err; 
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-					good_indices[i] = temp_indices[i];
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	if (best_err < orig_err)
			
 
				-	{
			
 
				-		opt_endpts.A[ch] = amin;
			
 
				-		opt_endpts.B[ch] = bmin;
			
 
				-		orig_err = best_err;
			
 
				-		// if we actually improved, update the indices
			
 
				-		for (int i=0; i<np; ++i)
			
 
				-			indices[i] = good_indices[i];
			
 
				-	}
			
 
				-	return best_err;
			
 
				-}
			
 
				-
			
 
				-static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
			
 
				-{
			
 
				-	float opt_err = orig_err;
			
 
				-
			
 
				-	opt_endpts = orig_endpts;
			
 
				-
			
 
				-	/*
			
 
				-		err0 = perturb(rgb0, delta0)
			
 
				-		err1 = perturb(rgb1, delta1)
			
 
				-		if (err0 < err1)
			
 
				-			if (err0 >= initial_error) break
			
 
				-			rgb0 += delta0
			
 
				-			next = 1
			
 
				-		else
			
 
				-			if (err1 >= initial_error) break
			
 
				-			rgb1 += delta1
			
 
				-			next = 0
			
 
				-		initial_err = map()
			
 
				-		for (;;)
			
 
				-			err = perturb(next ? rgb1:rgb0, delta)
			
 
				-			if (err >= initial_err) break
			
 
				-			next? rgb1 : rgb0 += delta
			
 
				-			initial_err = err
			
 
				-	*/
			
 
				-	IntEndptsRGBA_2 new_a, new_b;
			
 
				-	IntEndptsRGBA_2 new_endpt;
			
 
				-	int do_b;
			
 
				-	int orig_indices[Tile::TILE_TOTAL];
			
 
				-	int new_indices[Tile::TILE_TOTAL];
			
 
				-	int temp_indices0[Tile::TILE_TOTAL];
			
 
				-	int temp_indices1[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	// now optimize each channel separately
			
 
				-	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
			
 
				-	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-		// figure out which endpoint when perturbed gives the most improvement and start there
			
 
				-		// if we just alternate, we can easily end up in a local minima
			
 
				-        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
			
 
				-        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B
			
 
				-
			
 
				-		if (err0 < err1)
			
 
				-		{
			
 
				-			if (err0 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.A[ch] = new_a.A[ch];
			
 
				-			opt_err = err0;
			
 
				-			do_b = 1;		// do B next
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			if (err1 >= opt_err)
			
 
				-				continue;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = orig_indices[i] = temp_indices1[i];
			
 
				-				nvAssert (orig_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			opt_endpts.B[ch] = new_b.B[ch];
			
 
				-			opt_err = err1;
			
 
				-			do_b = 0;		// do A next
			
 
				-		}
			
 
				-		
			
 
				-		// now alternate endpoints and keep trying until there is no improvement
			
 
				-		for (;;)
			
 
				-		{
			
 
				-            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			
 
				-			if (err >= opt_err)
			
 
				-				break;
			
 
				-
			
 
				-			for (int i=0; i<np; ++i)
			
 
				-			{
			
 
				-				new_indices[i] = temp_indices0[i];
			
 
				-				nvAssert (new_indices[i] != -1);
			
 
				-			}
			
 
				-
			
 
				-			if (do_b == 0)
			
 
				-				opt_endpts.A[ch] = new_endpt.A[ch];
			
 
				-			else
			
 
				-				opt_endpts.B[ch] = new_endpt.B[ch];
			
 
				-			opt_err = err;
			
 
				-			do_b = 1 - do_b;	// now move the other endpoint
			
 
				-		}
			
 
				-
			
 
				-		// see if the indices have changed
			
 
				-		int i;
			
 
				-		for (i=0; i<np; ++i)
			
 
				-			if (orig_indices[i] != new_indices[i])
			
 
				-				break;
			
 
				-
			
 
				-		if (i<np)
			
 
				-			ch = -1;	// start over
			
 
				-	}
			
 
				-
			
 
				-	// finally, do a small exhaustive search around what we think is the global minima to be sure
			
 
				-	// note this is independent of the above search, so we don't care about the indices from the above
			
 
				-	// we don't care about the above because if they differ, so what? we've already started at ch=0
			
 
				-	bool first = true;
			
 
				-	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
			
 
				-	{
			
 
				-        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
			
 
				-
			
 
				-		if (new_err < opt_err)
			
 
				-		{
			
 
				-			opt_err = new_err;
			
 
				-
			
 
				-			if (first)
			
 
				-			{
			
 
				-				for (int i=0; i<np; ++i)
			
 
				-				{
			
 
				-					orig_indices[i] = temp_indices0[i];
			
 
				-					nvAssert (orig_indices[i] != -1);
			
 
				-				}
			
 
				-				first = false;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// see if the indices have changed
			
 
				-				int i;
			
 
				-				for (i=0; i<np; ++i)
			
 
				-					if (orig_indices[i] != temp_indices0[i])
			
 
				-						break;
			
 
				-
			
 
				-				if (i<np)
			
 
				-				{
			
 
				-					ch = -1;	// start over
			
 
				-					first = true;
			
 
				-				}
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return opt_err;
			
 
				-}
			
 
				-
			
 
				-static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS], 
			
 
				-							IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
			
 
				-{
			
 
				-	Vector4 pixels[Tile::TILE_TOTAL];
			
 
				-    float importance[Tile::TILE_TOTAL];
			
 
				-	IntEndptsRGBA_2 temp_in, temp_out;
			
 
				-	int temp_indices[Tile::TILE_TOTAL];
			
 
				-
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		// collect the pixels in the region
			
 
				-		int np = 0;
			
 
				-
			
 
				-        for (int y = 0; y < tile.size_y; y++) {
			
 
				-            for (int x = 0; x < tile.size_x; x++) {
			
 
				-                if (REGION(x, y, shapeindex) == region) {
			
 
				-                    pixels[np] = tile.data[y][x];
			
 
				-                    importance[np] = tile.importance_map[y][x];
			
 
				-                    np++;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-		opt_endpts[region] = temp_in = orig_endpts[region];
			
 
				-		opt_err[region] = orig_err[region];
			
 
				-
			
 
				-		float best_err = orig_err[region];
			
 
				-
			
 
				-		// try all lsb modes as we search for better endpoints
			
 
				-		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
			
 
				-		{
			
 
				-			temp_in.a_lsb = lsbmode & 1;
			
 
				-			temp_in.b_lsb = (lsbmode >> 1) & 1;
			
 
				-
			
 
				-			// make sure we have a valid error for temp_in
			
 
				-			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			
 
				-			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			
 
				-			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
			
 
				-
			
 
				-			// now try to optimize these endpoints
			
 
				-            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
			
 
				-
			
 
				-			// if we find an improvement, update the best so far and correct the output endpoints and errors
			
 
				-			if (temp_out_err < best_err)
			
 
				-			{
			
 
				-				best_err = temp_out_err;
			
 
				-				opt_err[region] = temp_out_err;
			
 
				-				opt_endpts[region] = temp_out;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/* optimization algorithm
			
 
				-	for each pattern
			
 
				-		convert endpoints using pattern precision
			
 
				-		assign indices and get initial error
			
 
				-		compress indices (and possibly reorder endpoints)
			
 
				-		transform endpoints
			
 
				-		if transformed endpoints fit pattern
			
 
				-			get original endpoints back
			
 
				-			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			
 
				-			compress new indices
			
 
				-			transform new endpoints
			
 
				-			if new endpoints fit pattern AND if error is improved
			
 
				-				emit compressed block with new data
			
 
				-			else
			
 
				-				emit compressed block with original data // to try to preserve maximum endpoint precision
			
 
				-*/
			
 
				-
			
 
				-static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
			
 
				-{
			
 
				-	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
			
 
				-	IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
			
 
				-	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
			
 
				-
			
 
				-	for (int sp = 0; sp < NPATTERNS; ++sp)
			
 
				-	{
			
 
				-		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
			
 
				-		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
			
 
				-		swap_indices(orig_endpts, orig_indices, shapeindex_best);
			
 
				-		if (patterns[sp].transformed)
			
 
				-			transform_forward(orig_endpts);
			
 
				-		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
			
 
				-		// the assumption made is that if they don't fit now, they won't fit after optimizing.
			
 
				-		if (endpts_fit(orig_endpts, patterns[sp]))
			
 
				-		{
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_inverse(orig_endpts);
			
 
				-			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			
 
				-			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			
 
				-			// (nreed) Commented out asserts because they go off all the time...not sure why
			
 
				-			//for (int i=0; i<NREGIONS; ++i)
			
 
				-			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			
 
				-			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			
 
				-			if (patterns[sp].transformed)
			
 
				-				transform_forward(opt_endpts);
			
 
				-			orig_toterr = opt_toterr = 0;
			
 
				-			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			
 
				-			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			
 
				-			{
			
 
				-				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
			
 
				-				return opt_toterr;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				// either it stopped fitting when we optimized it, or there was no improvement
			
 
				-				// so go back to the unoptimized endpoints which we know will fit
			
 
				-				if (patterns[sp].transformed)
			
 
				-					transform_forward(orig_endpts);
			
 
				-				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
			
 
				-				return orig_toterr;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 7).";
			
 
				-	return FLT_MAX;
			
 
				-}
			
 
				-
			
 
				-static void clamp(Vector4 &v)
			
 
				-{
			
 
				-	if (v.x < 0.0f) v.x = 0.0f;
			
 
				-	if (v.x > 255.0f) v.x = 255.0f;
			
 
				-	if (v.y < 0.0f) v.y = 0.0f;
			
 
				-	if (v.y > 255.0f) v.y = 255.0f;
			
 
				-	if (v.z < 0.0f) v.z = 0.0f;
			
 
				-	if (v.z > 255.0f) v.z = 255.0f;
			
 
				-	if (v.w < 0.0f) v.w = 0.0f;
			
 
				-	if (v.w > 255.0f) v.w = 255.0f;
			
 
				-}
			
 
				-
			
 
				-static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
			
 
				-{
			
 
				-	for (int region = 0; region < NREGIONS; ++region)
			
 
				-	for (int i = 0; i < NINDICES; ++i)
			
 
				-		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
			
 
				-}
			
 
				-
			
 
				-// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
			
 
				-static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	// build list of possibles
			
 
				-	Vector4 palette[NREGIONS][NINDICES];
			
 
				-
			
 
				-	generate_palette_unquantized(endpts, palette);
			
 
				-
			
 
				-	float toterr = 0;
			
 
				-	Vector4 err;
			
 
				-
			
 
				-	for (int y = 0; y < tile.size_y; y++)
			
 
				-	for (int x = 0; x < tile.size_x; x++)
			
 
				-	{
			
 
				-		int region = REGION(x,y,shapeindex);
			
 
				-		float err, besterr = FLT_MAX;
			
 
				-
			
 
				-		for (int i = 0; i < NINDICES && besterr > 0; ++i)
			
 
				-		{
			
 
				-			err = Utils::metric4(tile.data[y][x], palette[region][i]);
			
 
				-
			
 
				-			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
			
 
				-				break;
			
 
				-			if (err < besterr)
			
 
				-				besterr = err;
			
 
				-		}
			
 
				-		toterr += besterr;
			
 
				-	}
			
 
				-	return toterr;
			
 
				-}
			
 
				-
			
 
				-static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
			
 
				-{
			
 
				-	for (int region=0; region<NREGIONS; ++region)
			
 
				-	{
			
 
				-		int np = 0;
			
 
				-		Vector4 colors[Tile::TILE_TOTAL];
			
 
				-		Vector4 mean(0,0,0,0);
			
 
				-
			
 
				-		for (int y = 0; y < tile.size_y; y++)
			
 
				-		for (int x = 0; x < tile.size_x; x++)
			
 
				-			if (REGION(x,y,shapeindex) == region)
			
 
				-			{
			
 
				-				colors[np] = tile.data[y][x];
			
 
				-				mean += tile.data[y][x];
			
 
				-				++np;
			
 
				-			}
			
 
				-
			
 
				-		// handle simple cases	
			
 
				-		if (np == 0)
			
 
				-		{
			
 
				-			Vector4 zero(0,0,0,255.0f);
			
 
				-			endpts[region].A = zero;
			
 
				-			endpts[region].B = zero;
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 1)
			
 
				-		{
			
 
				-			endpts[region].A = colors[0];
			
 
				-			endpts[region].B = colors[0];
			
 
				-			continue;
			
 
				-		}
			
 
				-		else if (np == 2)
			
 
				-		{
			
 
				-			endpts[region].A = colors[0];
			
 
				-			endpts[region].B = colors[1];
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		mean /= float(np);
			
 
				-
			
 
				-		Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
			
 
				-
			
 
				-		// project each pixel value along the principal direction
			
 
				-		float minp = FLT_MAX, maxp = -FLT_MAX;
			
 
				-		for (int i = 0; i < np; i++) 
			
 
				-		{
			
 
				-			float dp = dot(colors[i]-mean, direction);
			
 
				-			if (dp < minp) minp = dp;
			
 
				-			if (dp > maxp) maxp = dp;
			
 
				-		}
			
 
				-
			
 
				-		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
			
 
				-		endpts[region].A = mean + minp*direction;
			
 
				-		endpts[region].B = mean + maxp*direction;
			
 
				-
			
 
				-		// clamp endpoints
			
 
				-		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
			
 
				-		// shape based on endpoints being clamped
			
 
				-		clamp(endpts[region].A);
			
 
				-		clamp(endpts[region].B);
			
 
				-	}
			
 
				-
			
 
				-	return map_colors(tile, shapeindex, endpts);
			
 
				-}
			
 
				-
			
 
				-static void swap(float *list1, int *list2, int i, int j)
			
 
				-{
			
 
				-	float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
			
 
				-	int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
			
 
				-}
			
 
				-
			
 
				-float AVPCL::compress_mode7(const Tile &t, char *block)
			
 
				-{
			
 
				-	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
			
 
				-	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
			
 
				-	const int NITEMS=NSHAPES/4;
			
 
				-
			
 
				-	// pick the best NITEMS shapes and refine these.
			
 
				-	struct {
			
 
				-		FltEndpts endpts[NREGIONS];
			
 
				-	} all[NSHAPES];
			
 
				-	float roughmse[NSHAPES];
			
 
				-	int index[NSHAPES];
			
 
				-	char tempblock[AVPCL::BLOCKSIZE];
			
 
				-	float msebest = FLT_MAX;
			
 
				-
			
 
				-	for (int i=0; i<NSHAPES; ++i)
			
 
				-	{
			
 
				-		roughmse[i] = rough(t, i, &all[i].endpts[0]);
			
 
				-		index[i] = i;
			
 
				-	}
			
 
				-
			
 
				-	// bubble sort -- only need to bubble up the first NITEMS items
			
 
				-	for (int i=0; i<NITEMS; ++i)
			
 
				-	for (int j=i+1; j<NSHAPES; ++j)
			
 
				-		if (roughmse[i] > roughmse[j])
			
 
				-			swap(roughmse, index, i, j);
			
 
				-
			
 
				-	for (int i=0; i<NITEMS && msebest>0; ++i)
			
 
				-	{
			
 
				-		int shape = index[i];
			
 
				-		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
			
 
				-		if (mse < msebest)
			
 
				-		{
			
 
				-			memcpy(block, tempblock, sizeof(tempblock));
			
 
				-			msebest = mse;
			
 
				-		}
			
 
				-	}
			
 
				-	return msebest;
			
 
				-}
			
 
				-
			
--- a/3rdparty/nvtt/bc7/avpcl_utils.cpp
+++ b/3rdparty/nvtt/bc7/avpcl_utils.cpp
@@ -1,389 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// Utility and common routines
			
 
				-
			
 
				-#include "avpcl_utils.h"
			
 
				-#include "avpcl.h"
			
 
				-#include "nvmath/vector.inl"
			
 
				-#include <math.h>
			
 
				-
			
 
				-using namespace nv;
			
 
				-using namespace AVPCL;
			
 
				-
			
 
				-static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64};										// divided by 64
			
 
				-static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};		// divided by 64
			
 
				-
			
 
				-int Utils::lerp(int a, int b, int i, int bias, int denom)
			
 
				-{
			
 
				-#ifdef	USE_ZOH_INTERP
			
 
				-	nvAssert (denom == 3 || denom == 7 || denom == 15);
			
 
				-	nvAssert (i >= 0 && i <= denom);
			
 
				-	nvAssert (bias >= 0 && bias <= denom/2);
			
 
				-	nvAssert (a >= 0 && b >= 0);
			
 
				-
			
 
				-	int round = 0;
			
 
				-#ifdef	USE_ZOH_INTERP_ROUNDED
			
 
				-	round = 32;
			
 
				-#endif
			
 
				-
			
 
				-	switch (denom)
			
 
				-	{
			
 
				-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
			
 
				-	case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
			
 
				-	case 7:	return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
			
 
				-	default: nvUnreachable(); return 0;
			
 
				-	}
			
 
				-#else
			
 
				-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
			
 
				-{
			
 
				-#ifdef	USE_ZOH_INTERP
			
 
				-	nvAssert (denom == 3 || denom == 7 || denom == 15);
			
 
				-	nvAssert (i >= 0 && i <= denom);
			
 
				-	nvAssert (bias >= 0 && bias <= denom/2);
			
 
				-//	nvAssert (a >= 0 && b >= 0);
			
 
				-
			
 
				-	// no need to bias these as this is an exact division
			
 
				-
			
 
				-	switch (denom)
			
 
				-	{
			
 
				-	case 3:	denom *= 5; i *= 5;	// fall through to case 15
			
 
				-	case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
			
 
				-	case 7:	return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
			
 
				-	default: nvUnreachable(); return Vector4(0);
			
 
				-	}
			
 
				-#else
			
 
				-	return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom));		// simple exact interpolation
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				-
			
 
				-int Utils::unquantize(int q, int prec)
			
 
				-{
			
 
				-	int unq;
			
 
				-
			
 
				-	nvAssert (prec > 3);	// we only want to do one replicate
			
 
				-
			
 
				-#ifdef USE_ZOH_QUANT
			
 
				-	if (prec >= 8)
			
 
				-		unq = q;
			
 
				-	else if (q == 0) 
			
 
				-		unq = 0;
			
 
				-	else if (q == ((1<<prec)-1)) 
			
 
				-		unq = 255;
			
 
				-	else
			
 
				-		unq = (q * 256 + 128) >> prec;
			
 
				-#else
			
 
				-	// avpcl unquantizer -- bit replicate
			
 
				-	unq = (q << (8-prec)) | (q >> (2*prec-8));
			
 
				-#endif
			
 
				-
			
 
				-	return unq;
			
 
				-}
			
 
				-
			
 
				-// quantize to the best value -- i.e., minimize unquantize error
			
 
				-int Utils::quantize(float value, int prec)
			
 
				-{
			
 
				-	int q, unq;
			
 
				-
			
 
				-	nvAssert (prec > 3);	// we only want to do one replicate
			
 
				-
			
 
				-	unq = (int)floor(value + 0.5f);
			
 
				-	nvAssert (unq <= 255);
			
 
				-
			
 
				-#ifdef USE_ZOH_QUANT
			
 
				-	q = (prec >= 8) ? unq : (unq << prec) / 256;
			
 
				-#else
			
 
				-	// avpcl quantizer -- scale properly for best possible bit-replicated result
			
 
				-	q = (unq * ((1<<prec)-1) + 127)/255;
			
 
				-#endif
			
 
				-
			
 
				-	nvAssert (q >= 0 && q < (1 << prec));
			
 
				-
			
 
				-	return q;
			
 
				-}
			
 
				-
			
 
				-float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
			
 
				-{
			
 
				-	Vector4 err = a - b;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else /*if (AVPCL::flag_nonuniform_ati)*/
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err.x *= rwt;
			
 
				-		err.y *= gwt;
			
 
				-		err.z *= bwt;
			
 
				-	}
			
 
				-
			
 
				-	return lengthSquared(err);
			
 
				-}
			
 
				-
			
 
				-// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
			
 
				-float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
			
 
				-{
			
 
				-	Vector3 err = a - b;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else if (AVPCL::flag_nonuniform_ati)
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// adjust weights based on rotatemode
			
 
				-		switch(rotatemode)
			
 
				-		{
			
 
				-		case ROTATEMODE_RGBA_RGBA: break;
			
 
				-		case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
			
 
				-		case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
			
 
				-		case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
			
 
				-		default: nvUnreachable();
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err.x *= rwt;
			
 
				-		err.y *= gwt;
			
 
				-		err.z *= bwt;
			
 
				-	}
			
 
				-
			
 
				-	return lengthSquared(err);
			
 
				-}
			
 
				-
			
 
				-float Utils::metric1(const float a, const float b, int rotatemode)
			
 
				-{
			
 
				-	float err = a - b;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt, awt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else if (AVPCL::flag_nonuniform_ati)
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// adjust weights based on rotatemode
			
 
				-		switch(rotatemode)
			
 
				-		{
			
 
				-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
			
 
				-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
			
 
				-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
			
 
				-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
			
 
				-		default: nvUnreachable();
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err *= awt;
			
 
				-	}
			
 
				-
			
 
				-	return err * err;
			
 
				-}
			
 
				-
			
 
				-float Utils::premult(float r, float a)
			
 
				-{
			
 
				-	// note that the args are really integers stored in floats
			
 
				-	int R = int(r), A = int(a);
			
 
				-
			
 
				-	nvAssert ((R==r) && (A==a));
			
 
				-
			
 
				-	return float((R*A + 127)/255);
			
 
				-}
			
 
				-
			
 
				-static void premult4(Vector4& rgba)
			
 
				-{
			
 
				-	rgba.x = Utils::premult(rgba.x, rgba.w);
			
 
				-	rgba.y = Utils::premult(rgba.y, rgba.w);
			
 
				-	rgba.z = Utils::premult(rgba.z, rgba.w);
			
 
				-}
			
 
				-
			
 
				-static void premult3(Vector3& rgb, float a)
			
 
				-{
			
 
				-	rgb.x = Utils::premult(rgb.x, a);
			
 
				-	rgb.y = Utils::premult(rgb.y, a);
			
 
				-	rgb.z = Utils::premult(rgb.z, a);
			
 
				-}
			
 
				-
			
 
				-float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
			
 
				-{
			
 
				-	Vector4 pma = a, pmb = b;
			
 
				-
			
 
				-	premult4(pma);
			
 
				-	premult4(pmb);
			
 
				-
			
 
				-	Vector4 err = pma - pmb;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else /*if (AVPCL::flag_nonuniform_ati)*/
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err.x *= rwt;
			
 
				-		err.y *= gwt;
			
 
				-		err.z *= bwt;
			
 
				-	}
			
 
				-
			
 
				-	return lengthSquared(err);
			
 
				-}
			
 
				-
			
 
				-float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
			
 
				-{
			
 
				-	Vector3 pma = rgb0, pmb = rgb1;
			
 
				-
			
 
				-	premult3(pma, a0);
			
 
				-	premult3(pmb, a1);
			
 
				-
			
 
				-	Vector3 err = pma - pmb;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else /*if (AVPCL::flag_nonuniform_ati)*/
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err.x *= rwt;
			
 
				-		err.y *= gwt;
			
 
				-		err.z *= bwt;
			
 
				-	}
			
 
				-
			
 
				-	return lengthSquared(err);
			
 
				-}
			
 
				-
			
 
				-float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
			
 
				-{
			
 
				-	Vector3 pma = rgb0, pmb = rgb1;
			
 
				-
			
 
				-	switch(rotatemode)
			
 
				-	{
			
 
				-	case ROTATEMODE_RGBA_RGBA:
			
 
				-		// this function isn't supposed to be called for this rotatemode
			
 
				-		nvUnreachable();
			
 
				-		break;
			
 
				-	case ROTATEMODE_RGBA_AGBR:
			
 
				-		pma.y = premult(pma.y, pma.x);
			
 
				-		pma.z = premult(pma.z, pma.x);
			
 
				-		pmb.y = premult(pmb.y, pmb.x);
			
 
				-		pmb.z = premult(pmb.z, pmb.x);
			
 
				-		break;
			
 
				-	case ROTATEMODE_RGBA_RABG:
			
 
				-		pma.x = premult(pma.x, pma.y);
			
 
				-		pma.z = premult(pma.z, pma.y);
			
 
				-		pmb.x = premult(pmb.x, pmb.y);
			
 
				-		pmb.z = premult(pmb.z, pmb.y);
			
 
				-		break;
			
 
				-	case ROTATEMODE_RGBA_RGAB:
			
 
				-		pma.x = premult(pma.x, pma.z);
			
 
				-		pma.y = premult(pma.y, pma.z);
			
 
				-		pmb.x = premult(pmb.x, pmb.z);
			
 
				-		pmb.y = premult(pmb.y, pmb.z);
			
 
				-		break;
			
 
				-	default: nvUnreachable();
			
 
				-	}
			
 
				-
			
 
				-	Vector3 err = pma - pmb;
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else /*if (AVPCL::flag_nonuniform_ati)*/
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err.x *= rwt;
			
 
				-		err.y *= gwt;
			
 
				-		err.z *= bwt;
			
 
				-	}
			
 
				-
			
 
				-	return lengthSquared(err);
			
 
				-}
			
 
				-
			
 
				-float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
			
 
				-{
			
 
				-	float err = premult(rgb0, a0) - premult(rgb1, a1);
			
 
				-
			
 
				-	// if nonuniform, select weights and weigh away
			
 
				-	if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
			
 
				-	{
			
 
				-		float rwt, gwt, bwt, awt;
			
 
				-		if (AVPCL::flag_nonuniform)
			
 
				-		{
			
 
				-			rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
			
 
				-		}
			
 
				-		else if (AVPCL::flag_nonuniform_ati)
			
 
				-		{
			
 
				-			rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
			
 
				-		}
			
 
				-
			
 
				-		// adjust weights based on rotatemode
			
 
				-		switch(rotatemode)
			
 
				-		{
			
 
				-		case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
			
 
				-		case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
			
 
				-		case ROTATEMODE_RGBA_RABG: awt = gwt; break;
			
 
				-		case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
			
 
				-		default: nvUnreachable();
			
 
				-		}
			
 
				-
			
 
				-		// weigh the components
			
 
				-		err *= awt;
			
 
				-	}
			
 
				-
			
 
				-	return err * err;
			
 
				-}
			
--- a/3rdparty/nvtt/bc7/avpcl_utils.h
+++ b/3rdparty/nvtt/bc7/avpcl_utils.h
@@ -1,61 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-// utility class holding common routines
			
 
				-#ifndef _AVPCL_UTILS_H
			
 
				-#define _AVPCL_UTILS_H
			
 
				-
			
 
				-#include "nvmath/vector.h"
			
 
				-
			
 
				-namespace AVPCL {
			
 
				-
			
 
				-inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); }
			
 
				-
			
 
				-static const int INDEXMODE_BITS				= 1;		// 2 different index modes
			
 
				-static const int NINDEXMODES				= (1<<(INDEXMODE_BITS));
			
 
				-static const int INDEXMODE_ALPHA_IS_3BITS	= 0;
			
 
				-static const int INDEXMODE_ALPHA_IS_2BITS	= 1;
			
 
				-
			
 
				-static const int ROTATEMODE_BITS		= 2;		// 4 different rotate modes
			
 
				-static const int NROTATEMODES			= (1<<(ROTATEMODE_BITS));
			
 
				-static const int ROTATEMODE_RGBA_RGBA	= 0;
			
 
				-static const int ROTATEMODE_RGBA_AGBR	= 1;
			
 
				-static const int ROTATEMODE_RGBA_RABG	= 2;
			
 
				-static const int ROTATEMODE_RGBA_RGAB	= 3;
			
 
				-
			
 
				-class Utils
			
 
				-{
			
 
				-public:
			
 
				-	// error metrics
			
 
				-	static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
			
 
				-	static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
			
 
				-	static float metric1(float a, float b, int rotatemode);
			
 
				-
			
 
				-	static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
			
 
				-	static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
			
 
				-	static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
			
 
				-	static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
			
 
				-
			
 
				-	static float premult(float r, float a);
			
 
				-
			
 
				-	// quantization and unquantization
			
 
				-	static int unquantize(int q, int prec);
			
 
				-	static int quantize(float value, int prec);
			
 
				-
			
 
				-	// lerping
			
 
				-	static int lerp(int a, int b, int i, int bias, int denom);
			
 
				-	static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/bits.h
+++ b/3rdparty/nvtt/bc7/bits.h
@@ -1,76 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef _AVPCL_BITS_H
			
 
				-#define _AVPCL_BITS_H
			
 
				-
			
 
				-// read/write a bitstream
			
 
				-
			
 
				-#include "nvcore/debug.h"
			
 
				-
			
 
				-namespace AVPCL {
			
 
				-
			
 
				-class Bits
			
 
				-{
			
 
				-public:
			
 
				-
			
 
				-	Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
			
 
				-	Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
			
 
				-
			
 
				-	void write(int value, int nbits) {
			
 
				-		nvAssert (nbits >= 0 && nbits < 32);
			
 
				-		nvAssert (sizeof(int)>= 4);
			
 
				-		for (int i=0; i<nbits; ++i)
			
 
				-			writeone(value>>i);
			
 
				-	}
			
 
				-	int read(int nbits) { 
			
 
				-		nvAssert (nbits >= 0 && nbits < 32);
			
 
				-		nvAssert (sizeof(int)>= 4);
			
 
				-		int out = 0;
			
 
				-		for (int i=0; i<nbits; ++i)
			
 
				-			out |= readone() << i;
			
 
				-		return out;
			
 
				-	}
			
 
				-	int getptr() { return bptr; }
			
 
				-	void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
			
 
				-	int getsize() { return bend; }
			
 
				-
			
 
				-private:
			
 
				-	int	bptr;		// next bit to read
			
 
				-	int bend;		// last written bit + 1
			
 
				-	char *bits;		// ptr to user bit stream
			
 
				-	const char *cbits;	// ptr to const user bit stream
			
 
				-	int maxbits;	// max size of user bit stream
			
 
				-	char readonly;	// 1 if this is a read-only stream
			
 
				-
			
 
				-	int readone() {
			
 
				-		nvAssert (bptr < bend);
			
 
				-		if (bptr >= bend) return 0;
			
 
				-		int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
			
 
				-		++bptr;
			
 
				-		return bit != 0;
			
 
				-	}
			
 
				-	void writeone(int bit) {
			
 
				-		nvAssert (!readonly); // "Writing a read-only bit stream"
			
 
				-		nvAssert (bptr < maxbits);
			
 
				-		if (bptr >= maxbits) return;
			
 
				-		if (bit&1)
			
 
				-			bits[bptr>>3] |= 1 << (bptr & 7);
			
 
				-		else
			
 
				-			bits[bptr>>3] &= ~(1 << (bptr & 7));
			
 
				-		if (bptr++ >= bend) bend = bptr;
			
 
				-	}
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/endpts.h
+++ b/3rdparty/nvtt/bc7/endpts.h
@@ -1,81 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef _AVPCL_ENDPTS_H
			
 
				-#define _AVPCL_ENDPTS_H
			
 
				-
			
 
				-// endpoint definitions and routines to search through endpoint space
			
 
				-
			
 
				-#include "nvmath/vector.h"
			
 
				-
			
 
				-namespace AVPCL {
			
 
				-
			
 
				-static const int NCHANNELS_RGB	= 3;
			
 
				-static const int NCHANNELS_RGBA	= 4;
			
 
				-static const int CHANNEL_R		= 0;
			
 
				-static const int CHANNEL_G		= 1;
			
 
				-static const int CHANNEL_B		= 2;
			
 
				-static const int CHANNEL_A		= 3;
			
 
				-
			
 
				-struct FltEndpts
			
 
				-{
			
 
				-	nv::Vector4	A;
			
 
				-	nv::Vector4	B;
			
 
				-};
			
 
				-
			
 
				-struct IntEndptsRGB
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGB];
			
 
				-	int		B[NCHANNELS_RGB];
			
 
				-};
			
 
				-
			
 
				-struct IntEndptsRGB_1
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGB];
			
 
				-	int		B[NCHANNELS_RGB];
			
 
				-	int		lsb;				// shared lsb for A and B
			
 
				-};
			
 
				-
			
 
				-struct IntEndptsRGB_2
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGB];
			
 
				-	int		B[NCHANNELS_RGB];
			
 
				-	int		a_lsb;				// lsb for A
			
 
				-	int		b_lsb;				// lsb for B
			
 
				-};
			
 
				-
			
 
				-
			
 
				-struct IntEndptsRGBA
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGBA];
			
 
				-	int		B[NCHANNELS_RGBA];
			
 
				-};
			
 
				-
			
 
				-struct IntEndptsRGBA_2
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGBA];
			
 
				-	int		B[NCHANNELS_RGBA];
			
 
				-	int		a_lsb;				// lsb for A
			
 
				-	int		b_lsb;				// lsb for B
			
 
				-};
			
 
				-
			
 
				-struct IntEndptsRGBA_2a
			
 
				-{
			
 
				-	int		A[NCHANNELS_RGBA];
			
 
				-	int		B[NCHANNELS_RGBA];
			
 
				-	int		a_lsb;				// lsb for RGB channels of A
			
 
				-	int		b_lsb;				// lsb for RGB channels of A
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/shapes_three.h
+++ b/3rdparty/nvtt/bc7/shapes_three.h
@@ -1,132 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef	_AVPCL_SHAPES_THREE_H
			
 
				-#define _AVPCL_SHAPES_THREE_H
			
 
				-
			
 
				-// shapes for 3 regions
			
 
				-
			
 
				-#define NREGIONS 3
			
 
				-#define NSHAPES 64
			
 
				-#define SHAPEBITS 6
			
 
				-
			
 
				-static int shapes[NSHAPES*16] = 
			
 
				-{
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 2, 2, 2,   
			
 
				-0, 0, 1, 1,   0, 0, 1, 1,   2, 0, 0, 1,   0, 0, 2, 2,   
			
 
				-0, 2, 2, 1,   2, 2, 1, 1,   2, 2, 1, 1,   0, 0, 1, 1,   
			
 
				-2, 2, 2, 2,   2, 2, 2, 1,   2, 2, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 2, 2,   0, 0, 1, 1,   
			
 
				-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
			
 
				-1, 1, 2, 2,   0, 0, 2, 2,   1, 1, 1, 1,   2, 2, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 2,   
			
 
				-0, 0, 0, 0,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 1, 2,   
			
 
				-1, 1, 1, 1,   1, 1, 1, 1,   2, 2, 2, 2,   0, 0, 1, 2,   
			
 
				-2, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 2,   0, 0, 1, 2,   
			
 
				-
			
 
				-0, 1, 1, 2,   0, 1, 2, 2,   0, 0, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 1, 1, 2,   0, 1, 2, 2,   0, 1, 1, 2,   2, 0, 0, 1,   
			
 
				-0, 1, 1, 2,   0, 1, 2, 2,   1, 1, 2, 2,   2, 2, 0, 0,   
			
 
				-0, 1, 1, 2,   0, 1, 2, 2,   1, 2, 2, 2,   2, 2, 2, 0,   
			
 
				-
			
 
				-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
			
 
				-0, 0, 1, 1,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
			
 
				-0, 1, 1, 2,   2, 0, 0, 1,   1, 1, 2, 2,   0, 0, 2, 2,   
			
 
				-1, 1, 2, 2,   2, 2, 0, 0,   1, 1, 2, 2,   1, 1, 1, 1,   
			
 
				-
			
 
				-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   1, 1, 0, 0,   
			
 
				-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
			
 
				-0, 2, 2, 2,   2, 2, 2, 1,   0, 1, 2, 2,   2, 2, 1, 0,   
			
 
				-
			
 
				-0, 1, 2, 2,   0, 0, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 2, 2,   0, 0, 1, 2,   1, 2, 2, 1,   0, 1, 1, 0,   
			
 
				-0, 0, 1, 1,   1, 1, 2, 2,   1, 2, 2, 1,   1, 2, 2, 1,   
			
 
				-0, 0, 0, 0,   2, 2, 2, 2,   0, 1, 1, 0,   1, 2, 2, 1,   
			
 
				-
			
 
				-0, 0, 2, 2,   0, 1, 1, 0,   0, 0, 1, 1,   0, 0, 0, 0,   
			
 
				-1, 1, 0, 2,   0, 1, 1, 0,   0, 1, 2, 2,   2, 0, 0, 0,   
			
 
				-1, 1, 0, 2,   2, 0, 0, 2,   0, 1, 2, 2,   2, 2, 1, 1,   
			
 
				-0, 0, 2, 2,   2, 2, 2, 2,   0, 0, 1, 1,   2, 2, 2, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 2, 2, 2,   0, 0, 1, 1,   0, 1, 2, 0,   
			
 
				-0, 0, 0, 2,   0, 0, 2, 2,   0, 0, 1, 2,   0, 1, 2, 0,   
			
 
				-1, 1, 2, 2,   0, 0, 1, 2,   0, 0, 2, 2,   0, 1, 2, 0,   
			
 
				-1, 2, 2, 2,   0, 0, 1, 1,   0, 2, 2, 2,   0, 1, 2, 0,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
			
 
				-1, 1, 1, 1,   1, 2, 0, 1,   2, 0, 1, 2,   2, 2, 0, 0,   
			
 
				-2, 2, 2, 2,   2, 0, 1, 2,   1, 2, 0, 1,   1, 1, 2, 2,   
			
 
				-0, 0, 0, 0,   0, 1, 2, 0,   0, 1, 2, 0,   0, 0, 1, 1,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 1, 0, 1,   0, 0, 0, 0,   0, 0, 2, 2,   
			
 
				-1, 1, 2, 2,   0, 1, 0, 1,   0, 0, 0, 0,   1, 1, 2, 2,   
			
 
				-2, 2, 0, 0,   2, 2, 2, 2,   2, 1, 2, 1,   0, 0, 2, 2,   
			
 
				-0, 0, 1, 1,   2, 2, 2, 2,   2, 1, 2, 1,   1, 1, 2, 2,   
			
 
				-
			
 
				-0, 0, 2, 2,   0, 2, 2, 0,   0, 1, 0, 1,   0, 0, 0, 0,   
			
 
				-0, 0, 1, 1,   1, 2, 2, 1,   2, 2, 2, 2,   2, 1, 2, 1,   
			
 
				-0, 0, 2, 2,   0, 2, 2, 0,   2, 2, 2, 2,   2, 1, 2, 1,   
			
 
				-0, 0, 1, 1,   1, 2, 2, 1,   0, 1, 0, 1,   2, 1, 2, 1,   
			
 
				-
			
 
				-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   0, 0, 0, 0,   
			
 
				-0, 1, 0, 1,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
			
 
				-0, 1, 0, 1,   0, 2, 2, 2,   0, 0, 0, 2,   2, 1, 1, 2,   
			
 
				-2, 2, 2, 2,   0, 1, 1, 1,   1, 1, 1, 2,   2, 1, 1, 2,   
			
 
				-
			
 
				-0, 2, 2, 2,   0, 0, 0, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 1,   1, 1, 1, 2,   0, 1, 1, 0,   2, 1, 1, 2,   
			
 
				-0, 2, 2, 2,   0, 0, 0, 2,   2, 2, 2, 2,   2, 1, 1, 2,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 0, 2, 2,   0, 0, 2, 2,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
			
 
				-2, 2, 2, 2,   0, 0, 1, 1,   1, 1, 2, 2,   0, 0, 0, 0,   
			
 
				-2, 2, 2, 2,   0, 0, 2, 2,   0, 0, 2, 2,   2, 1, 1, 2,   
			
 
				-
			
 
				-0, 0, 0, 2,   0, 2, 2, 2,   0, 1, 0, 1,   0, 1, 1, 1,   
			
 
				-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 0, 1, 1,   
			
 
				-0, 0, 0, 2,   0, 2, 2, 2,   2, 2, 2, 2,   2, 2, 0, 1,   
			
 
				-0, 0, 0, 1,   1, 2, 2, 2,   2, 2, 2, 2,   2, 2, 2, 0,
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
			
 
				-
			
 
				-static int shapeindex_to_compressed_indices[NSHAPES*3] = 
			
 
				-{
			
 
				-	0, 3,15,  0, 3, 8,  0,15, 8,  0,15, 3,
			
 
				-	0, 8,15,  0, 3,15,  0,15, 3,  0,15, 8,
			
 
				-	0, 8,15,  0, 8,15,  0, 6,15,  0, 6,15,
			
 
				-	0, 6,15,  0, 5,15,  0, 3,15,  0, 3, 8,
			
 
				-
			
 
				-	0, 3,15,  0, 3, 8,  0, 8,15,  0,15, 3,
			
 
				-	0, 3,15,  0, 3, 8,  0, 6,15,  0,10, 8,
			
 
				-	0, 5, 3,  0, 8,15,  0, 8, 6,  0, 6,10,
			
 
				-	0, 8,15,  0, 5,15,  0,15,10,  0,15, 8,
			
 
				-
			
 
				-	0, 8,15,  0,15, 3,  0, 3,15,  0, 5,10,
			
 
				-	0, 6,10,  0,10, 8,  0, 8, 9,  0,15,10,
			
 
				-	0,15, 6,  0, 3,15,  0,15, 8,  0, 5,15,
			
 
				-	0,15, 3,  0,15, 6,  0,15, 6,  0,15, 8,
			
 
				-
			
 
				-	0, 3,15,  0,15, 3,  0, 5,15,  0, 5,15,
			
 
				-	0, 5,15,  0, 8,15,  0, 5,15,  0,10,15,
			
 
				-	0, 5,15,  0,10,15,  0, 8,15,  0,13,15,
			
 
				-	0,15, 3,  0,12,15,  0, 3,15,  0, 3, 8
			
 
				-
			
 
				-};
			
 
				-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*3+(region)]
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/shapes_two.h
+++ b/3rdparty/nvtt/bc7/shapes_two.h
@@ -1,133 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef _AVPCL_SHAPES_TWO_H
			
 
				-#define _AVPCL_SHAPES_TWO_H
			
 
				-
			
 
				-// shapes for two regions
			
 
				-
			
 
				-#define NREGIONS 2
			
 
				-#define NSHAPES 64
			
 
				-#define SHAPEBITS 6
			
 
				-
			
 
				-static int shapes[NSHAPES*16] = 
			
 
				-{
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 1,   0, 1, 1, 1,   0, 0, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 0, 1,   0, 1, 1, 1,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-0, 0, 0, 1,   1, 1, 1, 1,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-0, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 1,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   1, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 0, 0,   0, 1, 1, 1,   
			
 
				-1, 0, 0, 0,   0, 0, 0, 1,   0, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-1, 1, 1, 0,   0, 0, 0, 0,   1, 0, 0, 0,   0, 0, 0, 1,   
			
 
				-1, 1, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   0, 0, 0, 0,   0, 1, 1, 1,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 0,   1, 1, 0, 0,   1, 0, 0, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   0, 0, 0, 1,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-0, 0, 0, 1,   1, 0, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-0, 0, 0, 0,   1, 1, 0, 0,   0, 1, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 0, 0, 1,   0, 0, 0, 0,   0, 1, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 1, 1, 1,   1, 1, 1, 1,   0, 0, 0, 1,   1, 0, 0, 1,   
			
 
				-1, 1, 1, 0,   1, 1, 1, 1,   1, 0, 0, 0,   1, 0, 0, 1,   
			
 
				-1, 0, 0, 0,   0, 0, 0, 0,   1, 1, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   0, 0, 1, 1,   
			
 
				-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   0, 0, 1, 1,   
			
 
				-0, 1, 0, 1,   0, 0, 0, 0,   0, 1, 0, 1,   1, 1, 0, 0,   
			
 
				-0, 1, 0, 1,   1, 1, 1, 1,   1, 0, 1, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 0, 1, 1,   0, 1, 0, 1,   0, 1, 1, 0,   0, 1, 0, 1,   
			
 
				-1, 1, 0, 0,   0, 1, 0, 1,   1, 0, 0, 1,   1, 0, 1, 0,   
			
 
				-0, 0, 1, 1,   1, 0, 1, 0,   0, 1, 1, 0,   1, 0, 1, 0,   
			
 
				-1, 1, 0, 0,   1, 0, 1, 0,   1, 0, 0, 1,   0, 1, 0, 1,   
			
 
				-
			
 
				-0, 1, 1, 1,   0, 0, 0, 1,   0, 0, 1, 1,   0, 0, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   1, 0, 1, 1,   
			
 
				-1, 1, 0, 0,   1, 1, 0, 0,   0, 1, 0, 0,   1, 1, 0, 1,   
			
 
				-1, 1, 1, 0,   1, 0, 0, 0,   1, 1, 0, 0,   1, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 0, 0,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   0, 1, 1, 0,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   0, 1, 1, 0,   
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   0, 0, 0, 0,   
			
 
				-
			
 
				-0, 1, 0, 0,   0, 0, 1, 0,   0, 0, 0, 0,   0, 0, 0, 0,   
			
 
				-1, 1, 1, 0,   0, 1, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-0, 1, 0, 0,   0, 0, 1, 0,   0, 1, 1, 1,   1, 1, 1, 0,   
			
 
				-0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 0, 1, 1,   0, 1, 1, 0,   0, 0, 1, 1,   
			
 
				-1, 1, 0, 0,   0, 1, 1, 0,   0, 0, 1, 1,   1, 0, 0, 1,   
			
 
				-1, 0, 0, 1,   1, 1, 0, 0,   1, 0, 0, 1,   1, 1, 0, 0,   
			
 
				-0, 0, 1, 1,   1, 0, 0, 1,   1, 1, 0, 0,   0, 1, 1, 0,   
			
 
				-
			
 
				-0, 1, 1, 0,   0, 1, 1, 0,   0, 1, 1, 1,   0, 0, 0, 1,   
			
 
				-1, 1, 0, 0,   0, 0, 1, 1,   1, 1, 1, 0,   1, 0, 0, 0,   
			
 
				-1, 1, 0, 0,   0, 0, 1, 1,   1, 0, 0, 0,   1, 1, 1, 0,   
			
 
				-1, 0, 0, 1,   1, 0, 0, 1,   0, 0, 0, 1,   0, 1, 1, 1,   
			
 
				-
			
 
				-0, 0, 0, 0,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-1, 1, 1, 1,   0, 0, 1, 1,   0, 0, 1, 0,   0, 1, 0, 0,   
			
 
				-0, 0, 1, 1,   1, 1, 1, 1,   1, 1, 1, 0,   0, 1, 1, 1,   
			
 
				-0, 0, 1, 1,   0, 0, 0, 0,   1, 1, 1, 0,   0, 1, 1, 1,   
			
 
				-
			
 
				-};
			
 
				-
			
 
				-#define	REGION(x,y,si)	shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
			
 
				-
			
 
				-static int shapeindex_to_compressed_indices[NSHAPES*2] = 
			
 
				-{
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-
			
 
				-	0,15,  0, 2,  0, 8,  0, 2,
			
 
				-	0, 2,  0, 8,  0, 8,  0,15,
			
 
				-	0, 2,  0, 8,  0, 2,  0, 2,
			
 
				-	0, 8,  0, 8,  0, 2,  0, 2,
			
 
				-
			
 
				-	0,15,  0,15,  0, 6,  0, 8,
			
 
				-	0, 2,  0, 8,  0,15,  0,15,
			
 
				-	0, 2,  0, 8,  0, 2,  0, 2,
			
 
				-	0, 2,  0,15,  0,15,  0, 6,
			
 
				-
			
 
				-	0, 6,  0, 2,  0, 6,  0, 8,
			
 
				-	0,15,  0,15,  0, 2,  0, 2,
			
 
				-	0,15,  0,15,  0,15,  0,15,
			
 
				-	0,15,  0, 2,  0, 2,  0,15
			
 
				-
			
 
				-};
			
 
				-#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region)  shapeindex_to_compressed_indices[(si)*2+(region)]
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/bc7/tile.h
+++ b/3rdparty/nvtt/bc7/tile.h
@@ -1,41 +0,0 @@
 
				-/*
			
 
				-Copyright 2007 nVidia, Inc.
			
 
				-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
			
 
				-
			
 
				-You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
			
 
				-
			
 
				-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 
			
 
				-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
			
 
				-
			
 
				-See the License for the specific language governing permissions and limitations under the License.
			
 
				-*/
			
 
				-
			
 
				-#ifndef _AVPCL_TILE_H
			
 
				-#define _AVPCL_TILE_H
			
 
				-
			
 
				-#include "nvmath/vector.h"
			
 
				-#include <math.h>
			
 
				-#include "avpcl_utils.h"
			
 
				-
			
 
				-namespace AVPCL {
			
 
				-
			
 
				-// extract a tile of pixels from an array
			
 
				-
			
 
				-class Tile
			
 
				-{
			
 
				-public:
			
 
				-	static const int TILE_H = 4;
			
 
				-	static const int TILE_W = 4;
			
 
				-	static const int TILE_TOTAL = TILE_H * TILE_W;
			
 
				-	nv::Vector4 data[TILE_H][TILE_W];
			
 
				-    float importance_map[TILE_H][TILE_W];
			
 
				-	int	size_x, size_y;			// actual size of tile
			
 
				-
			
 
				-	Tile() {};
			
 
				-	~Tile(){};
			
 
				-	Tile(int xs, int ys) {size_x = xs; size_y = ys;}
			
 
				-};
			
 
				-
			
 
				-}
			
 
				-
			
 
				-#endif
			
--- a/3rdparty/nvtt/nvcore/array.h
+++ b/3rdparty/nvtt/nvcore/array.h
@@ -1,181 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_ARRAY_H
			
 
				-#define NV_CORE_ARRAY_H
			
 
				-
			
 
				-/*
			
 
				-This array class requires the elements to be relocable; it uses memmove and realloc. Ideally I should be 
			
 
				-using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
			
 
				-are not supported.
			
 
				-
			
 
				-Note also that push_back and resize does not support inserting arguments elements that are in the same 
			
 
				-container. This is forbidden to prevent an extra copy.
			
 
				-*/
			
 
				-
			
 
				-
			
 
				-#include "memory.h"
			
 
				-#include "debug.h"
			
 
				-#include "foreach.h" // pseudoindex
			
 
				-
			
 
				-
			
 
				-namespace nv 
			
 
				-{
			
 
				-    class Stream;
			
 
				-
			
 
				-    /**
			
 
				-    * Replacement for std::vector that is easier to debug and provides
			
 
				-    * some nice foreach enumerators. 
			
 
				-    */
			
 
				-    template<typename T>
			
 
				-    class NVCORE_CLASS Array {
			
 
				-    public:
			
 
				-        typedef uint size_type;
			
 
				-
			
 
				-        // Default constructor.
			
 
				-        NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
			
 
				-
			
 
				-        // Copy constructor.
			
 
				-        NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
			
 
				-            copy(a.m_buffer, a.m_size);
			
 
				-        }
			
 
				-
			
 
				-        // Constructor that initializes the vector with the given elements.
			
 
				-        NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
			
 
				-            copy(ptr, num);
			
 
				-        }
			
 
				-
			
 
				-        // Allocate array.
			
 
				-        NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
			
 
				-            setArrayCapacity(capacity);
			
 
				-        }
			
 
				-
			
 
				-        // Destructor.
			
 
				-        NV_FORCEINLINE ~Array() {
			
 
				-            clear();
			
 
				-            free<T>(m_buffer);
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-        /// Const element access.
			
 
				-        NV_FORCEINLINE const T & operator[]( uint index ) const
			
 
				-        {
			
 
				-            nvDebugCheck(index < m_size);
			
 
				-            return m_buffer[index];
			
 
				-        }
			
 
				-        NV_FORCEINLINE const T & at( uint index ) const
			
 
				-        {
			
 
				-            nvDebugCheck(index < m_size);
			
 
				-            return m_buffer[index];
			
 
				-        }
			
 
				-
			
 
				-        /// Element access.
			
 
				-        NV_FORCEINLINE T & operator[] ( uint index )
			
 
				-        {
			
 
				-            nvDebugCheck(index < m_size);
			
 
				-            return m_buffer[index];
			
 
				-        }
			
 
				-        NV_FORCEINLINE T & at( uint index )
			
 
				-        {
			
 
				-            nvDebugCheck(index < m_size);
			
 
				-            return m_buffer[index];
			
 
				-        }
			
 
				-
			
 
				-        /// Get vector size.
			
 
				-        NV_FORCEINLINE uint size() const { return m_size; }
			
 
				-
			
 
				-        /// Get vector size.
			
 
				-        NV_FORCEINLINE uint count() const { return m_size; }
			
 
				-
			
 
				-        /// Get vector capacity.
			
 
				-        NV_FORCEINLINE uint capacity() const { return m_capacity; }
			
 
				-
			
 
				-        /// Get const vector pointer.
			
 
				-        NV_FORCEINLINE const T * buffer() const { return m_buffer; }
			
 
				-
			
 
				-        /// Get vector pointer.
			
 
				-        NV_FORCEINLINE T * buffer() { return m_buffer; }
			
 
				-
			
 
				-        /// Provide begin/end pointers for C++11 range-based for loops.
			
 
				-        NV_FORCEINLINE T * begin() { return m_buffer; }
			
 
				-        NV_FORCEINLINE T * end() { return m_buffer + m_size; }
			
 
				-        NV_FORCEINLINE const T * begin() const { return m_buffer; }
			
 
				-        NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
			
 
				-
			
 
				-        /// Is vector empty.
			
 
				-        NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
			
 
				-
			
 
				-        /// Is a null vector.
			
 
				-        NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
			
 
				-
			
 
				-
			
 
				-        T & append();
			
 
				-        void push_back( const T & val );
			
 
				-        void pushBack( const T & val );
			
 
				-        Array<T> & append( const T & val );
			
 
				-        Array<T> & operator<< ( T & t );
			
 
				-        void pop_back();
			
 
				-        void popBack(uint count = 1);
			
 
				-        void popFront(uint count = 1);
			
 
				-        const T & back() const;
			
 
				-        T & back();
			
 
				-        const T & front() const;
			
 
				-        T & front();
			
 
				-        bool contains(const T & e) const;
			
 
				-        bool find(const T & element, uint * indexPtr) const;
			
 
				-        bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
			
 
				-        void removeAt(uint index);
			
 
				-        bool remove(const T & element);
			
 
				-        void insertAt(uint index, const T & val = T());
			
 
				-        void append(const Array<T> & other);
			
 
				-        void append(const T other[], uint count);
			
 
				-        void replaceWithLast(uint index);
			
 
				-        void resize(uint new_size);
			
 
				-        void resize(uint new_size, const T & elem);
			
 
				-        void fill(const T & elem);
			
 
				-        void clear();
			
 
				-        void shrink();
			
 
				-        void reserve(uint desired_size);
			
 
				-        void copy(const T * data, uint count);
			
 
				-        Array<T> & operator=( const Array<T> & a );
			
 
				-        T * release();
			
 
				-
			
 
				-
			
 
				-        // Array enumerator.
			
 
				-        typedef uint PseudoIndex;
			
 
				-
			
 
				-        NV_FORCEINLINE PseudoIndex start() const { return 0; }
			
 
				-        NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
			
 
				-        NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
			
 
				-
			
 
				-#if NV_CC_MSVC
			
 
				-        NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
			
 
				-            return m_buffer[i(this)];
			
 
				-        }
			
 
				-        NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
			
 
				-            return m_buffer[i(this)];
			
 
				-        }
			
 
				-#endif
			
 
				-
			
 
				-        // Friends.
			
 
				-        template <typename Typ> 
			
 
				-        friend Stream & operator<< ( Stream & s, Array<Typ> & p );
			
 
				-
			
 
				-        template <typename Typ>
			
 
				-        friend void swap(Array<Typ> & a, Array<Typ> & b);
			
 
				-
			
 
				-
			
 
				-    protected:
			
 
				-
			
 
				-        void setArraySize(uint new_size);
			
 
				-        void setArrayCapacity(uint new_capacity);
			
 
				-
			
 
				-        T * m_buffer;
			
 
				-        uint m_capacity;
			
 
				-        uint m_size;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_ARRAY_H
			
--- a/3rdparty/nvtt/nvcore/array.inl
+++ b/3rdparty/nvtt/nvcore/array.inl
@@ -1,437 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>

			
 
				-

			
 
				-#ifndef NV_CORE_ARRAY_INL

			
 
				-#define NV_CORE_ARRAY_INL

			
 
				-

			
 
				-#include "array.h"

			
 
				-

			
 
				-#include "stream.h"

			
 
				-#include "utils.h" // swap

			
 
				-

			
 
				-#include <string.h>	// memmove

			
 
				-#include <new> // for placement new

			
 
				-

			
 
				-

			
 
				-

			
 
				-namespace nv 

			
 
				-{

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE T & Array<T>::append()

			
 
				-    {

			
 
				-        uint old_size = m_size;

			
 
				-        uint new_size = m_size + 1;

			
 
				-

			
 
				-        setArraySize(new_size);

			
 
				-

			
 
				-        construct_range(m_buffer, new_size, old_size);

			
 
				-

			
 
				-        return m_buffer[old_size]; // Return reference to last element.

			
 
				-    }

			
 
				-

			
 
				-    // Push an element at the end of the vector.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::push_back( const T & val )

			
 
				-    {

			
 
				-#if 1

			
 
				-        nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);

			
 
				-

			
 
				-        uint old_size = m_size;

			
 
				-        uint new_size = m_size + 1;

			
 
				-

			
 
				-        setArraySize(new_size);

			
 
				-

			
 
				-        construct_range(m_buffer, new_size, old_size, val);

			
 
				-#else

			
 
				-        uint new_size = m_size + 1;

			
 
				-

			
 
				-        if (new_size > m_capacity)

			
 
				-        {

			
 
				-            // @@ Is there any way to avoid this copy?

			
 
				-            // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?

			
 
				-            // @@ Assert instead of copy?

			
 
				-            const T copy(val);	// create a copy in case value is inside of this array.

			
 
				-

			
 
				-            setArraySize(new_size);

			
 
				-

			
 
				-            new (m_buffer+new_size-1) T(copy);

			
 
				-        }

			
 
				-        else

			
 
				-        {

			
 
				-            m_size = new_size;

			
 
				-            new(m_buffer+new_size-1) T(val);

			
 
				-        }

			
 
				-#endif // 0/1

			
 
				-    }

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::pushBack( const T & val )

			
 
				-    {

			
 
				-        push_back(val);

			
 
				-    }

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )

			
 
				-    {

			
 
				-        push_back(val);

			
 
				-        return *this;

			
 
				-    }

			
 
				-

			
 
				-    // Qt like push operator.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )

			
 
				-    {

			
 
				-        push_back(t);

			
 
				-        return *this;

			
 
				-    }

			
 
				-

			
 
				-    // Pop the element at the end of the vector.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::pop_back()

			
 
				-    {

			
 
				-        nvDebugCheck( m_size > 0 );

			
 
				-        resize( m_size - 1 );

			
 
				-    }

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::popBack(uint count)

			
 
				-    {

			
 
				-        nvDebugCheck(m_size >= count);

			
 
				-        resize(m_size - count);

			
 
				-    }

			
 
				-

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::popFront(uint count)

			
 
				-    {

			
 
				-        nvDebugCheck(m_size >= count);

			
 
				-        //resize(m_size - count);

			
 
				-

			
 
				-        if (m_size == count) {

			
 
				-            clear();

			
 
				-        }

			
 
				-        else {

			
 
				-            destroy_range(m_buffer, 0, count);

			
 
				-

			
 
				-            memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));

			
 
				-

			
 
				-            m_size -= count;

			
 
				-        }

			
 
				-

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-    // Get back element.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE const T & Array<T>::back() const

			
 
				-    {

			
 
				-        nvDebugCheck( m_size > 0 );

			
 
				-        return m_buffer[m_size-1];

			
 
				-    }

			
 
				-

			
 
				-    // Get back element.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE T & Array<T>::back()

			
 
				-    {

			
 
				-        nvDebugCheck( m_size > 0 );

			
 
				-        return m_buffer[m_size-1];

			
 
				-    }

			
 
				-

			
 
				-    // Get front element.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE const T & Array<T>::front() const

			
 
				-    {

			
 
				-        nvDebugCheck( m_size > 0 );

			
 
				-        return m_buffer[0];

			
 
				-    }

			
 
				-

			
 
				-    // Get front element.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE T & Array<T>::front()

			
 
				-    {

			
 
				-        nvDebugCheck( m_size > 0 );

			
 
				-        return m_buffer[0];

			
 
				-    }

			
 
				-

			
 
				-    // Check if the given element is contained in the array.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE bool Array<T>::contains(const T & e) const

			
 
				-    {

			
 
				-        return find(e, NULL);

			
 
				-    }

			
 
				-

			
 
				-    // Return true if element found.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const

			
 
				-    {

			
 
				-        return find(element, 0, m_size, indexPtr);

			
 
				-    }

			
 
				-

			
 
				-    // Return true if element found within the given range.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const

			
 
				-    {

			
 
				-        return ::nv::find(element, m_buffer, begin, end, indexPtr);

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-    // Remove the element at the given index. This is an expensive operation!

			
 
				-    template <typename T>

			
 
				-    void Array<T>::removeAt(uint index)

			
 
				-    {

			
 
				-        nvDebugCheck(index >= 0 && index < m_size);

			
 
				-

			
 
				-        if (m_size == 1) {

			
 
				-            clear();

			
 
				-        }

			
 
				-        else {

			
 
				-            m_buffer[index].~T();

			
 
				-

			
 
				-            memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));

			
 
				-            m_size--;

			
 
				-        }

			
 
				-    }

			
 
				-

			
 
				-    // Remove the first instance of the given element.

			
 
				-    template <typename T>

			
 
				-    bool Array<T>::remove(const T & element)

			
 
				-    {

			
 
				-        uint index;

			
 
				-        if (find(element, &index)) {

			
 
				-            removeAt(index);

			
 
				-            return true;

			
 
				-        }

			
 
				-        return false;

			
 
				-    }

			
 
				-

			
 
				-    // Insert the given element at the given index shifting all the elements up.

			
 
				-    template <typename T>

			
 
				-    void Array<T>::insertAt(uint index, const T & val/*=T()*/)

			
 
				-    {

			
 
				-        nvDebugCheck( index >= 0 && index <= m_size );

			
 
				-

			
 
				-        setArraySize(m_size + 1);

			
 
				-

			
 
				-        if (index < m_size - 1) {

			
 
				-            memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));

			
 
				-        }

			
 
				-

			
 
				-        // Copy-construct into the newly opened slot.

			
 
				-        new(m_buffer+index) T(val);

			
 
				-    }

			
 
				-

			
 
				-    // Append the given data to our vector.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::append(const Array<T> & other)

			
 
				-    {

			
 
				-        append(other.m_buffer, other.m_size);

			
 
				-    }

			
 
				-

			
 
				-    // Append the given data to our vector.

			
 
				-    template <typename T>

			
 
				-    void Array<T>::append(const T other[], uint count)

			
 
				-    {

			
 
				-        if (count > 0) {

			
 
				-            const uint old_size = m_size;

			
 
				-

			
 
				-            setArraySize(m_size + count);

			
 
				-

			
 
				-            for (uint i = 0; i < count; i++ ) {

			
 
				-                new(m_buffer + old_size + i) T(other[i]);

			
 
				-            }

			
 
				-        }

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-    // Remove the given element by replacing it with the last one.

			
 
				-    template <typename T> 

			
 
				-    void Array<T>::replaceWithLast(uint index)

			
 
				-    {

			
 
				-        nvDebugCheck( index < m_size );

			
 
				-        nv::swap(m_buffer[index], back());      // @@ Is this OK when index == size-1?

			
 
				-        (m_buffer+m_size-1)->~T();

			
 
				-        m_size--;

			
 
				-    }

			
 
				-

			
 
				-    // Resize the vector preserving existing elements.

			
 
				-    template <typename T> 

			
 
				-    void Array<T>::resize(uint new_size)

			
 
				-    {

			
 
				-        uint old_size = m_size;

			
 
				-

			
 
				-        // Destruct old elements (if we're shrinking).

			
 
				-        destroy_range(m_buffer, new_size, old_size);

			
 
				-

			
 
				-        setArraySize(new_size);

			
 
				-

			
 
				-        // Call default constructors

			
 
				-        construct_range(m_buffer, new_size, old_size);

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-    // Resize the vector preserving existing elements and initializing the

			
 
				-    // new ones with the given value.

			
 
				-    template <typename T> 

			
 
				-    void Array<T>::resize(uint new_size, const T & elem)

			
 
				-    {

			
 
				-        nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size);

			
 
				-

			
 
				-        uint old_size = m_size;

			
 
				-

			
 
				-        // Destruct old elements (if we're shrinking).

			
 
				-        destroy_range(m_buffer, new_size, old_size);

			
 
				-

			
 
				-        setArraySize(new_size);

			
 
				-

			
 
				-        // Call copy constructors

			
 
				-        construct_range(m_buffer, new_size, old_size, elem);

			
 
				-    }

			
 
				-

			
 
				-    // Fill array with the given value.

			
 
				-    template <typename T>

			
 
				-    void Array<T>::fill(const T & elem)

			
 
				-    {

			
 
				-        fill(m_buffer, m_size, elem);

			
 
				-    }

			
 
				-

			
 
				-    // Clear the buffer.

			
 
				-    template <typename T> 

			
 
				-    NV_FORCEINLINE void Array<T>::clear()

			
 
				-    {

			
 
				-        nvDebugCheck(isValidPtr(m_buffer));

			
 
				-

			
 
				-        // Destruct old elements

			
 
				-        destroy_range(m_buffer, 0, m_size);

			
 
				-

			
 
				-        m_size = 0;

			
 
				-    }

			
 
				-

			
 
				-    // Shrink the allocated vector.

			
 
				-    template <typename T> 

			
 
				-    NV_FORCEINLINE void Array<T>::shrink()

			
 
				-    {

			
 
				-        if (m_size < m_capacity) {

			
 
				-            setArrayCapacity(m_size);

			
 
				-        }

			
 
				-    }

			
 
				-

			
 
				-    // Preallocate space.

			
 
				-    template <typename T> 

			
 
				-    NV_FORCEINLINE void Array<T>::reserve(uint desired_size)

			
 
				-    {

			
 
				-        if (desired_size > m_capacity) {

			
 
				-            setArrayCapacity(desired_size);

			
 
				-        }

			
 
				-    }

			
 
				-

			
 
				-    // Copy elements to this array. Resizes it if needed.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)

			
 
				-    {

			
 
				-#if 1   // More simple, but maybe not be as efficient?

			
 
				-        destroy_range(m_buffer, 0, m_size);

			
 
				-

			
 
				-        setArraySize(count);

			
 
				-

			
 
				-        construct_range(m_buffer, count, 0, data);

			
 
				-#else

			
 
				-        const uint old_size = m_size;

			
 
				-

			
 
				-        destroy_range(m_buffer, count, old_size);

			
 
				-

			
 
				-        setArraySize(count);

			
 
				-

			
 
				-        copy_range(m_buffer, data, old_size);

			
 
				-

			
 
				-        construct_range(m_buffer, count, old_size, data);

			
 
				-#endif

			
 
				-    }

			
 
				-

			
 
				-    // Assignment operator.

			
 
				-    template <typename T>

			
 
				-    NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )

			
 
				-    {

			
 
				-        copy(a.m_buffer, a.m_size);

			
 
				-        return *this;

			
 
				-    }

			
 
				-

			
 
				-    // Release ownership of allocated memory and returns pointer to it.

			
 
				-    template <typename T>

			
 
				-    T * Array<T>::release() {

			
 
				-        T * tmp = m_buffer;

			
 
				-        m_buffer = NULL;

			
 
				-        m_capacity = 0;

			
 
				-        m_size = 0;

			
 
				-        return tmp;

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-

			
 
				-    // Change array size.

			
 
				-    template <typename T> 

			
 
				-    inline void Array<T>::setArraySize(uint new_size) {

			
 
				-        m_size = new_size;

			
 
				-

			
 
				-        if (new_size > m_capacity) {

			
 
				-            uint new_buffer_size;

			
 
				-            if (m_capacity == 0) {

			
 
				-                // first allocation is exact

			
 
				-                new_buffer_size = new_size;

			
 
				-            }

			
 
				-            else {

			
 
				-                // following allocations grow array by 25%

			
 
				-                new_buffer_size = new_size + (new_size >> 2);

			
 
				-            }

			
 
				-

			
 
				-            setArrayCapacity( new_buffer_size );

			
 
				-        }

			
 
				-    }

			
 
				-

			
 
				-    // Change array capacity.

			
 
				-    template <typename T> 

			
 
				-    inline void Array<T>::setArrayCapacity(uint new_capacity) {

			
 
				-        nvDebugCheck(new_capacity >= m_size);

			
 
				-

			
 
				-        if (new_capacity == 0) {

			
 
				-            // free the buffer.

			
 
				-            if (m_buffer != NULL) {

			
 
				-                free<T>(m_buffer);

			
 
				-                m_buffer = NULL;

			
 
				-            }

			
 
				-        }

			
 
				-        else {

			
 
				-            // realloc the buffer

			
 
				-            m_buffer = realloc<T>(m_buffer, new_capacity);

			
 
				-        }

			
 
				-

			
 
				-        m_capacity = new_capacity;

			
 
				-    }

			
 
				-

			
 
				-    // Array serialization.

			
 
				-    template <typename Typ> 

			
 
				-    inline Stream & operator<< ( Stream & s, Array<Typ> & p )

			
 
				-    {

			
 
				-        if (s.isLoading()) {

			
 
				-            uint size;

			
 
				-            s << size;

			
 
				-            p.resize( size );

			
 
				-        }

			
 
				-        else {

			
 
				-            s << p.m_size;

			
 
				-        }

			
 
				-

			
 
				-        for (uint i = 0; i < p.m_size; i++) {

			
 
				-            s << p.m_buffer[i];

			
 
				-        }

			
 
				-

			
 
				-        return s;

			
 
				-    }

			
 
				-

			
 
				-    // Swap the members of the two given vectors.

			
 
				-    template <typename Typ>

			
 
				-    inline void swap(Array<Typ> & a, Array<Typ> & b)

			
 
				-    {

			
 
				-        nv::swap(a.m_buffer, b.m_buffer);

			
 
				-        nv::swap(a.m_capacity, b.m_capacity);

			
 
				-        nv::swap(a.m_size, b.m_size);

			
 
				-    }

			
 
				-

			
 
				-

			
 
				-} // nv namespace

			
 
				-

			
 
				-#endif // NV_CORE_ARRAY_INL

			
--- a/3rdparty/nvtt/nvcore/debug.h
+++ b/3rdparty/nvtt/nvcore/debug.h
@@ -1,216 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_DEBUG_H
			
 
				-#define NV_CORE_DEBUG_H
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-
			
 
				-#include <stdarg.h> // va_list
			
 
				-
			
 
				-
			
 
				-// Make sure we are using our assert.
			
 
				-#undef assert
			
 
				-
			
 
				-#define NV_ABORT_DEBUG      1
			
 
				-#define NV_ABORT_IGNORE     2
			
 
				-#define NV_ABORT_EXIT       3
			
 
				-
			
 
				-#define nvNoAssert(exp) \
			
 
				-    NV_MULTI_LINE_MACRO_BEGIN \
			
 
				-    (void)sizeof(exp); \
			
 
				-    NV_MULTI_LINE_MACRO_END
			
 
				-
			
 
				-#if NV_NO_ASSERT
			
 
				-
			
 
				-#   define nvAssert(exp) nvNoAssert(exp)
			
 
				-#   define nvCheck(exp) nvNoAssert(exp)
			
 
				-#   define nvDebugAssert(exp) nvNoAssert(exp)
			
 
				-#   define nvDebugCheck(exp) nvNoAssert(exp)
			
 
				-#   define nvDebugBreak() nvNoAssert(0)
			
 
				-
			
 
				-#else // NV_NO_ASSERT
			
 
				-
			
 
				-#   if NV_CC_MSVC
			
 
				-        // @@ Does this work in msvc-6 and earlier?
			
 
				-#       define nvDebugBreak()       __debugbreak()
			
 
				-//#       define nvDebugBreak()        __asm { int 3 }
			
 
				-#   elif NV_OS_ORBIS
			
 
				-#       define nvDebugBreak()       __debugbreak()
			
 
				-#   elif NV_CC_GNUC
			
 
				-#       define nvDebugBreak()       __builtin_trap()
			
 
				-#   else
			
 
				-#       error "No nvDebugBreak()!"
			
 
				-#   endif
			
 
				-
			
 
				-/*
			
 
				-#   elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
			
 
				-        // @@ Use __builtin_trap() on GCC
			
 
				-#       define nvDebugBreak()       __asm__ volatile ("trap")
			
 
				-#   elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
			
 
				-#       define nvDebugBreak()       __asm__ volatile ("int3")
			
 
				-#   elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
			
 
				-#       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
			
 
				-#   else
			
 
				-#       include <signal.h>
			
 
				-#       define nvDebugBreak()       raise(SIGTRAP)
			
 
				-#   endif
			
 
				-*/
			
 
				-
			
 
				-#define nvDebugBreakOnce() \
			
 
				-    NV_MULTI_LINE_MACRO_BEGIN \
			
 
				-    static bool firstTime = true; \
			
 
				-    if (firstTime) { firstTime = false; nvDebugBreak(); } \
			
 
				-    NV_MULTI_LINE_MACRO_END
			
 
				-
			
 
				-#define nvAssertMacro(exp) \
			
 
				-    NV_MULTI_LINE_MACRO_BEGIN \
			
 
				-    if (!(exp)) { \
			
 
				-        if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
			
 
				-            nvDebugBreak(); \
			
 
				-        } \
			
 
				-    } \
			
 
				-    NV_MULTI_LINE_MACRO_END
			
 
				-
			
 
				-// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
			
 
				-#define nvAssertMacroWithIgnoreAll(exp,...) \
			
 
				-    NV_MULTI_LINE_MACRO_BEGIN \
			
 
				-        static bool ignoreAll = false; \
			
 
				-        if (!ignoreAll && !(exp)) { \
			
 
				-            int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
			
 
				-            if (result == NV_ABORT_DEBUG) { \
			
 
				-                nvDebugBreak(); \
			
 
				-            } else if (result == NV_ABORT_IGNORE) { \
			
 
				-                ignoreAll = true; \
			
 
				-            } \
			
 
				-        } \
			
 
				-    NV_MULTI_LINE_MACRO_END
			
 
				-
			
 
				-// Interesting assert macro from Insomniac:
			
 
				-// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
			
 
				-// Used as follows:
			
 
				-// if (nvCheck(i < count)) {
			
 
				-//     normal path
			
 
				-// } else {
			
 
				-//     fixup code.
			
 
				-// }
			
 
				-// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
			
 
				-#define nvCheckMacro(exp) \
			
 
				-    (\
			
 
				-        (exp) ? true : ( \
			
 
				-            (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
			
 
				-        ) \
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-#define nvAssert(exp)    nvAssertMacro(exp)
			
 
				-#define nvCheck(exp)     nvAssertMacro(exp)
			
 
				-
			
 
				-#if defined(_DEBUG)
			
 
				-#   define nvDebugAssert(exp)   nvAssertMacro(exp)
			
 
				-#   define nvDebugCheck(exp)    nvAssertMacro(exp)
			
 
				-#else // _DEBUG
			
 
				-#   define nvDebugAssert(exp)   nvNoAssert(exp)
			
 
				-#   define nvDebugCheck(exp)    nvNoAssert(exp)
			
 
				-#endif // _DEBUG
			
 
				-
			
 
				-#endif // NV_NO_ASSERT
			
 
				-
			
 
				-// Use nvAssume for very simple expresions only: nvAssume(0), nvAssume(value == true), etc.
			
 
				-/*#if !defined(_DEBUG)
			
 
				-#   if NV_CC_MSVC
			
 
				-#       define nvAssume(exp)    __assume(exp)
			
 
				-#   else
			
 
				-#       define nvAssume(exp)    nvCheck(exp)
			
 
				-#   endif
			
 
				-#else
			
 
				-#   define nvAssume(exp)    nvCheck(exp)
			
 
				-#endif*/
			
 
				-
			
 
				-#if defined(_DEBUG)
			
 
				-#  if NV_CC_MSVC
			
 
				-#   define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0)
			
 
				-#  else
			
 
				-#   define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable()
			
 
				-#  endif
			
 
				-#else
			
 
				-#  if NV_CC_MSVC
			
 
				-#   define nvUnreachable() __assume(0)
			
 
				-#  else
			
 
				-#   define nvUnreachable() __builtin_unreachable()
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-#define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
			
 
				-#define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
			
 
				-
			
 
				-#ifndef NV_DEBUG_PRINT
			
 
				-#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
			
 
				-#endif
			
 
				-
			
 
				-#if NV_DEBUG_PRINT
			
 
				-#define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
			
 
				-#else
			
 
				-#if NV_CC_MSVC
			
 
				-#define nvDebug(...)    __noop(__VA_ARGS__)
			
 
				-#else
			
 
				-#define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
			
 
				-#endif
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
			
 
				-NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-    inline bool isValidPtr(const void * ptr) {
			
 
				-    #if NV_CPU_X86_64
			
 
				-        if (ptr == NULL) return true;
			
 
				-        if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
			
 
				-        if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
			
 
				-    #else
			
 
				-	    if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
			
 
				-	    if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
			
 
				-	    if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
			
 
				-	    if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
			
 
				-    #endif
			
 
				-        return true;
			
 
				-    }
			
 
				-
			
 
				-    // Message handler interface.
			
 
				-    struct MessageHandler {
			
 
				-        virtual void log(const char * str, va_list arg) = 0;
			
 
				-        virtual ~MessageHandler() {}
			
 
				-    };
			
 
				-
			
 
				-    // Assert handler interface.
			
 
				-    struct AssertHandler {
			
 
				-        virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
			
 
				-        virtual ~AssertHandler() {}
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    namespace debug
			
 
				-    {
			
 
				-        NVCORE_API void dumpInfo();
			
 
				-        NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
			
 
				-
			
 
				-        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
			
 
				-        NVCORE_API void resetMessageHandler();
			
 
				-
			
 
				-        NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
			
 
				-        NVCORE_API void resetAssertHandler();
			
 
				-
			
 
				-        NVCORE_API void enableSigHandler(bool interactive);
			
 
				-        NVCORE_API void disableSigHandler();
			
 
				-
			
 
				-        NVCORE_API bool isDebuggerPresent();
			
 
				-        NVCORE_API bool attachToDebugger();
			
 
				-
			
 
				-        NVCORE_API void terminate(int code);
			
 
				-    }
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_DEBUG_H
			
--- a/3rdparty/nvtt/nvcore/defsgnucdarwin.h
+++ b/3rdparty/nvtt/nvcore/defsgnucdarwin.h
@@ -1,57 +0,0 @@
 
				-#ifndef NV_CORE_H
			
 
				-#error "Do not include this file directly."
			
 
				-#endif
			
 
				-
			
 
				-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
			
 
				-#include <stddef.h> // operator new, size_t, NULL
			
 
				-
			
 
				-#ifndef __STDC_VERSION__
			
 
				-#	define __STDC_VERSION__ 0
			
 
				-#endif // __STDC_VERSION__
			
 
				-
			
 
				-// Function linkage
			
 
				-#define DLL_IMPORT
			
 
				-#if __GNUC__ >= 4
			
 
				-#	define DLL_EXPORT __attribute__((visibility("default")))
			
 
				-#	define DLL_EXPORT_CLASS DLL_EXPORT
			
 
				-#else
			
 
				-#	define DLL_EXPORT
			
 
				-#	define DLL_EXPORT_CLASS
			
 
				-#endif
			
 
				-
			
 
				-// Function calling modes
			
 
				-#if NV_CPU_X86
			
 
				-#	define NV_CDECL 	__attribute__((cdecl))
			
 
				-#	define NV_STDCALL	__attribute__((stdcall))
			
 
				-#else
			
 
				-#	define NV_CDECL 
			
 
				-#	define NV_STDCALL
			
 
				-#endif
			
 
				-
			
 
				-#define NV_FASTCALL		__attribute__((fastcall))
			
 
				-#define NV_FORCEINLINE	inline
			
 
				-#define NV_DEPRECATED   __attribute__((deprecated))
			
 
				-#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
			
 
				-
			
 
				-#if __GNUC__ > 2
			
 
				-#define NV_PURE     __attribute__((pure))
			
 
				-#define NV_CONST    __attribute__((const))
			
 
				-#else
			
 
				-#define NV_PURE
			
 
				-#define NV_CONST
			
 
				-#endif
			
 
				-
			
 
				-#define NV_NOINLINE __attribute__((noinline))
			
 
				-
			
 
				-// Define __FUNC__ properly.
			
 
				-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
			
 
				-#	if __GNUC__ >= 2
			
 
				-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
			
 
				-#	else
			
 
				-#		define __FUNC__ "<unknown>"
			
 
				-#	endif
			
 
				-#else
			
 
				-#	define __FUNC__ __PRETTY_FUNCTION__
			
 
				-#endif
			
 
				-
			
 
				-#define restrict    __restrict__
			
--- a/3rdparty/nvtt/nvcore/defsgnuclinux.h
+++ b/3rdparty/nvtt/nvcore/defsgnuclinux.h
@@ -1,63 +0,0 @@
 
				-#ifndef NV_CORE_H
			
 
				-#error "Do not include this file directly."
			
 
				-#endif
			
 
				-
			
 
				-#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
			
 
				-#include <stddef.h> // operator new, size_t, NULL
			
 
				-
			
 
				-#ifndef __STDC_VERSION__
			
 
				-#	define __STDC_VERSION__ 0
			
 
				-#endif
			
 
				-
			
 
				-// Function linkage
			
 
				-#define DLL_IMPORT
			
 
				-#if __GNUC__ >= 4
			
 
				-#   define DLL_EXPORT   __attribute__((visibility("default")))
			
 
				-#   define DLL_EXPORT_CLASS DLL_EXPORT
			
 
				-#else
			
 
				-#   define DLL_EXPORT
			
 
				-#   define DLL_EXPORT_CLASS
			
 
				-#endif
			
 
				-
			
 
				-// Function calling modes
			
 
				-#if NV_CPU_X86
			
 
				-#   define NV_CDECL     __attribute__((cdecl))
			
 
				-#   define NV_STDCALL   __attribute__((stdcall))
			
 
				-#else
			
 
				-#   define NV_CDECL 
			
 
				-#   define NV_STDCALL
			
 
				-#endif
			
 
				-
			
 
				-#define NV_FASTCALL     __attribute__((fastcall))
			
 
				-//#if __GNUC__ > 3
			
 
				-// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
			
 
				-#define NV_FORCEINLINE  inline
			
 
				-//#else
			
 
				-// Some compilers complain that inline and always_inline are redundant.
			
 
				-//#define NV_FORCEINLINE  __attribute__((always_inline))
			
 
				-//#endif
			
 
				-#define NV_DEPRECATED   __attribute__((deprecated))
			
 
				-#define NV_THREAD_LOCAL __thread 
			
 
				-
			
 
				-#if __GNUC__ > 2
			
 
				-#define NV_PURE     __attribute__((pure))
			
 
				-#define NV_CONST    __attribute__((const))
			
 
				-#else
			
 
				-#define NV_PURE
			
 
				-#define NV_CONST
			
 
				-#endif
			
 
				-
			
 
				-#define NV_NOINLINE __attribute__((noinline))
			
 
				-
			
 
				-// Define __FUNC__ properly.
			
 
				-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
			
 
				-#   if __GNUC__ >= 2
			
 
				-#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
			
 
				-#   else
			
 
				-#       define __FUNC__ "<unknown>"
			
 
				-#   endif
			
 
				-#else
			
 
				-#   define __FUNC__ __PRETTY_FUNCTION__
			
 
				-#endif
			
 
				-
			
 
				-#define restrict    __restrict__
			
--- a/3rdparty/nvtt/nvcore/defsgnucwin32.h
+++ b/3rdparty/nvtt/nvcore/defsgnucwin32.h
@@ -1,65 +0,0 @@
 
				-#ifndef NV_CORE_H
			
 
				-#error "Do not include this file directly."
			
 
				-#endif
			
 
				-
			
 
				-//#include <cstddef> // size_t, NULL
			
 
				-
			
 
				-// Function linkage
			
 
				-#define DLL_IMPORT	__declspec(dllimport)
			
 
				-#define DLL_EXPORT	__declspec(dllexport)
			
 
				-#define DLL_EXPORT_CLASS DLL_EXPORT
			
 
				-
			
 
				-// Function calling modes
			
 
				-#if NV_CPU_X86
			
 
				-#	define NV_CDECL 	__attribute__((cdecl))
			
 
				-#	define NV_STDCALL	__attribute__((stdcall))
			
 
				-#else
			
 
				-#	define NV_CDECL 
			
 
				-#	define NV_STDCALL
			
 
				-#endif
			
 
				-
			
 
				-#define NV_FASTCALL		__attribute__((fastcall))
			
 
				-#define NV_FORCEINLINE	inline
			
 
				-#define NV_DEPRECATED   __attribute__((deprecated))
			
 
				-
			
 
				-#if __GNUC__ > 2
			
 
				-#define NV_PURE		__attribute__((pure))
			
 
				-#define NV_CONST	__attribute__((const))
			
 
				-#else
			
 
				-#define NV_PURE
			
 
				-#define NV_CONST
			
 
				-#endif
			
 
				-
			
 
				-#define NV_NOINLINE __attribute__((noinline))
			
 
				-
			
 
				-// Define __FUNC__ properly.
			
 
				-#if defined(__STDC_VERSION__) && __STDC_VERSION__ < 199901L
			
 
				-#	if __GNUC__ >= 2
			
 
				-#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
			
 
				-#	else
			
 
				-#		define __FUNC__ "<unknown>"
			
 
				-#	endif
			
 
				-#else
			
 
				-#	define __FUNC__ __PRETTY_FUNCTION__
			
 
				-#endif
			
 
				-
			
 
				-#define restrict	__restrict__
			
 
				-
			
 
				-/*
			
 
				-// Type definitions
			
 
				-typedef unsigned char		uint8;
			
 
				-typedef signed char			int8;
			
 
				-
			
 
				-typedef unsigned short		uint16;
			
 
				-typedef signed short		int16;
			
 
				-
			
 
				-typedef unsigned int		uint32;
			
 
				-typedef signed int			int32;
			
 
				-
			
 
				-typedef unsigned long long	uint64;
			
 
				-typedef signed long long	int64;
			
 
				-
			
 
				-// Aliases
			
 
				-typedef uint32				uint;
			
 
				-*/
			
 
				-
			
--- a/3rdparty/nvtt/nvcore/defsvcwin32.h
+++ b/3rdparty/nvtt/nvcore/defsvcwin32.h
@@ -1,94 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_H
			
 
				-#error "Do not include this file directly."
			
 
				-#endif
			
 
				-
			
 
				-// Function linkage
			
 
				-#define DLL_IMPORT __declspec(dllimport)
			
 
				-#define DLL_EXPORT __declspec(dllexport)
			
 
				-#define DLL_EXPORT_CLASS DLL_EXPORT
			
 
				-
			
 
				-// Function calling modes
			
 
				-#define NV_CDECL        __cdecl
			
 
				-#define NV_STDCALL      __stdcall
			
 
				-#define NV_FASTCALL     __fastcall
			
 
				-#define NV_DEPRECATED
			
 
				-
			
 
				-#define NV_PURE
			
 
				-#define NV_CONST
			
 
				-
			
 
				-// Set standard function names.
			
 
				-#if _MSC_VER < 1900
			
 
				-#   define snprintf _snprintf
			
 
				-#endif
			
 
				-#if _MSC_VER < 1500
			
 
				-#   define vsnprintf _vsnprintf
			
 
				-#endif
			
 
				-#if _MSC_VER < 1700
			
 
				-#   define strtoll _strtoi64
			
 
				-#   define strtoull _strtoui64
			
 
				-#endif
			
 
				-#define chdir _chdir
			
 
				-#define getcwd _getcwd 
			
 
				-
			
 
				-#if _MSC_VER < 1800 // Not sure what version introduced this.
			
 
				-#define va_copy(a, b) (a) = (b)
			
 
				-#endif
			
 
				-
			
 
				-#if !defined restrict
			
 
				-#define restrict
			
 
				-#endif
			
 
				-
			
 
				-// Ignore gcc attributes.
			
 
				-#define __attribute__(X)
			
 
				-
			
 
				-#if !defined __FUNC__
			
 
				-#define __FUNC__ __FUNCTION__ 
			
 
				-#endif
			
 
				-
			
 
				-#define NV_NOINLINE __declspec(noinline)
			
 
				-#define NV_FORCEINLINE inline
			
 
				-
			
 
				-#define NV_THREAD_LOCAL __declspec(thread)
			
 
				-
			
 
				-/*
			
 
				-// Type definitions
			
 
				-typedef unsigned char       uint8;
			
 
				-typedef signed char         int8;
			
 
				-
			
 
				-typedef unsigned short      uint16;
			
 
				-typedef signed short        int16;
			
 
				-
			
 
				-typedef unsigned int        uint32;
			
 
				-typedef signed int          int32;
			
 
				-
			
 
				-typedef unsigned __int64    uint64;
			
 
				-typedef signed __int64      int64;
			
 
				-
			
 
				-// Aliases
			
 
				-typedef uint32              uint;
			
 
				-*/
			
 
				-
			
 
				-// Unwanted VC++ warnings to disable.
			
 
				-/*
			
 
				-#pragma warning(disable : 4244)     // conversion to float, possible loss of data
			
 
				-#pragma warning(disable : 4245)     // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
			
 
				-#pragma warning(disable : 4100)     // unreferenced formal parameter
			
 
				-#pragma warning(disable : 4514)     // unreferenced inline function has been removed
			
 
				-#pragma warning(disable : 4710)     // inline function not expanded
			
 
				-#pragma warning(disable : 4127)     // Conditional expression is constant
			
 
				-#pragma warning(disable : 4305)     // truncation from 'const double' to 'float'
			
 
				-#pragma warning(disable : 4505)     // unreferenced local function has been removed
			
 
				-
			
 
				-#pragma warning(disable : 4702)     // unreachable code in inline expanded function
			
 
				-#pragma warning(disable : 4711)     // function selected for automatic inlining
			
 
				-#pragma warning(disable : 4725)     // Pentium fdiv bug
			
 
				-
			
 
				-#pragma warning(disable : 4786)     // Identifier was truncated and cannot be debugged.
			
 
				-
			
 
				-#pragma warning(disable : 4675)     // resolved overload was found by argument-dependent lookup
			
 
				-*/
			
 
				-
			
 
				-#pragma warning(1 : 4705)     // Report unused local variables.
			
 
				-#pragma warning(1 : 4555)     // Expression has no effect.
			
--- a/3rdparty/nvtt/nvcore/foreach.h
+++ b/3rdparty/nvtt/nvcore/foreach.h
@@ -1,68 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#pragma once
			
 
				-#ifndef NV_CORE_FOREACH_H
			
 
				-#define NV_CORE_FOREACH_H
			
 
				-
			
 
				-/*
			
 
				-These foreach macros are very non-standard and somewhat confusing, but I like them.
			
 
				-*/
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-
			
 
				-#if NV_CC_GNUC // If typeof or decltype is available:
			
 
				-#if !NV_CC_CPP11
			
 
				-#   define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype
			
 
				-#else
			
 
				-#   define NV_DECLTYPE decltype
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-Ideally we would like to write this:
			
 
				-
			
 
				-#define NV_FOREACH(i, container) \
			
 
				-    for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
			
 
				-
			
 
				-But gcc versions prior to 4.7 required an intermediate type. See:
			
 
				-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
			
 
				-*/
			
 
				-
			
 
				-#define NV_FOREACH(i, container) \
			
 
				-    typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
			
 
				-    for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
			
 
				-
			
 
				-#else // If typeof not available:
			
 
				-
			
 
				-#include <new> // placement new
			
 
				-
			
 
				-struct PseudoIndexWrapper {
			
 
				-    template <typename T>
			
 
				-    PseudoIndexWrapper(const T & container) {
			
 
				-        nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
			
 
				-        new (memory) typename T::PseudoIndex(container.start());
			
 
				-    }
			
 
				-    // PseudoIndex cannot have a dtor!
			
 
				-
			
 
				-    template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
			
 
				-        return *reinterpret_cast<typename T::PseudoIndex *>(memory);
			
 
				-    }
			
 
				-    template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
			
 
				-        return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
			
 
				-    }
			
 
				-
			
 
				-    uint8 memory[4];	// Increase the size if we have bigger enumerators.
			
 
				-};
			
 
				-
			
 
				-#define NV_FOREACH(i, container) \
			
 
				-    for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-// Declare foreach keyword.
			
 
				-#if !defined NV_NO_USE_KEYWORDS
			
 
				-#   define foreach NV_FOREACH
			
 
				-#   define foreach_index NV_FOREACH
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-#endif // NV_CORE_FOREACH_H
			
--- a/3rdparty/nvtt/nvcore/hash.h
+++ b/3rdparty/nvtt/nvcore/hash.h
@@ -1,83 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#pragma once
			
 
				-#ifndef NV_CORE_HASH_H
			
 
				-#define NV_CORE_HASH_H
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-    inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
			
 
				-    {
			
 
				-        const uint8 * data = (const uint8 *) data_in;
			
 
				-        uint i = 0;
			
 
				-        while (i < size) {
			
 
				-            h = (h << 16) + (h << 6) - h + (uint) data[i++];
			
 
				-        }
			
 
				-        return h;
			
 
				-    }
			
 
				-
			
 
				-    // Note that this hash does not handle NaN properly.
			
 
				-    inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
			
 
				-    {
			
 
				-        for (uint i = 0; i < count; i++) {
			
 
				-            //nvDebugCheck(nv::isFinite(*f));
			
 
				-            union { float f; uint32 i; } x = { f[i] };
			
 
				-            if (x.i == 0x80000000) x.i = 0;
			
 
				-            h = sdbmHash(&x, 4, h);
			
 
				-        }
			
 
				-        return h;
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-    template <typename T>
			
 
				-    inline uint hash(const T & t, uint h = 5381)
			
 
				-    {
			
 
				-        return sdbmHash(&t, sizeof(T), h);
			
 
				-    }
			
 
				-
			
 
				-    template <>
			
 
				-    inline uint hash(const float & f, uint h)
			
 
				-    {
			
 
				-        return sdbmFloatHash(&f, 1, h);
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-    // Functors for hash table:
			
 
				-    template <typename Key> struct Hash 
			
 
				-    {
			
 
				-        uint operator()(const Key & k) const {
			
 
				-            return hash(k);
			
 
				-        }
			
 
				-    };
			
 
				-
			
 
				-    template <typename Key> struct Equal
			
 
				-    {
			
 
				-        bool operator()(const Key & k0, const Key & k1) const {
			
 
				-            return k0 == k1;
			
 
				-        }
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    // @@ Move to Utils.h?
			
 
				-    template <typename T1, typename T2>
			
 
				-    struct Pair {
			
 
				-        T1 first;
			
 
				-        T2 second;
			
 
				-    };
			
 
				-
			
 
				-    template <typename T1, typename T2>
			
 
				-    bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
			
 
				-        return p0.first == p1.first && p0.second == p1.second;
			
 
				-    }
			
 
				-
			
 
				-    template <typename T1, typename T2>
			
 
				-    uint hash(const Pair<T1,T2> & p, uint h = 5381) {
			
 
				-        return hash(p.second, hash(p.first));
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_HASH_H
			
--- a/3rdparty/nvtt/nvcore/memory.h
+++ b/3rdparty/nvtt/nvcore/memory.h
@@ -1,30 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_MEMORY_H
			
 
				-#define NV_CORE_MEMORY_H
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-#include <stdlib.h>
			
 
				-
			
 
				-namespace nv {
			
 
				-
			
 
				-    // C++ helpers.
			
 
				-    template <typename T> inline T * malloc(size_t count) {
			
 
				-        return (T *)::malloc(sizeof(T) * count);
			
 
				-    }
			
 
				-
			
 
				-    template <typename T> inline T * realloc(T * ptr, size_t count) {
			
 
				-        return (T *)::realloc(ptr, sizeof(T) * count);
			
 
				-    }
			
 
				-
			
 
				-    template <typename T> inline void free(const T * ptr) {
			
 
				-        ::free((void *)ptr);
			
 
				-    }
			
 
				-
			
 
				-    template <typename T> inline void zero(T & data) {
			
 
				-        memset(&data, 0, sizeof(T));
			
 
				-    }
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_MEMORY_H
			
--- a/3rdparty/nvtt/nvcore/nvcore.h
+++ b/3rdparty/nvtt/nvcore/nvcore.h
@@ -1,363 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_H
			
 
				-#define NV_CORE_H
			
 
				-
			
 
				-#define NVCORE_SHARED 0
			
 
				-#define NV_NO_ASSERT 0
			
 
				-
			
 
				-// Function linkage
			
 
				-#if NVCORE_SHARED
			
 
				-#ifdef NVCORE_EXPORTS
			
 
				-#define NVCORE_API DLL_EXPORT
			
 
				-#define NVCORE_CLASS DLL_EXPORT_CLASS
			
 
				-#else
			
 
				-#define NVCORE_API DLL_IMPORT
			
 
				-#define NVCORE_CLASS DLL_IMPORT
			
 
				-#endif
			
 
				-#else // NVCORE_SHARED
			
 
				-#define NVCORE_API
			
 
				-#define NVCORE_CLASS
			
 
				-#endif // NVCORE_SHARED
			
 
				-
			
 
				-// Platform definitions
			
 
				-#include "posh.h"
			
 
				-
			
 
				-#define NV_OS_STRING POSH_OS_STRING
			
 
				-
			
 
				-#if defined POSH_OS_LINUX
			
 
				-#   define NV_OS_LINUX 1
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#elif defined POSH_OS_ORBIS
			
 
				-#   define NV_OS_ORBIS 1
			
 
				-#elif defined POSH_OS_FREEBSD
			
 
				-#   define NV_OS_FREEBSD 1
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#elif defined POSH_OS_OPENBSD
			
 
				-#   define NV_OS_OPENBSD 1
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#elif defined POSH_OS_CYGWIN32
			
 
				-#   define NV_OS_CYGWIN 1
			
 
				-#elif defined POSH_OS_MINGW
			
 
				-#   define NV_OS_MINGW 1
			
 
				-#   define NV_OS_WIN32 1
			
 
				-#elif defined POSH_OS_OSX
			
 
				-#   define NV_OS_DARWIN 1
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#elif defined POSH_OS_IOS
			
 
				-#   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#   define NV_OS_IOS 1
			
 
				-#elif defined POSH_OS_UNIX
			
 
				-#   define NV_OS_UNIX 1
			
 
				-#elif defined POSH_OS_WIN64
			
 
				-#   define NV_OS_WIN32 1
			
 
				-#   define NV_OS_WIN64 1
			
 
				-#elif defined POSH_OS_WIN32
			
 
				-#   define NV_OS_WIN32 1
			
 
				-#elif defined POSH_OS_XBOX
			
 
				-#   define NV_OS_XBOX 1
			
 
				-#else
			
 
				-#   error "Unsupported OS"
			
 
				-#endif
			
 
				-
			
 
				-#ifndef NV_OS_WIN32
			
 
				-#	define NV_OS_WIN32  0
			
 
				-#endif // NV_OS_WIN32
			
 
				-
			
 
				-#ifndef NV_OS_WIN64
			
 
				-#	define NV_OS_WIN64  0
			
 
				-#endif // NV_OS_WIN64
			
 
				-
			
 
				-#ifndef NV_OS_MINGW
			
 
				-#	define NV_OS_MINGW  0
			
 
				-#endif // NV_OS_MINGW
			
 
				-
			
 
				-#ifndef NV_OS_CYGWIN
			
 
				-#	define NV_OS_CYGWIN 0
			
 
				-#endif // NV_OS_CYGWIN
			
 
				-
			
 
				-#ifndef NV_OS_LINUX
			
 
				-#	define NV_OS_LINUX  0
			
 
				-#endif // NV_OS_LINUX
			
 
				-
			
 
				-#ifndef NV_OS_FREEBSD
			
 
				-#	define NV_OS_FREEBSD 0
			
 
				-#endif // NV_OS_FREEBSD
			
 
				-
			
 
				-#ifndef NV_OS_OPENBSD
			
 
				-#	define NV_OS_OPENBSD 0
			
 
				-#endif // NV_OS_OPENBSD
			
 
				-
			
 
				-#ifndef NV_OS_UNIX
			
 
				-#	define NV_OS_UNIX   0
			
 
				-#endif // NV_OS_UNIX
			
 
				-
			
 
				-#ifndef NV_OS_DARWIN
			
 
				-#	define NV_OS_DARWIN 0
			
 
				-#endif // NV_OS_DARWIN
			
 
				-
			
 
				-#ifndef NV_OS_XBOX
			
 
				-#	define NV_OS_XBOX   0
			
 
				-#endif // NV_OS_XBOX
			
 
				-
			
 
				-#ifndef NV_OS_ORBIS
			
 
				-#	define NV_OS_ORBIS  0
			
 
				-#endif // NV_OS_ORBIS
			
 
				-
			
 
				-#ifndef NV_OS_IOS
			
 
				-#	define NV_OS_IOS    0
			
 
				-#endif // NV_OS_IOS
			
 
				-
			
 
				-// Threading:
			
 
				-// some platforms don't implement __thread or similar for thread-local-storage
			
 
				-#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
			
 
				-#   define NV_OS_USE_PTHREAD 1
			
 
				-#   if NV_OS_DARWIN || NV_OS_IOS
			
 
				-#       define NV_OS_HAS_TLS_QUALIFIER 0
			
 
				-#   else
			
 
				-#       define NV_OS_HAS_TLS_QUALIFIER 1
			
 
				-#   endif
			
 
				-#else
			
 
				-#   define NV_OS_USE_PTHREAD 0
			
 
				-#   define NV_OS_HAS_TLS_QUALIFIER 1
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-// CPUs:
			
 
				-
			
 
				-#define NV_CPU_STRING   POSH_CPU_STRING
			
 
				-
			
 
				-#if defined POSH_CPU_X86_64
			
 
				-//#   define NV_CPU_X86 1
			
 
				-#   define NV_CPU_X86_64 1
			
 
				-#elif defined POSH_CPU_X86
			
 
				-#   define NV_CPU_X86 1
			
 
				-#elif defined POSH_CPU_PPC
			
 
				-#   define NV_CPU_PPC 1
			
 
				-#elif defined POSH_CPU_STRONGARM
			
 
				-#   define NV_CPU_ARM 1
			
 
				-#elif defined POSH_CPU_AARCH64
			
 
				-#   define NV_CPU_AARCH64 1
			
 
				-#else
			
 
				-#   error "Unsupported CPU"
			
 
				-#endif
			
 
				-
			
 
				-#ifndef NV_CPU_X86
			
 
				-#	define NV_CPU_X86     0
			
 
				-#endif // NV_CPU_X86
			
 
				-
			
 
				-#ifndef NV_CPU_X86_64
			
 
				-#	define NV_CPU_X86_64  0
			
 
				-#endif // NV_CPU_X86_64
			
 
				-
			
 
				-#ifndef NV_CPU_PPC
			
 
				-#	define NV_CPU_PPC     0
			
 
				-#endif // NV_CPU_PPC
			
 
				-
			
 
				-#ifndef NV_CPU_ARM
			
 
				-#	define NV_CPU_ARM     0
			
 
				-#endif // NV_CPU_ARM
			
 
				-
			
 
				-#ifndef NV_CPU_AARCH64
			
 
				-#	define NV_CPU_AARCH64 0
			
 
				-#endif // NV_CPU_AARCH64
			
 
				-
			
 
				-// Compiler:
			
 
				-
			
 
				-#if defined POSH_COMPILER_CLANG
			
 
				-#   define NV_CC_CLANG  1
			
 
				-#   define NV_CC_GNUC   1    // Clang is compatible with GCC.
			
 
				-#   define NV_CC_STRING "clang"
			
 
				-#	pragma clang diagnostic ignored "-Wmissing-braces"
			
 
				-#	pragma clang diagnostic ignored "-Wshadow"
			
 
				-#	pragma clang diagnostic ignored "-Wunused-local-typedef"
			
 
				-#	pragma clang diagnostic ignored "-Wunused-function"
			
 
				-#	pragma clang diagnostic ignored "-Wunused-variable"
			
 
				-#	pragma clang diagnostic ignored "-Wunused-parameter"
			
 
				-#	pragma clang diagnostic ignored "-Wsometimes-uninitialized"
			
 
				-#elif defined POSH_COMPILER_GCC
			
 
				-#   define NV_CC_GNUC   1
			
 
				-#   define NV_CC_STRING "gcc"
			
 
				-#	pragma GCC diagnostic ignored "-Wshadow"
			
 
				-#	pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
			
 
				-#	pragma GCC diagnostic ignored "-Wunused-function"
			
 
				-#	pragma GCC diagnostic ignored "-Wunused-but-set-variable"
			
 
				-#	pragma GCC diagnostic ignored "-Wunused-variable"
			
 
				-#	pragma GCC diagnostic ignored "-Wunused-parameter"
			
 
				-#	pragma GCC diagnostic ignored "-Warray-bounds"
			
 
				-#elif defined POSH_COMPILER_MSVC
			
 
				-#   define NV_CC_MSVC   1
			
 
				-#   define NV_CC_STRING "msvc"
			
 
				-#else
			
 
				-#   error "Unsupported compiler"
			
 
				-#endif
			
 
				-
			
 
				-#ifndef NV_CC_GNUC
			
 
				-#	define NV_CC_GNUC  0
			
 
				-#endif // NV_CC_GNUC
			
 
				-
			
 
				-#ifndef NV_CC_MSVC
			
 
				-#	define NV_CC_MSVC  0
			
 
				-#endif // NV_CC_MSVC
			
 
				-
			
 
				-#ifndef NV_CC_CLANG
			
 
				-#	define NV_CC_CLANG 0
			
 
				-#endif // NV_CC_CLANG
			
 
				-
			
 
				-#if NV_CC_MSVC
			
 
				-#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
			
 
				-#else
			
 
				-// @@ IC: This works in CLANG, about GCC?
			
 
				-// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
			
 
				-#ifdef __clang__
			
 
				-#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
			
 
				-#elif defined __GNUC__ 
			
 
				-#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
			
 
				-#endif
			
 
				-#endif
			
 
				-
			
 
				-// Endiannes:
			
 
				-#define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
			
 
				-#define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
			
 
				-#define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
			
 
				-
			
 
				-
			
 
				-// Type definitions:
			
 
				-typedef posh_u8_t   uint8;
			
 
				-typedef posh_i8_t   int8;
			
 
				-
			
 
				-typedef posh_u16_t  uint16;
			
 
				-typedef posh_i16_t  int16;
			
 
				-
			
 
				-typedef posh_u32_t  uint32;
			
 
				-typedef posh_i32_t  int32;
			
 
				-
			
 
				-typedef posh_u64_t  uint64;
			
 
				-typedef posh_i64_t  int64;
			
 
				-
			
 
				-// Aliases
			
 
				-typedef uint32      uint;
			
 
				-
			
 
				-
			
 
				-// Version string:
			
 
				-#define NV_VERSION_STRING \
			
 
				-    NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
			
 
				-    NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
			
 
				-
			
 
				-
			
 
				-// Disable copy constructor and assignment operator. 
			
 
				-#if NV_CC_CPP11
			
 
				-#define NV_FORBID_COPY(C) \
			
 
				-    C( const C & ) = delete; \
			
 
				-    C &operator=( const C & ) = delete
			
 
				-#else
			
 
				-#define NV_FORBID_COPY(C) \
			
 
				-    private: \
			
 
				-    C( const C & ); \
			
 
				-    C &operator=( const C & )
			
 
				-#endif
			
 
				-
			
 
				-// Disable dynamic allocation on the heap. 
			
 
				-// See Prohibiting Heap-Based Objects in More Effective C++.
			
 
				-#define NV_FORBID_HEAPALLOC() \
			
 
				-    private: \
			
 
				-    void *operator new(size_t size); \
			
 
				-    void *operator new[](size_t size)
			
 
				-
			
 
				-// String concatenation macros.
			
 
				-#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
			
 
				-#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
			
 
				-#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
			
 
				-#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
			
 
				-#define NV_STRING2(x) #x
			
 
				-#define NV_STRING(x) NV_STRING2(x)
			
 
				-
			
 
				-#if NV_CC_MSVC
			
 
				-#define NV_MULTI_LINE_MACRO_BEGIN do {  
			
 
				-#define NV_MULTI_LINE_MACRO_END \
			
 
				-    __pragma(warning(push)) \
			
 
				-    __pragma(warning(disable:4127)) \
			
 
				-    } while(false) \
			
 
				-    __pragma(warning(pop))  
			
 
				-#else
			
 
				-#define NV_MULTI_LINE_MACRO_BEGIN do {
			
 
				-#define NV_MULTI_LINE_MACRO_END } while(false)
			
 
				-#endif
			
 
				-
			
 
				-#if NV_CC_CPP11
			
 
				-#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
			
 
				-#else
			
 
				-#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
			
 
				-#endif
			
 
				-#define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
			
 
				-
			
 
				-// Make sure type definitions are fine.
			
 
				-NV_COMPILER_CHECK(sizeof(int8) == 1);
			
 
				-NV_COMPILER_CHECK(sizeof(uint8) == 1);
			
 
				-NV_COMPILER_CHECK(sizeof(int16) == 2);
			
 
				-NV_COMPILER_CHECK(sizeof(uint16) == 2);
			
 
				-NV_COMPILER_CHECK(sizeof(int32) == 4);
			
 
				-NV_COMPILER_CHECK(sizeof(uint32) == 4);
			
 
				-NV_COMPILER_CHECK(sizeof(int32) == 4);
			
 
				-NV_COMPILER_CHECK(sizeof(uint32) == 4);
			
 
				-
			
 
				-
			
 
				-#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
			
 
				-
			
 
				-#if 0 // Disabled in The Witness.
			
 
				-#if NV_CC_MSVC
			
 
				-#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
			
 
				-#else
			
 
				-#define NV_MESSAGE(x) message(x)
			
 
				-#endif
			
 
				-#else
			
 
				-#define NV_MESSAGE(x) 
			
 
				-#endif
			
 
				-
			
 
				-
			
 
				-// Startup initialization macro.
			
 
				-#define NV_AT_STARTUP(some_code) \
			
 
				-    namespace { \
			
 
				-        static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
			
 
				-            NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
			
 
				-        } \
			
 
				-        NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
			
 
				-    }
			
 
				-
			
 
				-// Indicate the compiler that the parameter is not used to suppress compier warnings.
			
 
				-#define NV_UNUSED(a) ((a)=(a))
			
 
				-
			
 
				-// Null index. @@ Move this somewhere else... it's only used by nvmesh.
			
 
				-//const unsigned int NIL = unsigned int(~0);
			
 
				-//#define NIL uint(~0)
			
 
				-
			
 
				-// Null pointer.
			
 
				-#ifndef NULL
			
 
				-#define NULL 0
			
 
				-#endif
			
 
				-
			
 
				-// Platform includes
			
 
				-#if NV_CC_MSVC
			
 
				-#   if NV_OS_WIN32
			
 
				-#       include "defsvcwin32.h"
			
 
				-#   elif NV_OS_XBOX
			
 
				-#       include "defsvcxbox.h"
			
 
				-#   else
			
 
				-#       error "MSVC: Platform not supported"
			
 
				-#   endif
			
 
				-#elif NV_CC_GNUC
			
 
				-#   if NV_OS_LINUX
			
 
				-#       include "defsgnuclinux.h"
			
 
				-#   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
			
 
				-#       include "defsgnucdarwin.h"
			
 
				-#   elif NV_OS_MINGW
			
 
				-#       include "defsgnucwin32.h"
			
 
				-#   elif NV_OS_CYGWIN
			
 
				-#       error "GCC: Cygwin not supported"
			
 
				-#   else
			
 
				-#       error "GCC: Platform not supported"
			
 
				-#   endif
			
 
				-#endif
			
 
				-
			
 
				-#endif // NV_CORE_H
			
--- a/3rdparty/nvtt/nvcore/posh.h
+++ b/3rdparty/nvtt/nvcore/posh.h
@@ -1,1030 +0,0 @@
 
				-/**
			
 
				-@file posh.h
			
 
				-@author Brian Hook
			
 
				-@version 1.3.001
			
 
				-
			
 
				-Header file for POSH, the Portable Open Source Harness project.
			
 
				-
			
 
				-NOTE: Unlike most header files, this one is designed to be included
			
 
				-multiple times, which is why it does not have the @#ifndef/@#define
			
 
				-preamble.
			
 
				-
			
 
				-POSH relies on environment specified preprocessor symbols in order
			
 
				-to infer as much as possible about the target OS/architecture and
			
 
				-the host compiler capabilities.
			
 
				-
			
 
				-NOTE: POSH is simple and focused. It attempts to provide basic
			
 
				-functionality and information, but it does NOT attempt to emulate
			
 
				-missing functionality.  I am also not willing to make POSH dirty
			
 
				-and hackish to support truly ancient and/or outmoded and/or bizarre
			
 
				-technologies such as non-ANSI compilers, systems with non-IEEE
			
 
				-floating point formats, segmented 16-bit operating systems, etc.
			
 
				-
			
 
				-Please refer to the accompanying HTML documentation or visit
			
 
				-http://www.poshlib.org for more information on how to use POSH.
			
 
				-
			
 
				-LICENSE:
			
 
				-
			
 
				-Copyright (c) 2004, Brian Hook
			
 
				-All rights reserved.
			
 
				-
			
 
				-Redistribution and use in source and binary forms, with or without
			
 
				-modification, are permitted provided that the following conditions are
			
 
				-met:
			
 
				-
			
 
				-    * Redistributions of source code must retain the above copyright
			
 
				-      notice, this list of conditions and the following disclaimer.
			
 
				-
			
 
				-    * Redistributions in binary form must reproduce the above
			
 
				-      copyright notice, this list of conditions and the following
			
 
				-      disclaimer in the documentation and/or other materials provided
			
 
				-      with the distribution.
			
 
				-
			
 
				-    * The names of this package'ss contributors contributors may not
			
 
				-      be used to endorse or promote products derived from this
			
 
				-      software without specific prior written permission.
			
 
				-
			
 
				-
			
 
				-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
			
 
				-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
			
 
				-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
			
 
				-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
			
 
				-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
			
 
				-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
			
 
				-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
			
 
				-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
			
 
				-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
			
 
				-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
			
 
				-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
			
 
				-
			
 
				-REVISION:
			
 
				-
			
 
				-I've been lax about revision histories, so this starts at, um, 1.3.001.
			
 
				-Sorry for any inconveniences.
			
 
				-
			
 
				-1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary,
			
 
				-                      where I was not detecting Visual Studio
			
 
				-                      compilation on x86-64 systems.  Added check for
			
 
				-                      _M_X64 which should fix that.
			
 
				-
			
 
				-*/
			
 
				-/*
			
 
				-I have yet to find an authoritative reference on preprocessor
			
 
				-symbols, but so far this is what I've gleaned:
			
 
				-
			
 
				-GNU GCC/G++:
			
 
				-   - __GNUC__: GNU C version
			
 
				-   - __GNUG__: GNU C++ compiler
			
 
				-   - __sun__ : on Sun platforms
			
 
				-   - __svr4__: on Solaris and other SysV R4 platforms
			
 
				-   - __mips__: on MIPS processor platforms
			
 
				-   - __sparc_v9__: on Sparc 64-bit CPUs
			
 
				-   - __sparcv9: 64-bit Solaris
			
 
				-   - __MIPSEL__: mips processor, compiled for little endian
			
 
				-   - __MIPSEB__: mips processor, compiled for big endian
			
 
				-   - _R5900: MIPS/Sony/Toshiba R5900 (PS2)
			
 
				-   - mc68000: 68K
			
 
				-   - m68000: 68K
			
 
				-   - m68k: 68K
			
 
				-   - __palmos__: PalmOS
			
 
				-
			
 
				-Intel C/C++ Compiler:
			
 
				-   - __ECC      : compiler version, IA64 only
			
 
				-   - __EDG__
			
 
				-   - __ELF__
			
 
				-   - __GXX_ABI_VERSION
			
 
				-   - __i386     : IA-32 only
			
 
				-   - __i386__   : IA-32 only
			
 
				-   - i386       : IA-32 only
			
 
				-   - __ia64     : IA-64 only
			
 
				-   - __ia64__   : IA-64 only
			
 
				-   - ia64       : IA-64 only
			
 
				-   - __ICC      : IA-32 only
			
 
				-   - __INTEL_COMPILER : IA-32 or IA-64, newer versions only
			
 
				-
			
 
				-Apple's C/C++ Compiler for OS X:
			
 
				-   - __APPLE_CC__
			
 
				-   - __APPLE__
			
 
				-   - __BIG_ENDIAN__
			
 
				-   - __APPLE__
			
 
				-   - __ppc__
			
 
				-   - __MACH__
			
 
				-
			
 
				-DJGPP:
			
 
				-   - __MSDOS__
			
 
				-   - __unix__
			
 
				-   - __unix
			
 
				-   - __GNUC__
			
 
				-   - __GO32
			
 
				-   - DJGPP
			
 
				-   - __i386, __i386, i386
			
 
				-
			
 
				-Cray's C compiler:
			
 
				-   - _ADDR64: if 64-bit pointers
			
 
				-   - _UNICOS: 
			
 
				-   - __unix:
			
 
				-
			
 
				-SGI's CC compiler predefines the following (and more) with -ansi:
			
 
				-   - __sgi
			
 
				-   - __unix
			
 
				-   - __host_mips
			
 
				-   - _SYSTYPE_SVR4
			
 
				-   - __mips
			
 
				-   - _MIPSEB
			
 
				-   - anyone know if there is a predefined symbol for the compiler?!
			
 
				-
			
 
				-MinGW:
			
 
				-   - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
			
 
				-   - __MINGW32__
			
 
				-
			
 
				-Cygwin:
			
 
				-   - as Gnu C, but also
			
 
				-   - __unix__
			
 
				-   - __CYGWIN32__
			
 
				-
			
 
				-Microsoft Visual Studio predefines the following:
			
 
				-   - _MSC_VER
			
 
				-   - _WIN32: on Win32
			
 
				-   - _M_IX6 (on x86 systems)
			
 
				-   - _M_X64: on x86-64 systems
			
 
				-   - _M_ALPHA (on DEC AXP systems)
			
 
				-   - _SH3: WinCE, Hitachi SH-3
			
 
				-   - _MIPS: WinCE, MIPS
			
 
				-   - _ARM: WinCE, ARM
			
 
				-
			
 
				-Sun's C Compiler:
			
 
				-   - sun and _sun
			
 
				-   - unix and _unix
			
 
				-   - sparc and _sparc (SPARC systems only)
			
 
				-   - i386 and _i386 (x86 systems only)
			
 
				-   - __SVR4 (Solaris only)
			
 
				-   - __sparcv9: 64-bit solaris
			
 
				-   - __SUNPRO_C
			
 
				-   - _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
			
 
				-
			
 
				-Borland C/C++ predefines the following:
			
 
				-   - __BORLANDC__:
			
 
				-
			
 
				-DEC/Compaq C/C++ on Alpha:
			
 
				-   - __alpha
			
 
				-   - __arch64__
			
 
				-   - __unix__ (on Tru64 Unix)
			
 
				-   - __osf__
			
 
				-   - __DECC
			
 
				-   - __DECCXX (C++ compilation)
			
 
				-   - __DECC_VER
			
 
				-   - __DECCXX_VER
			
 
				-
			
 
				-IBM's AIX compiler:
			
 
				-   - __64BIT__ if 64-bit mode
			
 
				-   - _AIX
			
 
				-   - __IBMC__: C compiler version
			
 
				-   - __IBMCPP__: C++ compiler version
			
 
				-   - _LONG_LONG: compiler allows long long
			
 
				-
			
 
				-Watcom:
			
 
				-   - __WATCOMC__
			
 
				-   - __DOS__ : if targeting DOS
			
 
				-   - __386__ : if 32-bit support
			
 
				-   - __WIN32__ : if targetin 32-bit Windows
			
 
				-
			
 
				-HP-UX C/C++ Compiler:
			
 
				-   - __hpux
			
 
				-   - __unix
			
 
				-   - __hppa (on PA-RISC)
			
 
				-   - __LP64__: if compiled in 64-bit mode
			
 
				-
			
 
				-Metrowerks:
			
 
				-   - __MWERKS__
			
 
				-   - __powerpc__
			
 
				-   - _powerc
			
 
				-   - __MC68K__
			
 
				-   - macintosh when compiling for MacOS
			
 
				-   - __INTEL__ for x86 targets
			
 
				-   - __POWERPC__
			
 
				-
			
 
				-LLVM:
			
 
				-   - __llvm__
			
 
				-   - __clang__
			
 
				-*/
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Include <limits.h> optionally
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#ifdef POSH_USE_LIMITS_H
			
 
				-#  include <limits.h>
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Determine compilation environment
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined __ECC || defined __ICC || defined __INTEL_COMPILER
			
 
				-#  define POSH_COMPILER_STRING "Intel C/C++"
			
 
				-#  define POSH_COMPILER_INTEL 1
			
 
				-#endif
			
 
				-
			
 
				-#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__
			
 
				-#  define POSH_COMPILER_STRING    "MIPSpro C/C++"
			
 
				-#  define POSH_COMPILER_MIPSPRO 1 
			
 
				-#endif
			
 
				-
			
 
				-#if defined __hpux && !defined __GNUC__
			
 
				-#  define POSH_COMPILER_STRING "HP-UX CC"
			
 
				-#  define POSH_COMPILER_HPCC 1 
			
 
				-#endif
			
 
				-
			
 
				-#if defined __clang__
			
 
				-#  define POSH_COMPILER_STRING "Clang"
			
 
				-#  define POSH_COMPILER_CLANG 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __GNUC__ && !defined __clang__
			
 
				-#  define POSH_COMPILER_STRING "Gnu GCC"
			
 
				-#  define POSH_COMPILER_GCC 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __APPLE_CC__
			
 
				-   /* we don't define the compiler string here, let it be GNU */
			
 
				-#  define POSH_COMPILER_APPLECC 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __IBMC__ || defined __IBMCPP__
			
 
				-#  define POSH_COMPILER_STRING "IBM C/C++"
			
 
				-#  define POSH_COMPILER_IBM 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined _MSC_VER
			
 
				-#  define POSH_COMPILER_STRING "Microsoft Visual C++"
			
 
				-#  define POSH_COMPILER_MSVC 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __SUNPRO_C
			
 
				-#  define POSH_COMPILER_STRING "Sun Pro" 
			
 
				-#  define POSH_COMPILER_SUN 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __BORLANDC__
			
 
				-#  define POSH_COMPILER_STRING "Borland C/C++"
			
 
				-#  define POSH_COMPILER_BORLAND 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __MWERKS__
			
 
				-#  define POSH_COMPILER_STRING     "MetroWerks CodeWarrior"
			
 
				-#  define POSH_COMPILER_METROWERKS 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __DECC || defined __DECCXX
			
 
				-#  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
			
 
				-#  define POSH_COMPILER_DEC 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined __WATCOMC__
			
 
				-#  define POSH_COMPILER_STRING "Watcom C/C++"
			
 
				-#  define POSH_COMPILER_WATCOM 1
			
 
				-#endif
			
 
				-
			
 
				-#if !defined POSH_COMPILER_STRING
			
 
				-#  define POSH_COMPILER_STRING "Unknown compiler"
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Determine target operating system
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined linux || defined __linux__
			
 
				-#  define POSH_OS_LINUX 1 
			
 
				-#  define POSH_OS_STRING "Linux"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __FreeBSD__
			
 
				-#  define POSH_OS_FREEBSD 1 
			
 
				-#  define POSH_OS_STRING "FreeBSD"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __OpenBSD__
			
 
				-#  define POSH_OS_OPENBSD 1
			
 
				-#  define POSH_OS_STRING "OpenBSD"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __CYGWIN32__
			
 
				-#  define POSH_OS_CYGWIN32 1
			
 
				-#  define POSH_OS_STRING "Cygwin"
			
 
				-#endif
			
 
				-
			
 
				-#if defined GEKKO
			
 
				-#  define POSH_OS_GAMECUBE
			
 
				-#  define __powerpc__
			
 
				-#  define POSH_OS_STRING "GameCube"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __MINGW32__
			
 
				-#  define POSH_OS_MINGW 1
			
 
				-#  define POSH_OS_STRING "MinGW"
			
 
				-#endif
			
 
				-
			
 
				-#if defined GO32 && defined DJGPP && defined __MSDOS__ 
			
 
				-#  define POSH_OS_GO32 1
			
 
				-#  define POSH_OS_STRING "GO32/MS-DOS"
			
 
				-#endif
			
 
				-
			
 
				-/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
			
 
				-   otherwise Watcom assumes host=target */
			
 
				-#if defined __WATCOMC__  && defined __386__ && defined __DOS__
			
 
				-#  define POSH_OS_DOS32 1
			
 
				-#  define POSH_OS_STRING "DOS/32-bit"
			
 
				-#endif
			
 
				-
			
 
				-#if defined _UNICOS
			
 
				-#  define POSH_OS_UNICOS 1
			
 
				-#  define POSH_OS_STRING "UNICOS"
			
 
				-#endif
			
 
				-
			
 
				-#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
			
 
				-#  define POSH_OS_OSX 1
			
 
				-#  define POSH_OS_STRING "MacOS X"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __sun__ || defined sun || defined __sun || defined __solaris__
			
 
				-#  if defined __SVR4 || defined __svr4__ || defined __solaris__
			
 
				-#     define POSH_OS_STRING "Solaris"
			
 
				-#     define POSH_OS_SOLARIS 1
			
 
				-#  endif
			
 
				-#  if !defined POSH_OS_STRING
			
 
				-#     define POSH_OS_STRING "SunOS"
			
 
				-#     define POSH_OS_SUNOS 1
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined __sgi__ || defined sgi || defined __sgi
			
 
				-#  define POSH_OS_IRIX 1
			
 
				-#  define POSH_OS_STRING "Irix"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __hpux__ || defined __hpux
			
 
				-#  define POSH_OS_HPUX 1
			
 
				-#  define POSH_OS_STRING "HP-UX"
			
 
				-#endif
			
 
				-
			
 
				-#if defined _AIX
			
 
				-#  define POSH_OS_AIX 1
			
 
				-#  define POSH_OS_STRING "AIX"
			
 
				-#endif
			
 
				-
			
 
				-#if ( defined __alpha && defined __osf__ )
			
 
				-#  define POSH_OS_TRU64 1
			
 
				-#  define POSH_OS_STRING "Tru64"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __BEOS__ || defined __beos__
			
 
				-#  define POSH_OS_BEOS 1
			
 
				-#  define POSH_OS_STRING "BeOS"
			
 
				-#endif
			
 
				-
			
 
				-#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
			
 
				-#  define POSH_OS_AMIGA 1
			
 
				-#  define POSH_OS_STRING "Amiga"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __unix__
			
 
				-#  define POSH_OS_UNIX 1 
			
 
				-#  if !defined POSH_OS_STRING
			
 
				-#     define POSH_OS_STRING "Unix-like(generic)"
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined _WIN32_WCE
			
 
				-#  define POSH_OS_WINCE 1
			
 
				-#  define POSH_OS_STRING "Windows CE"
			
 
				-#endif
			
 
				-
			
 
				-#if defined _XBOX || defined _XBOX_VER
			
 
				-#  define POSH_OS_XBOX 1
			
 
				-#  define POSH_OS_STRING "XBOX"
			
 
				-#endif
			
 
				-
			
 
				-#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
			
 
				-#  define POSH_OS_WIN32 1
			
 
				-#  if !defined POSH_OS_XBOX
			
 
				-#     if defined _WIN64
			
 
				-#        define POSH_OS_WIN64 1
			
 
				-#        if !defined POSH_OS_STRING
			
 
				-#           define POSH_OS_STRING "Win64"
			
 
				-#        endif // !defined POSH_OS_STRING
			
 
				-#     else
			
 
				-#        if !defined POSH_OS_STRING
			
 
				-#           define POSH_OS_STRING "Win32"
			
 
				-#        endif
			
 
				-#     endif
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined __palmos__
			
 
				-#  define POSH_OS_PALM 1
			
 
				-#  define POSH_OS_STRING "PalmOS"
			
 
				-#endif
			
 
				-
			
 
				-#if defined THINK_C || defined macintosh
			
 
				-#  define POSH_OS_MACOS 1
			
 
				-#  define POSH_OS_STRING "MacOS"
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** -----------------------------------------------------------------------------
			
 
				-** Determine target CPU
			
 
				-** -----------------------------------------------------------------------------
			
 
				-*/
			
 
				-
			
 
				-#if defined GEKKO
			
 
				-#  define POSH_CPU_PPC750 1
			
 
				-#  define POSH_CPU_STRING "IBM PowerPC 750 (NGC)"
			
 
				-#endif
			
 
				-
			
 
				-#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
			
 
				-#  define POSH_CPU_68K 1
			
 
				-#  define POSH_CPU_STRING "MC68000"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __PPC__ || defined __POWERPC__  || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC
			
 
				-#  define POSH_CPU_PPC 1
			
 
				-#  if !defined POSH_CPU_STRING
			
 
				-#    if defined __powerpc64__
			
 
				-#       define POSH_CPU_STRING "PowerPC64"
			
 
				-#    else
			
 
				-#       define POSH_CPU_STRING "PowerPC"
			
 
				-#    endif
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined _CRAYT3E || defined _CRAYMPP
			
 
				-#  define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
			
 
				-#  define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
			
 
				-#endif
			
 
				-
			
 
				-#if defined CRAY || defined _CRAY && !defined _CRAYT3E
			
 
				-#  error Non-AXP Cray systems not supported
			
 
				-#endif
			
 
				-
			
 
				-#if defined _SH3
			
 
				-#  define POSH_CPU_SH3 1
			
 
				-#  define POSH_CPU_STRING "Hitachi SH-3"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __sh4__ || defined __SH4__
			
 
				-#  define POSH_CPU_SH3 1
			
 
				-#  define POSH_CPU_SH4 1
			
 
				-#  define POSH_CPU_STRING "Hitachi SH-4"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __sparc__ || defined __sparc
			
 
				-#  if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__
			
 
				-#     define POSH_CPU_SPARC64 1 
			
 
				-#     define POSH_CPU_STRING "Sparc/64"
			
 
				-#  else
			
 
				-#     define POSH_CPU_STRING "Sparc/32"
			
 
				-#  endif
			
 
				-#  define POSH_CPU_SPARC 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined ARM || defined __arm__ || defined _ARM
			
 
				-#  define POSH_CPU_STRONGARM 1
			
 
				-#  define POSH_CPU_STRING "ARM"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __aarch64__
			
 
				-#  define POSH_CPU_AARCH64 1
			
 
				-#  define POSH_CPU_STRING "ARM64"
			
 
				-#endif
			
 
				-
			
 
				-#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS
			
 
				-#  define POSH_CPU_MIPS 1 
			
 
				-#  if defined _R5900
			
 
				-#    define POSH_CPU_STRING "MIPS R5900 (PS2)"
			
 
				-#  else
			
 
				-#    define POSH_CPU_STRING "MIPS"
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined __ia64 || defined _M_IA64 || defined __ia64__ 
			
 
				-#  define POSH_CPU_IA64 1
			
 
				-#  define POSH_CPU_STRING "IA64"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64
			
 
				-#  define POSH_CPU_X86 1
			
 
				-#  if defined __x86_64__ || defined _M_X64
			
 
				-#     define POSH_CPU_X86_64 1 
			
 
				-#  endif
			
 
				-#  if defined POSH_CPU_X86_64
			
 
				-#     define POSH_CPU_STRING "AMD x86-64"
			
 
				-#  else
			
 
				-#     define POSH_CPU_STRING "Intel 386+"
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__
			
 
				-#  define POSH_CPU_AXP 1
			
 
				-#  define POSH_CPU_STRING "AXP"
			
 
				-#endif
			
 
				-
			
 
				-#if defined __hppa || defined hppa
			
 
				-#  define POSH_CPU_HPPA 1
			
 
				-#  define POSH_CPU_STRING "PA-RISC"
			
 
				-#endif
			
 
				-
			
 
				-#if !defined POSH_CPU_STRING
			
 
				-#  error POSH cannot determine target CPU
			
 
				-#  define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** -----------------------------------------------------------------------------
			
 
				-** Attempt to autodetect building for embedded on Sony PS2
			
 
				-** -----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if !defined POSH_OS_STRING
			
 
				-#  if !defined FORCE_DOXYGEN
			
 
				-#    define POSH_OS_EMBEDDED 1 
			
 
				-#  endif
			
 
				-#  if defined _R5900
			
 
				-#     define POSH_OS_STRING "Sony PS2(embedded)"
			
 
				-#  else
			
 
				-#     define POSH_OS_STRING "Embedded/Unknown"
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ---------------------------------------------------------------------------
			
 
				-** Handle cdecl, stdcall, fastcall, etc.
			
 
				-** ---------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64
			
 
				-#  if defined __GNUC__
			
 
				-#     define POSH_CDECL __attribute__((cdecl))
			
 
				-#     define POSH_STDCALL __attribute__((stdcall))
			
 
				-#     define POSH_FASTCALL __attribute__((fastcall))
			
 
				-#  elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ )
			
 
				-#     define POSH_CDECL    __cdecl
			
 
				-#     define POSH_STDCALL  __stdcall
			
 
				-#     define POSH_FASTCALL __fastcall
			
 
				-#  endif
			
 
				-#else
			
 
				-#  define POSH_CDECL    
			
 
				-#  define POSH_STDCALL  
			
 
				-#  define POSH_FASTCALL 
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ---------------------------------------------------------------------------
			
 
				-** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
			
 
				-** ---------------------------------------------------------------------------
			
 
				-*/
			
 
				-
			
 
				-/*
			
 
				-** We undefine this so that multiple inclusions will work
			
 
				-*/
			
 
				-#if defined POSH_IMPORTEXPORT
			
 
				-#  undef POSH_IMPORTEXPORT
			
 
				-#endif
			
 
				-
			
 
				-#if defined POSH_DLL
			
 
				-#   if defined POSH_OS_WIN32
			
 
				-#      if defined _MSC_VER 
			
 
				-#         if ( _MSC_VER >= 800 )
			
 
				-#            if defined POSH_BUILDING_LIB
			
 
				-#               define POSH_IMPORTEXPORT __declspec( dllexport )
			
 
				-#            else
			
 
				-#               define POSH_IMPORTEXPORT __declspec( dllimport )
			
 
				-#            endif
			
 
				-#         else
			
 
				-#            if defined POSH_BUILDING_LIB
			
 
				-#               define POSH_IMPORTEXPORT __export
			
 
				-#            else
			
 
				-#               define POSH_IMPORTEXPORT 
			
 
				-#            endif
			
 
				-#         endif
			
 
				-#      endif  /* defined _MSC_VER */
			
 
				-#      if defined __BORLANDC__
			
 
				-#         if ( __BORLANDC__ >= 0x500 )
			
 
				-#            if defined POSH_BUILDING_LIB 
			
 
				-#               define POSH_IMPORTEXPORT __declspec( dllexport )
			
 
				-#            else
			
 
				-#               define POSH_IMPORTEXPORT __declspec( dllimport )
			
 
				-#            endif
			
 
				-#         else
			
 
				-#            if defined POSH_BUILDING_LIB
			
 
				-#               define POSH_IMPORTEXPORT __export
			
 
				-#            else
			
 
				-#               define POSH_IMPORTEXPORT 
			
 
				-#            endif
			
 
				-#         endif
			
 
				-#      endif /* defined __BORLANDC__ */
			
 
				-       /* for all other compilers, we're just making a blanket assumption */
			
 
				-#      if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
			
 
				-#         if defined POSH_BUILDING_LIB
			
 
				-#            define POSH_IMPORTEXPORT __declspec( dllexport )
			
 
				-#         else
			
 
				-#            define POSH_IMPORTEXPORT __declspec( dllimport )
			
 
				-#         endif
			
 
				-#      endif /* all other compilers */
			
 
				-#      if !defined POSH_IMPORTEXPORT
			
 
				-#         error Building DLLs not supported on this compiler ([email protected] if you know how)
			
 
				-#      endif
			
 
				-#   endif /* defined POSH_OS_WIN32 */
			
 
				-#endif
			
 
				-
			
 
				-/* On pretty much everything else, we can thankfully just ignore this */
			
 
				-#if !defined POSH_IMPORTEXPORT
			
 
				-#  define POSH_IMPORTEXPORT
			
 
				-#endif
			
 
				-
			
 
				-#if defined FORCE_DOXYGEN
			
 
				-#  define POSH_DLL    
			
 
				-#  define POSH_BUILDING_LIB
			
 
				-#  undef POSH_DLL
			
 
				-#  undef POSH_BUILDING_LIB
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** (Re)define POSH_PUBLIC_API export signature 
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#ifdef POSH_PUBLIC_API
			
 
				-#  undef POSH_PUBLIC_API
			
 
				-#endif
			
 
				-
			
 
				-#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) )
			
 
				-#  define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT 
			
 
				-#else
			
 
				-#  define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Try to infer endianess.  Basically we just go through the CPUs we know are
			
 
				-** little endian, and assume anything that isn't one of those is big endian.
			
 
				-** As a sanity check, we also do this with operating systems we know are
			
 
				-** little endian, such as Windows.  Some processors are bi-endian, such as 
			
 
				-** the MIPS series, so we have to be careful about those.
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__
			
 
				-#  define POSH_ENDIAN_STRING "little"
			
 
				-#  define POSH_LITTLE_ENDIAN 1
			
 
				-#else
			
 
				-#  define POSH_ENDIAN_STRING "big"
			
 
				-#  define POSH_BIG_ENDIAN 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined FORCE_DOXYGEN
			
 
				-#  define POSH_LITTLE_ENDIAN
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Cross-platform compile time assertion macro
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** 64-bit Integer
			
 
				-**
			
 
				-** We don't require 64-bit support, nor do we emulate its functionality, we
			
 
				-** simply export it if it's available.  Since we can't count on <limits.h>
			
 
				-** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
			
 
				-#  define POSH_64BIT_INTEGER 1
			
 
				-typedef long posh_i64_t; 
			
 
				-typedef unsigned long posh_u64_t;
			
 
				-#  define POSH_I64( x ) ((posh_i64_t)x)
			
 
				-#  define POSH_U64( x ) ((posh_u64_t)x)
			
 
				-#  define POSH_I64_PRINTF_PREFIX "l"
			
 
				-#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
			
 
				-#  define POSH_64BIT_INTEGER 1
			
 
				-typedef __int64 posh_i64_t;
			
 
				-typedef unsigned __int64 posh_u64_t;
			
 
				-#  define POSH_I64( x ) ((posh_i64_t)(x##i64))
			
 
				-#  define POSH_U64( x ) ((posh_u64_t)(x##ui64))
			
 
				-#  define POSH_I64_PRINTF_PREFIX "I64"
			
 
				-#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
			
 
				-#  define POSH_64BIT_INTEGER 1
			
 
				-typedef long long posh_i64_t;
			
 
				-typedef unsigned long long posh_u64_t;
			
 
				-#  define POSH_U64( x ) ((posh_u64_t)(x##LL))
			
 
				-#  define POSH_I64( x ) ((posh_i64_t)(x##LL))
			
 
				-#  define POSH_I64_PRINTF_PREFIX "ll"
			
 
				-#endif
			
 
				-
			
 
				-/* hack */
			
 
				-/*#ifdef __MINGW32__
			
 
				-#undef POSH_I64
			
 
				-#undef POSH_U64
			
 
				-#undef POSH_I64_PRINTF_PREFIX
			
 
				-#define POSH_I64( x ) ((posh_i64_t)x)
			
 
				-#define POSH_U64( x ) ((posh_u64_t)x)
			
 
				-#define POSH_I64_PRINTF_PREFIX "I64"
			
 
				-#endif*/
			
 
				-
			
 
				-#ifdef FORCE_DOXYGEN
			
 
				-typedef long long posh_i64_t;
			
 
				-typedef unsigned long posh_u64_t;
			
 
				-#  define POSH_64BIT_INTEGER
			
 
				-#  define POSH_I64_PRINTF_PREFIX
			
 
				-#  define POSH_I64(x)
			
 
				-#  define POSH_U64(x)
			
 
				-#endif
			
 
				-
			
 
				-/** Minimum value for a 64-bit signed integer */
			
 
				-#define POSH_I64_MIN  POSH_I64(0x8000000000000000)
			
 
				-/** Maximum value for a 64-bit signed integer */
			
 
				-#define POSH_I64_MAX  POSH_I64(0x7FFFFFFFFFFFFFFF)
			
 
				-/** Minimum value for a 64-bit unsigned integer */
			
 
				-#define POSH_U64_MIN  POSH_U64(0)
			
 
				-/** Maximum value for a 64-bit unsigned integer */
			
 
				-#define POSH_U64_MAX  POSH_U64(0xFFFFFFFFFFFFFFFF)
			
 
				-
			
 
				-/* ----------------------------------------------------------------------------
			
 
				-** Basic Sized Types
			
 
				-**
			
 
				-** These types are expected to be EXACTLY sized so you can use them for
			
 
				-** serialization.
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#define POSH_FALSE 0 
			
 
				-#define POSH_TRUE  1 
			
 
				-
			
 
				-typedef int            posh_bool_t;
			
 
				-typedef unsigned char  posh_byte_t;
			
 
				-
			
 
				-/* NOTE: These assume that CHAR_BIT is 8!! */
			
 
				-typedef unsigned char  posh_u8_t;
			
 
				-typedef signed char    posh_i8_t;
			
 
				-
			
 
				-#if defined POSH_USE_LIMITS_H
			
 
				-#  if CHAR_BITS > 8
			
 
				-#    error This machine uses 9-bit characters.  This is a warning, you can comment this out now.
			
 
				-#  endif /* CHAR_BITS > 8 */
			
 
				-
			
 
				-/* 16-bit */
			
 
				-#  if ( USHRT_MAX == 65535 ) 
			
 
				-   typedef unsigned short posh_u16_t;
			
 
				-   typedef short          posh_i16_t;
			
 
				-#  else
			
 
				-   /* Yes, in theory there could still be a 16-bit character type and shorts are
			
 
				-      32-bits in size...if you find such an architecture, let me know =P */
			
 
				-#    error No 16-bit type found
			
 
				-#  endif
			
 
				-
			
 
				-/* 32-bit */
			
 
				-#  if ( INT_MAX == 2147483647 )
			
 
				-  typedef unsigned       posh_u32_t;
			
 
				-  typedef int            posh_i32_t;
			
 
				-#  elif ( LONG_MAX == 2147483647 )
			
 
				-  typedef unsigned long  posh_u32_t;
			
 
				-  typedef long           posh_i32_t;
			
 
				-#  else
			
 
				-      error No 32-bit type found
			
 
				-#  endif
			
 
				-
			
 
				-#else /* POSH_USE_LIMITS_H */
			
 
				-
			
 
				-  typedef unsigned short posh_u16_t;
			
 
				-  typedef short          posh_i16_t;
			
 
				-
			
 
				-#  if !defined POSH_OS_PALM
			
 
				-  typedef unsigned       posh_u32_t;
			
 
				-  typedef int            posh_i32_t;
			
 
				-#  else
			
 
				-  typedef unsigned long  posh_u32_t;
			
 
				-  typedef long           posh_i32_t;
			
 
				-#  endif
			
 
				-#endif
			
 
				-
			
 
				-/** Minimum value for a byte */
			
 
				-#define POSH_BYTE_MIN    0
			
 
				-/** Maximum value for an 8-bit unsigned value */
			
 
				-#define POSH_BYTE_MAX    255
			
 
				-/** Minimum value for a byte */
			
 
				-#define POSH_I16_MIN     ( ( posh_i16_t ) 0x8000 )
			
 
				-/** Maximum value for a 16-bit signed value */
			
 
				-#define POSH_I16_MAX     ( ( posh_i16_t ) 0x7FFF ) 
			
 
				-/** Minimum value for a 16-bit unsigned value */
			
 
				-#define POSH_U16_MIN     0
			
 
				-/** Maximum value for a 16-bit unsigned value */
			
 
				-#define POSH_U16_MAX     ( ( posh_u16_t ) 0xFFFF )
			
 
				-/** Minimum value for a 32-bit signed value */
			
 
				-#define POSH_I32_MIN     ( ( posh_i32_t ) 0x80000000 )
			
 
				-/** Maximum value for a 32-bit signed value */
			
 
				-#define POSH_I32_MAX     ( ( posh_i32_t ) 0x7FFFFFFF )
			
 
				-/** Minimum value for a 32-bit unsigned value */
			
 
				-#define POSH_U32_MIN     0
			
 
				-/** Maximum value for a 32-bit unsigned value */
			
 
				-#define POSH_U32_MAX     ( ( posh_u32_t ) 0xFFFFFFFF )
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** Sanity checks on expected sizes
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if !defined FORCE_DOXYGEN
			
 
				-
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
			
 
				-POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
			
 
				-
			
 
				-#if !defined POSH_NO_FLOAT
			
 
				-   POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
			
 
				-   POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
			
 
				-#endif
			
 
				-
			
 
				-#if defined POSH_64BIT_INTEGER
			
 
				-   POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
			
 
				-   POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
			
 
				-#endif
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** 64-bit pointer support
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
			
 
				-#  define POSH_64BIT_POINTER 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
			
 
				-#  define POSH_64BIT_POINTER 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
			
 
				-#   define POSH_64BIT_POINTER 1
			
 
				-#endif
			
 
				-
			
 
				-#if defined POSH_64BIT_POINTER
			
 
				-   POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
			
 
				-#elif !defined FORCE_DOXYGEN
			
 
				-/* if this assertion is hit then you're on a system that either has 64-bit
			
 
				-   addressing and we didn't catch it, or you're on a system with 16-bit
			
 
				-   pointers.  In the latter case, POSH doesn't actually care, we're just
			
 
				-   triggering this assertion to make sure you're aware of the situation,
			
 
				-   so feel free to delete it.
			
 
				-
			
 
				-   If this assertion is triggered on a known 32 or 64-bit platform, 
			
 
				-   please let us know ([email protected]) */
			
 
				-   POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
			
 
				-#endif
			
 
				-
			
 
				-#if defined FORCE_DOXYGEN
			
 
				-#  define POSH_64BIT_POINTER
			
 
				-#endif
			
 
				-
			
 
				-/*
			
 
				-** ----------------------------------------------------------------------------
			
 
				-** POSH Utility Functions
			
 
				-**
			
 
				-** These are optional POSH utility functions that are not required if you don't
			
 
				-** need anything except static checking of your host and target environment.
			
 
				-** 
			
 
				-** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
			
 
				-** to enforce their export if your own library is only using them internally.
			
 
				-** ----------------------------------------------------------------------------
			
 
				-*/
			
 
				-#ifdef __cplusplus
			
 
				-extern "C" {
			
 
				-#endif
			
 
				-
			
 
				-const char *POSH_GetArchString( void );
			
 
				-
			
 
				-#if !defined POSH_NO_FLOAT
			
 
				-
			
 
				-posh_u32_t  POSH_LittleFloatBits( float f );
			
 
				-posh_u32_t  POSH_BigFloatBits( float f );
			
 
				-float       POSH_FloatFromLittleBits( posh_u32_t bits );
			
 
				-float       POSH_FloatFromBigBits( posh_u32_t bits );
			
 
				-
			
 
				-void        POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
			
 
				-double      POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
			
 
				-
			
 
				-/* unimplemented
			
 
				-float      *POSH_WriteFloatToLittle( void *dst, float f );
			
 
				-float      *POSH_WriteFloatToBig( void *dst, float f );
			
 
				-float       POSH_ReadFloatFromLittle( const void *src );
			
 
				-float       POSH_ReadFloatFromBig( const void *src );
			
 
				-
			
 
				-double     *POSH_WriteDoubleToLittle( void *dst, double d );
			
 
				-double     *POSH_WriteDoubleToBig( void *dst, double d );
			
 
				-double      POSH_ReadDoubleFromLittle( const void *src );
			
 
				-double      POSH_ReadDoubleFromBig( const void *src );
			
 
				-*/
			
 
				-#endif /* !defined POSH_NO_FLOAT */
			
 
				-
			
 
				-#if defined FORCE_DOXYGEN
			
 
				-#  define POSH_NO_FLOAT
			
 
				-#  undef  POSH_NO_FLOAT
			
 
				-#endif
			
 
				-
			
 
				-extern posh_u16_t  POSH_SwapU16( posh_u16_t u );
			
 
				-extern posh_i16_t  POSH_SwapI16( posh_i16_t u );
			
 
				-extern posh_u32_t  POSH_SwapU32( posh_u32_t u );
			
 
				-extern posh_i32_t  POSH_SwapI32( posh_i32_t u );
			
 
				-
			
 
				-#if defined POSH_64BIT_INTEGER
			
 
				-
			
 
				-extern posh_u64_t  POSH_SwapU64( posh_u64_t u );
			
 
				-extern posh_i64_t  POSH_SwapI64( posh_i64_t u );
			
 
				-
			
 
				-#endif /*POSH_64BIT_INTEGER */
			
 
				-
			
 
				-extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
			
 
				-extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
			
 
				-extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
			
 
				-extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
			
 
				-
			
 
				-extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
			
 
				-extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
			
 
				-extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
			
 
				-extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
			
 
				-
			
 
				-extern posh_u16_t  POSH_ReadU16FromLittle( const void *src );
			
 
				-extern posh_i16_t  POSH_ReadI16FromLittle( const void *src );
			
 
				-extern posh_u32_t  POSH_ReadU32FromLittle( const void *src );
			
 
				-extern posh_i32_t  POSH_ReadI32FromLittle( const void *src );
			
 
				-
			
 
				-extern posh_u16_t  POSH_ReadU16FromBig( const void *src );
			
 
				-extern posh_i16_t  POSH_ReadI16FromBig( const void *src );
			
 
				-extern posh_u32_t  POSH_ReadU32FromBig( const void *src );
			
 
				-extern posh_i32_t  POSH_ReadI32FromBig( const void *src );
			
 
				-
			
 
				-#if defined POSH_64BIT_INTEGER
			
 
				-extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
			
 
				-extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
			
 
				-extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
			
 
				-extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
			
 
				-
			
 
				-extern posh_u64_t  POSH_ReadU64FromLittle( const void *src );
			
 
				-extern posh_i64_t  POSH_ReadI64FromLittle( const void *src );
			
 
				-extern posh_u64_t  POSH_ReadU64FromBig( const void *src );
			
 
				-extern posh_i64_t  POSH_ReadI64FromBig( const void *src );
			
 
				-#endif /* POSH_64BIT_INTEGER */
			
 
				-
			
 
				-#if defined POSH_LITTLE_ENDIAN
			
 
				-
			
 
				-#  define POSH_LittleU16(x) (x)
			
 
				-#  define POSH_LittleU32(x) (x)
			
 
				-#  define POSH_LittleI16(x) (x)
			
 
				-#  define POSH_LittleI32(x) (x)
			
 
				-#  if defined POSH_64BIT_INTEGER
			
 
				-#    define POSH_LittleU64(x) (x)
			
 
				-#    define POSH_LittleI64(x) (x)
			
 
				-#  endif /* defined POSH_64BIT_INTEGER */
			
 
				-
			
 
				-#  define POSH_BigU16(x) POSH_SwapU16(x)
			
 
				-#  define POSH_BigU32(x) POSH_SwapU32(x)
			
 
				-#  define POSH_BigI16(x) POSH_SwapI16(x)
			
 
				-#  define POSH_BigI32(x) POSH_SwapI32(x)
			
 
				-#  if defined POSH_64BIT_INTEGER
			
 
				-#    define POSH_BigU64(x) POSH_SwapU64(x)
			
 
				-#    define POSH_BigI64(x) POSH_SwapI64(x)
			
 
				-#  endif /* defined POSH_64BIT_INTEGER */
			
 
				-
			
 
				-#else
			
 
				-
			
 
				-#  define POSH_BigU16(x) (x)
			
 
				-#  define POSH_BigU32(x) (x)
			
 
				-#  define POSH_BigI16(x) (x)
			
 
				-#  define POSH_BigI32(x) (x)
			
 
				-
			
 
				-#  if defined POSH_64BIT_INTEGER
			
 
				-#    define POSH_BigU64(x) (x)
			
 
				-#    define POSH_BigI64(x) (x)
			
 
				-#  endif /* POSH_64BIT_INTEGER */
			
 
				-
			
 
				-#  define POSH_LittleU16(x) POSH_SwapU16(x)
			
 
				-#  define POSH_LittleU32(x) POSH_SwapU32(x)
			
 
				-#  define POSH_LittleI16(x) POSH_SwapI16(x)
			
 
				-#  define POSH_LittleI32(x) POSH_SwapI32(x)
			
 
				-
			
 
				-#  if defined POSH_64BIT_INTEGER
			
 
				-#    define POSH_LittleU64(x) POSH_SwapU64(x)
			
 
				-#    define POSH_LittleI64(x) POSH_SwapI64(x)
			
 
				-#  endif /* POSH_64BIT_INTEGER */
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-#ifdef __cplusplus
			
 
				-}
			
 
				-#endif
			
--- a/3rdparty/nvtt/nvcore/stdstream.h
+++ b/3rdparty/nvtt/nvcore/stdstream.h
@@ -1,459 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-#include "stream.h"
			
 
				-#include "array.h"
			
 
				-
			
 
				-#include <stdio.h> // fopen
			
 
				-#include <string.h> // memcpy
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-
			
 
				-    // Portable version of fopen.
			
 
				-    inline FILE * fileOpen(const char * fileName, const char * mode)
			
 
				-    {
			
 
				-        nvCheck(fileName != NULL);
			
 
				-#if NV_CC_MSVC && _MSC_VER >= 1400
			
 
				-        FILE * fp;
			
 
				-        if (fopen_s(&fp, fileName, mode) == 0) {
			
 
				-            return fp;
			
 
				-        }
			
 
				-        return NULL;
			
 
				-#else
			
 
				-        return fopen(fileName, mode);
			
 
				-#endif
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-    /// Base stdio stream.
			
 
				-    class NVCORE_CLASS StdStream : public Stream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(StdStream);
			
 
				-    public:
			
 
				-
			
 
				-        /// Ctor.
			
 
				-        StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
			
 
				-
			
 
				-        /// Dtor. 
			
 
				-        virtual ~StdStream()
			
 
				-        {
			
 
				-            if( m_fp != NULL && m_autoclose ) {
			
 
				-#if NV_OS_WIN32
			
 
				-                _fclose_nolock( m_fp );
			
 
				-#else
			
 
				-                fclose( m_fp );
			
 
				-#endif
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-        /** @name Stream implementation. */
			
 
				-        //@{
			
 
				-        virtual void seek( uint pos )
			
 
				-        {
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-            nvDebugCheck(pos <= size());
			
 
				-#if NV_OS_WIN32
			
 
				-            _fseek_nolock(m_fp, pos, SEEK_SET);
			
 
				-#else
			
 
				-            fseek(m_fp, pos, SEEK_SET);
			
 
				-#endif
			
 
				-        }
			
 
				-
			
 
				-        virtual uint tell() const
			
 
				-        {
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-#if NV_OS_WIN32
			
 
				-            return _ftell_nolock(m_fp);
			
 
				-#else
			
 
				-            return (uint)ftell(m_fp);
			
 
				-#endif
			
 
				-        }
			
 
				-
			
 
				-        virtual uint size() const
			
 
				-        {
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-#if NV_OS_WIN32
			
 
				-            uint pos = _ftell_nolock(m_fp);
			
 
				-            _fseek_nolock(m_fp, 0, SEEK_END);
			
 
				-            uint end = _ftell_nolock(m_fp);
			
 
				-            _fseek_nolock(m_fp, pos, SEEK_SET);
			
 
				-#else
			
 
				-            uint pos = (uint)ftell(m_fp);
			
 
				-            fseek(m_fp, 0, SEEK_END);
			
 
				-            uint end = (uint)ftell(m_fp);
			
 
				-            fseek(m_fp, pos, SEEK_SET);
			
 
				-#endif
			
 
				-            return end;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isError() const
			
 
				-        {
			
 
				-            return m_fp == NULL || ferror( m_fp ) != 0;
			
 
				-        }
			
 
				-
			
 
				-        virtual void clearError()
			
 
				-        {
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-            clearerr(m_fp);
			
 
				-        }
			
 
				-
			
 
				-        // @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream. 
			
 
				-        // That is, if we read the last byte of a file, then isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of the MemoryStream, a better 
			
 
				-        // implementation uses use ftell and fseek to determine our location within the file.
			
 
				-        virtual bool isAtEnd() const
			
 
				-        {
			
 
				-            if (m_fp == NULL) return true;
			
 
				-            //nvDebugCheck(m_fp != NULL);
			
 
				-            //return feof( m_fp ) != 0;
			
 
				-#if NV_OS_WIN32
			
 
				-            uint pos = _ftell_nolock(m_fp);
			
 
				-            _fseek_nolock(m_fp, 0, SEEK_END);
			
 
				-            uint end = _ftell_nolock(m_fp);
			
 
				-            _fseek_nolock(m_fp, pos, SEEK_SET);
			
 
				-#else
			
 
				-            uint pos = (uint)ftell(m_fp);
			
 
				-            fseek(m_fp, 0, SEEK_END);
			
 
				-            uint end = (uint)ftell(m_fp);
			
 
				-            fseek(m_fp, pos, SEEK_SET);
			
 
				-#endif
			
 
				-            return pos == end;
			
 
				-        }
			
 
				-
			
 
				-        /// Always true.
			
 
				-        virtual bool isSeekable() const { return true; }
			
 
				-        //@}
			
 
				-
			
 
				-    protected:
			
 
				-
			
 
				-        FILE * m_fp;
			
 
				-        bool m_autoclose;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// Standard output stream.
			
 
				-    class NVCORE_CLASS StdOutputStream : public StdStream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(StdOutputStream);
			
 
				-    public:
			
 
				-
			
 
				-        /// Construct stream by file name.
			
 
				-        StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
			
 
				-
			
 
				-        /// Construct stream by file handle.
			
 
				-        StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
			
 
				-        {
			
 
				-        }
			
 
				-
			
 
				-        /** @name Stream implementation. */
			
 
				-        //@{
			
 
				-        /// Write data.
			
 
				-        virtual uint serialize( void * data, uint len )
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-#if NV_OS_WIN32
			
 
				-            return (uint)_fwrite_nolock(data, 1, len, m_fp);
			
 
				-#elif NV_OS_LINUX
			
 
				-            return (uint)fwrite_unlocked(data, 1, len, m_fp);
			
 
				-#elif NV_OS_DARWIN
			
 
				-            // @@ No error checking, always returns len.
			
 
				-            for (uint i = 0; i < len; i++) {
			
 
				-                putc_unlocked(((char *)data)[i], m_fp);
			
 
				-            }
			
 
				-            return len;
			
 
				-#else
			
 
				-            return (uint)fwrite(data, 1, len, m_fp);
			
 
				-#endif
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isLoading() const
			
 
				-        {
			
 
				-            return false;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isSaving() const
			
 
				-        {
			
 
				-            return true;
			
 
				-        }
			
 
				-        //@}
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// Standard input stream.
			
 
				-    class NVCORE_CLASS StdInputStream : public StdStream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(StdInputStream);
			
 
				-    public:
			
 
				-
			
 
				-        /// Construct stream by file name.
			
 
				-        StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
			
 
				-
			
 
				-        /// Construct stream by file handle.
			
 
				-        StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
			
 
				-        {
			
 
				-        }
			
 
				-
			
 
				-        /** @name Stream implementation. */
			
 
				-        //@{
			
 
				-        /// Read data.
			
 
				-        virtual uint serialize( void * data, uint len )
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            nvDebugCheck(m_fp != NULL);
			
 
				-#if NV_OS_WIN32
			
 
				-            return (uint)_fread_nolock(data, 1, len, m_fp);
			
 
				-#elif NV_OS_LINUX
			
 
				-            return (uint)fread_unlocked(data, 1, len, m_fp);
			
 
				-#elif NV_OS_DARWIN
			
 
				-            // @@ No error checking, always returns len.
			
 
				-            for (uint i = 0; i < len; i++) {
			
 
				-                ((char *)data)[i] = getc_unlocked(m_fp);
			
 
				-            }
			
 
				-            return len;
			
 
				-#else
			
 
				-            return (uint)fread(data, 1, len, m_fp);
			
 
				-#endif
			
 
				-            
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isLoading() const
			
 
				-        {
			
 
				-            return true;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isSaving() const
			
 
				-        {
			
 
				-            return false;
			
 
				-        }
			
 
				-        //@}
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-
			
 
				-    /// Memory input stream.
			
 
				-    class NVCORE_CLASS MemoryInputStream : public Stream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(MemoryInputStream);
			
 
				-    public:
			
 
				-
			
 
				-        /// Ctor.
			
 
				-        MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
			
 
				-
			
 
				-        /** @name Stream implementation. */
			
 
				-        //@{
			
 
				-        /// Read data.
			
 
				-        virtual uint serialize( void * data, uint len )
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            nvDebugCheck(!isError());
			
 
				-
			
 
				-            uint left = m_size - tell();
			
 
				-            if (len > left) len = left;
			
 
				-
			
 
				-            memcpy( data, m_ptr, len );
			
 
				-            m_ptr += len;
			
 
				-
			
 
				-            return len;
			
 
				-        }
			
 
				-
			
 
				-        virtual void seek( uint pos )
			
 
				-        {
			
 
				-            nvDebugCheck(!isError());
			
 
				-            m_ptr = m_mem + pos;
			
 
				-            nvDebugCheck(!isError());
			
 
				-        }
			
 
				-
			
 
				-        virtual uint tell() const
			
 
				-        {
			
 
				-            nvDebugCheck(m_ptr >= m_mem);
			
 
				-            return uint(m_ptr - m_mem);
			
 
				-        }
			
 
				-
			
 
				-        virtual uint size() const
			
 
				-        {
			
 
				-            return m_size;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isError() const
			
 
				-        {
			
 
				-            return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
			
 
				-        }
			
 
				-
			
 
				-        virtual void clearError()
			
 
				-        {
			
 
				-            // Nothing to do.
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isAtEnd() const
			
 
				-        {
			
 
				-            return m_ptr == m_mem + m_size;
			
 
				-        }
			
 
				-
			
 
				-        /// Always true.
			
 
				-        virtual bool isSeekable() const
			
 
				-        {
			
 
				-            return true;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isLoading() const
			
 
				-        {
			
 
				-            return true;
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isSaving() const
			
 
				-        {
			
 
				-            return false;
			
 
				-        }
			
 
				-        //@}
			
 
				-
			
 
				-        const uint8 * ptr() const { return m_ptr; }
			
 
				-
			
 
				-
			
 
				-    private:
			
 
				-
			
 
				-        const uint8 * m_mem;
			
 
				-        const uint8 * m_ptr;
			
 
				-        uint m_size;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// Buffer output stream.
			
 
				-    class NVCORE_CLASS BufferOutputStream : public Stream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(BufferOutputStream);
			
 
				-    public:
			
 
				-
			
 
				-        BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
			
 
				-
			
 
				-        virtual uint serialize( void * data, uint len )
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            m_buffer.append((uint8 *)data, len);
			
 
				-            return len;
			
 
				-        }
			
 
				-
			
 
				-        virtual void seek( uint /*pos*/ ) { /*Not implemented*/ }
			
 
				-        virtual uint tell() const { return m_buffer.size(); }
			
 
				-        virtual uint size() const { return m_buffer.size(); }
			
 
				-
			
 
				-        virtual bool isError() const { return false; }
			
 
				-        virtual void clearError() {}
			
 
				-
			
 
				-        virtual bool isAtEnd() const { return true; }
			
 
				-        virtual bool isSeekable() const { return false; }
			
 
				-        virtual bool isLoading() const { return false; }
			
 
				-        virtual bool isSaving() const { return true; }
			
 
				-
			
 
				-    private:
			
 
				-        Array<uint8> & m_buffer;
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// Protected input stream.
			
 
				-    class NVCORE_CLASS ProtectedStream : public Stream
			
 
				-    {
			
 
				-        NV_FORBID_COPY(ProtectedStream);
			
 
				-    public:
			
 
				-
			
 
				-        /// Ctor.
			
 
				-        ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
			
 
				-        { 
			
 
				-        }
			
 
				-
			
 
				-        /// Ctor.
			
 
				-        ProtectedStream( Stream * s, bool autodelete = true ) : 
			
 
				-        m_s(s), m_autodelete(autodelete) 
			
 
				-        {
			
 
				-            nvDebugCheck(m_s != NULL);
			
 
				-        }
			
 
				-
			
 
				-        /// Dtor.
			
 
				-        virtual ~ProtectedStream()
			
 
				-        {
			
 
				-            if( m_autodelete ) {
			
 
				-                delete m_s;
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-        /** @name Stream implementation. */
			
 
				-        //@{
			
 
				-        /// Read data.
			
 
				-        virtual uint serialize( void * data, uint len )
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            len = m_s->serialize( data, len );
			
 
				-
			
 
				-            if( m_s->isError() ) {
			
 
				-                throw;
			
 
				-            }
			
 
				-
			
 
				-            return len;
			
 
				-        }
			
 
				-
			
 
				-        virtual void seek( uint pos )
			
 
				-        {
			
 
				-            m_s->seek( pos );
			
 
				-
			
 
				-            if( m_s->isError() ) {
			
 
				-                throw;
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-        virtual uint tell() const
			
 
				-        {
			
 
				-            return m_s->tell();
			
 
				-        }
			
 
				-
			
 
				-        virtual uint size() const
			
 
				-        {
			
 
				-            return m_s->size();
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isError() const
			
 
				-        {
			
 
				-            return m_s->isError();
			
 
				-        }
			
 
				-
			
 
				-        virtual void clearError()
			
 
				-        {
			
 
				-            m_s->clearError();
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isAtEnd() const
			
 
				-        {
			
 
				-            return m_s->isAtEnd();
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isSeekable() const
			
 
				-        {
			
 
				-            return m_s->isSeekable();
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isLoading() const
			
 
				-        {
			
 
				-            return m_s->isLoading();
			
 
				-        }
			
 
				-
			
 
				-        virtual bool isSaving() const
			
 
				-        {
			
 
				-            return m_s->isSaving();
			
 
				-        }
			
 
				-        //@}
			
 
				-
			
 
				-
			
 
				-    private:
			
 
				-
			
 
				-        Stream * const m_s;
			
 
				-        bool const m_autodelete;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-
			
 
				-//#endif // NV_CORE_STDSTREAM_H
			
--- a/3rdparty/nvtt/nvcore/stream.h
+++ b/3rdparty/nvtt/nvcore/stream.h
@@ -1,163 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_STREAM_H
			
 
				-#define NV_CORE_STREAM_H
			
 
				-
			
 
				-#include "nvcore.h"
			
 
				-#include "debug.h"
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-
			
 
				-    /// Base stream class.
			
 
				-    class NVCORE_CLASS Stream {
			
 
				-    public:
			
 
				-
			
 
				-        enum ByteOrder {
			
 
				-            LittleEndian = false,
			
 
				-            BigEndian = true,
			
 
				-        };
			
 
				-
			
 
				-        /// Get the byte order of the system.
			
 
				-        static ByteOrder getSystemByteOrder() { 
			
 
				-#if NV_LITTLE_ENDIAN
			
 
				-            return LittleEndian;
			
 
				-#else
			
 
				-            return BigEndian;
			
 
				-#endif
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-        /// Ctor.
			
 
				-        Stream() : m_byteOrder(LittleEndian) { }
			
 
				-
			
 
				-        /// Virtual destructor.
			
 
				-        virtual ~Stream() {}
			
 
				-
			
 
				-        /// Set byte order.
			
 
				-        void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
			
 
				-
			
 
				-        /// Get byte order.
			
 
				-        ByteOrder byteOrder() const { return m_byteOrder; }
			
 
				-
			
 
				-
			
 
				-        /// Serialize the given data.
			
 
				-        virtual uint serialize( void * data, uint len ) = 0;
			
 
				-
			
 
				-        /// Move to the given position in the archive.
			
 
				-        virtual void seek( uint pos ) = 0;
			
 
				-
			
 
				-        /// Return the current position in the archive.
			
 
				-        virtual uint tell() const = 0;
			
 
				-
			
 
				-        /// Return the current size of the archive.
			
 
				-        virtual uint size() const = 0;
			
 
				-
			
 
				-        /// Determine if there has been any error.
			
 
				-        virtual bool isError() const = 0;
			
 
				-
			
 
				-        /// Clear errors.
			
 
				-        virtual void clearError() = 0;
			
 
				-
			
 
				-        /// Return true if the stream is at the end.
			
 
				-        virtual bool isAtEnd() const = 0;
			
 
				-
			
 
				-        /// Return true if the stream is seekable.
			
 
				-        virtual bool isSeekable() const = 0;
			
 
				-
			
 
				-        /// Return true if this is an input stream.
			
 
				-        virtual bool isLoading() const = 0;
			
 
				-
			
 
				-        /// Return true if this is an output stream.
			
 
				-        virtual bool isSaving() const = 0;
			
 
				-
			
 
				-
			
 
				-        void advance(uint offset) { seek(tell() + offset); }
			
 
				-
			
 
				-
			
 
				-        // friends	
			
 
				-        friend Stream & operator<<( Stream & s, bool & c ) {
			
 
				-#if NV_OS_DARWIN && !NV_CC_CPP11
			
 
				-            nvStaticCheck(sizeof(bool) == 4);
			
 
				-            uint8 b = c ? 1 : 0;
			
 
				-            s.serialize( &b, 1 );
			
 
				-            c = (b == 1);
			
 
				-#else
			
 
				-            nvStaticCheck(sizeof(bool) == 1);
			
 
				-            s.serialize( &c, 1 );
			
 
				-#endif
			
 
				-            return s;
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, char & c ) {
			
 
				-            nvStaticCheck(sizeof(char) == 1);
			
 
				-            s.serialize( &c, 1 );
			
 
				-            return s;
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, uint8 & c ) {
			
 
				-            nvStaticCheck(sizeof(uint8) == 1);
			
 
				-            s.serialize( &c, 1 );
			
 
				-            return s;
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, int8 & c ) {
			
 
				-            nvStaticCheck(sizeof(int8) == 1);
			
 
				-            s.serialize( &c, 1 );
			
 
				-            return s;
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, uint16 & c ) {
			
 
				-            nvStaticCheck(sizeof(uint16) == 2);
			
 
				-            return s.byteOrderSerialize( &c, 2 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, int16 & c ) {
			
 
				-            nvStaticCheck(sizeof(int16) == 2);
			
 
				-            return s.byteOrderSerialize( &c, 2 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, uint32 & c ) {
			
 
				-            nvStaticCheck(sizeof(uint32) == 4);
			
 
				-            return s.byteOrderSerialize( &c, 4 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, int32 & c ) {
			
 
				-            nvStaticCheck(sizeof(int32) == 4);
			
 
				-            return s.byteOrderSerialize( &c, 4 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, uint64 & c ) {
			
 
				-            nvStaticCheck(sizeof(uint64) == 8);
			
 
				-            return s.byteOrderSerialize( &c, 8 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, int64 & c ) {
			
 
				-            nvStaticCheck(sizeof(int64) == 8);
			
 
				-            return s.byteOrderSerialize( &c, 8 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, float & c ) {
			
 
				-            nvStaticCheck(sizeof(float) == 4);
			
 
				-            return s.byteOrderSerialize( &c, 4 );
			
 
				-        }
			
 
				-        friend Stream & operator<<( Stream & s, double & c ) {
			
 
				-            nvStaticCheck(sizeof(double) == 8);
			
 
				-            return s.byteOrderSerialize( &c, 8 );
			
 
				-        }
			
 
				-
			
 
				-    protected:
			
 
				-
			
 
				-        /// Serialize in the stream byte order.
			
 
				-        Stream & byteOrderSerialize( void * v, uint len ) {
			
 
				-            if( m_byteOrder == getSystemByteOrder() ) {
			
 
				-                serialize( v, len );
			
 
				-            }
			
 
				-            else {
			
 
				-                for( uint i = len; i > 0; i-- ) {
			
 
				-                    serialize( (uint8 *)v + i - 1, 1 );
			
 
				-                }
			
 
				-            }
			
 
				-            return *this;
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-    private:
			
 
				-
			
 
				-        ByteOrder m_byteOrder;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_STREAM_H
			
--- a/3rdparty/nvtt/nvcore/strlib.h
+++ b/3rdparty/nvtt/nvcore/strlib.h
@@ -1,429 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_STRING_H
			
 
				-#define NV_CORE_STRING_H
			
 
				-
			
 
				-#include "debug.h"
			
 
				-#include "hash.h" // hash
			
 
				-
			
 
				-//#include <string.h> // strlen, etc.
			
 
				-
			
 
				-#if NV_OS_WIN32
			
 
				-#define NV_PATH_SEPARATOR '\\'
			
 
				-#else
			
 
				-#define NV_PATH_SEPARATOR '/'
			
 
				-#endif
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-
			
 
				-    NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
			
 
				-
			
 
				-    /// String hash based on Bernstein's hash.
			
 
				-    inline uint strHash(const char * data, uint h = 5381)
			
 
				-    {
			
 
				-        uint i = 0;
			
 
				-        while(data[i] != 0) {
			
 
				-            h = (33 * h) ^ uint(data[i]);
			
 
				-            i++;
			
 
				-        }
			
 
				-        return h;
			
 
				-    }
			
 
				-
			
 
				-    template <> struct Hash<const char *> {
			
 
				-        uint operator()(const char * str) const { return strHash(str); }
			
 
				-    };
			
 
				-
			
 
				-    NVCORE_API uint strLen(const char * str) NV_PURE;                       // Asserts on NULL strings.
			
 
				-
			
 
				-    NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE;       // Asserts on NULL strings.
			
 
				-    NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE;   // Asserts on NULL strings.
			
 
				-    NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE;     // Accepts NULL strings.
			
 
				-    NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
			
 
				-
			
 
				-    template <> struct Equal<const char *> {
			
 
				-        bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
			
 
				-    };
			
 
				-
			
 
				-    NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
			
 
				-    NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
			
 
				-
			
 
				-
			
 
				-    NVCORE_API void strCpy(char * dst, uint size, const char * src);
			
 
				-    NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
			
 
				-    NVCORE_API void strCat(char * dst, uint size, const char * src);
			
 
				-
			
 
				-    NVCORE_API const char * strSkipWhiteSpace(const char * str);
			
 
				-    NVCORE_API char * strSkipWhiteSpace(char * str);
			
 
				-
			
 
				-    NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
			
 
				-
			
 
				-    NVCORE_API bool isNumber(const char * str) NV_PURE;
			
 
				-
			
 
				-    /* @@ Implement these two functions and modify StringBuilder to use them?
			
 
				-    NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
			
 
				-    NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
			
 
				-
			
 
				-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
			
 
				-    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
			
 
				-        va_list args;
			
 
				-        va_start(args, fmt);
			
 
				-        strFormatList(buffer, count, fmt, args);
			
 
				-        va_end(args);
			
 
				-    }
			
 
				-    template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
			
 
				-        va_list tmp;
			
 
				-        va_copy(tmp, args);
			
 
				-        strFormatList(buffer, count, fmt, tmp);
			
 
				-        va_end(tmp);
			
 
				-    }*/
			
 
				-
			
 
				-    template <int count> void strCpySafe(char (&buffer)[count], const char *src) {
			
 
				-        strCpy(buffer, count, src);
			
 
				-    }
			
 
				-
			
 
				-    template <int count> void strCatSafe(char (&buffer)[count], const char * src) {
			
 
				-        strCat(buffer, count, src);
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-
			
 
				-    /// String builder.
			
 
				-    class NVCORE_CLASS StringBuilder
			
 
				-    {
			
 
				-    public:
			
 
				-
			
 
				-        StringBuilder();
			
 
				-        explicit StringBuilder( uint size_hint );
			
 
				-        StringBuilder(const char * str);
			
 
				-        StringBuilder(const char * str, uint len);
			
 
				-        StringBuilder(const StringBuilder & other);
			
 
				-
			
 
				-        ~StringBuilder();
			
 
				-
			
 
				-        StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
			
 
				-        StringBuilder & formatList( const char * format, va_list arg );
			
 
				-
			
 
				-        StringBuilder & append(const char * str);
			
 
				-		StringBuilder & append(const char * str, uint len);
			
 
				-        StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
			
 
				-        StringBuilder & appendFormatList(const char * format, va_list arg);
			
 
				-
			
 
				-        StringBuilder & appendSpace(uint n);
			
 
				-
			
 
				-        StringBuilder & number( int i, int base = 10 );
			
 
				-        StringBuilder & number( uint i, int base = 10 );
			
 
				-
			
 
				-        StringBuilder & reserve(uint size_hint);
			
 
				-        StringBuilder & copy(const char * str);
			
 
				-        StringBuilder & copy(const char * str, uint len);
			
 
				-        StringBuilder & copy(const StringBuilder & str);
			
 
				-
			
 
				-        StringBuilder & toLower();
			
 
				-        StringBuilder & toUpper();
			
 
				-
			
 
				-        bool endsWith(const char * str) const;
			
 
				-        bool beginsWith(const char * str) const;
			
 
				-
			
 
				-        char * reverseFind(char c);
			
 
				-
			
 
				-        void reset();
			
 
				-        bool isNull() const { return m_size == 0; }
			
 
				-
			
 
				-        // const char * accessors
			
 
				-        //operator const char * () const { return m_str; }
			
 
				-        //operator char * () { return m_str; }
			
 
				-        const char * str() const { return m_str; }
			
 
				-        char * str() { return m_str; }
			
 
				-
			
 
				-        char * release();
			
 
				-
			
 
				-        /// Implement value semantics.
			
 
				-        StringBuilder & operator=( const StringBuilder & s ) {
			
 
				-            return copy(s);
			
 
				-        }
			
 
				-
			
 
				-        /// Implement value semantics.
			
 
				-        StringBuilder & operator=( const char * s ) {
			
 
				-            return copy(s);
			
 
				-        }
			
 
				-
			
 
				-        /// Equal operator.
			
 
				-        bool operator==( const StringBuilder & s ) const {
			
 
				-            return strMatch(s.m_str, m_str);
			
 
				-        }
			
 
				-
			
 
				-        /// Return the exact length.
			
 
				-        uint length() const { return isNull() ? 0 : strLen(m_str); }
			
 
				-
			
 
				-        /// Return the size of the string container.
			
 
				-        uint capacity() const { return m_size; }
			
 
				-
			
 
				-        /// Return the hash of the string.
			
 
				-        uint hash() const { return isNull() ? 0 : strHash(m_str); }
			
 
				-
			
 
				-        // Swap strings.
			
 
				-        friend void swap(StringBuilder & a, StringBuilder & b);
			
 
				-
			
 
				-    protected:
			
 
				-
			
 
				-        /// Size of the string container.
			
 
				-        uint m_size;
			
 
				-
			
 
				-        /// String.
			
 
				-        char * m_str;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// Path string. @@ This should be called PathBuilder.
			
 
				-    class NVCORE_CLASS Path : public StringBuilder
			
 
				-    {
			
 
				-    public:
			
 
				-        Path() : StringBuilder() {}
			
 
				-        explicit Path(int size_hint) : StringBuilder(size_hint) {}
			
 
				-        Path(const char * str) : StringBuilder(str) {}
			
 
				-        Path(const Path & path) : StringBuilder(path) {}
			
 
				-
			
 
				-        const char * fileName() const;
			
 
				-        const char * extension() const;
			
 
				-
			
 
				-        void translatePath(char pathSeparator = NV_PATH_SEPARATOR);
			
 
				-
			
 
				-        void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
			
 
				-
			
 
				-        void stripFileName();
			
 
				-        void stripExtension();
			
 
				-
			
 
				-        // statics
			
 
				-        NVCORE_API static char separator();
			
 
				-        NVCORE_API static const char * fileName(const char *);
			
 
				-        NVCORE_API static const char * extension(const char *);
			
 
				-
			
 
				-        NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    /// String class.
			
 
				-    class NVCORE_CLASS String
			
 
				-    {
			
 
				-    public:
			
 
				-
			
 
				-        /// Constructs a null string. @sa isNull()
			
 
				-        String()
			
 
				-        {
			
 
				-            data = NULL;
			
 
				-        }
			
 
				-
			
 
				-        /// Constructs a shared copy of str.
			
 
				-        String(const String & str)
			
 
				-        {
			
 
				-            data = str.data;
			
 
				-            if (data != NULL) addRef();
			
 
				-        }
			
 
				-
			
 
				-        /// Constructs a shared string from a standard string.
			
 
				-        String(const char * str)
			
 
				-        {
			
 
				-            setString(str);
			
 
				-        }
			
 
				-
			
 
				-        /// Constructs a shared string from a standard string.
			
 
				-        String(const char * str, int length)
			
 
				-        {
			
 
				-            setString(str, length);
			
 
				-        }
			
 
				-
			
 
				-        /// Constructs a shared string from a StringBuilder.
			
 
				-        String(const StringBuilder & str)
			
 
				-        {
			
 
				-            setString(str);
			
 
				-        }
			
 
				-
			
 
				-        /// Dtor.
			
 
				-        ~String()
			
 
				-        {
			
 
				-            release();
			
 
				-        }
			
 
				-
			
 
				-        String clone() const;
			
 
				-
			
 
				-        /// Release the current string and allocate a new one.
			
 
				-        const String & operator=( const char * str )
			
 
				-        {
			
 
				-            release();
			
 
				-            setString( str );
			
 
				-            return *this;
			
 
				-        }
			
 
				-
			
 
				-        /// Release the current string and allocate a new one.
			
 
				-        const String & operator=( const StringBuilder & str )
			
 
				-        {
			
 
				-            release();
			
 
				-            setString( str );
			
 
				-            return *this;
			
 
				-        }
			
 
				-
			
 
				-        /// Implement value semantics.
			
 
				-        String & operator=( const String & str )
			
 
				-        {
			
 
				-            if (str.data != data)
			
 
				-            {
			
 
				-                release();
			
 
				-                data = str.data;
			
 
				-                addRef();
			
 
				-            }
			
 
				-            return *this;
			
 
				-        }
			
 
				-
			
 
				-        /// Equal operator.
			
 
				-        bool operator==( const String & str ) const
			
 
				-        {
			
 
				-            return strMatch(str.data, data);
			
 
				-        }
			
 
				-
			
 
				-        /// Equal operator.
			
 
				-        bool operator==( const char * str ) const
			
 
				-        {
			
 
				-            return strMatch(str, data);
			
 
				-        }
			
 
				-
			
 
				-        /// Not equal operator.
			
 
				-        bool operator!=( const String & str ) const
			
 
				-        {
			
 
				-            return !strMatch(str.data, data);
			
 
				-        }
			
 
				-
			
 
				-        /// Not equal operator.
			
 
				-        bool operator!=( const char * str ) const
			
 
				-        {
			
 
				-            return !strMatch(str, data);
			
 
				-        }
			
 
				-
			
 
				-        /// Returns true if this string is the null string.
			
 
				-        bool isNull() const { return data == NULL; }
			
 
				-
			
 
				-        /// Return the exact length.
			
 
				-        uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
			
 
				-
			
 
				-        /// Return the hash of the string.
			
 
				-        uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
			
 
				-
			
 
				-        /// const char * cast operator.
			
 
				-        operator const char * () const { return data; }
			
 
				-
			
 
				-        /// Get string pointer.
			
 
				-        const char * str() const { return data; }
			
 
				-
			
 
				-
			
 
				-    private:
			
 
				-
			
 
				-        // Add reference count.
			
 
				-        void addRef();
			
 
				-
			
 
				-        // Decrease reference count.
			
 
				-        void release();
			
 
				-
			
 
				-        uint16 getRefCount() const
			
 
				-        {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            return *reinterpret_cast<const uint16 *>(data - 2);
			
 
				-        }
			
 
				-
			
 
				-        void setRefCount(uint16 count) {
			
 
				-            nvDebugCheck(data != NULL);
			
 
				-            nvCheck(count < 0xFFFF);
			
 
				-            *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
			
 
				-        }
			
 
				-
			
 
				-        void setData(const char * str) {
			
 
				-            data = str + 2;
			
 
				-        }
			
 
				-
			
 
				-        void allocString(const char * str)
			
 
				-        {
			
 
				-            allocString(str, strLen(str));
			
 
				-        }
			
 
				-
			
 
				-        void allocString(const char * str, uint length);
			
 
				-
			
 
				-        void setString(const char * str);
			
 
				-        void setString(const char * str, uint length);
			
 
				-        void setString(const StringBuilder & str);
			
 
				-
			
 
				-        // Swap strings.
			
 
				-        friend void swap(String & a, String & b);
			
 
				-
			
 
				-    private:
			
 
				-
			
 
				-        const char * data;
			
 
				-
			
 
				-    };
			
 
				-
			
 
				-    template <> struct Hash<String> {
			
 
				-        uint operator()(const String & str) const { return str.hash(); }
			
 
				-    };
			
 
				-
			
 
				-
			
 
				-    // Like AutoPtr, but for const char strings.
			
 
				-    class AutoString
			
 
				-    {
			
 
				-        NV_FORBID_COPY(AutoString);
			
 
				-        NV_FORBID_HEAPALLOC();
			
 
				-    public:
			
 
				-
			
 
				-        // Ctor.
			
 
				-        AutoString(const char * p = NULL) : m_ptr(p) { }
			
 
				-
			
 
				-#if NV_CC_CPP11
			
 
				-        // Move ctor.
			
 
				-        AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
			
 
				-#endif
			
 
				-        
			
 
				-        // Dtor. Deletes owned pointer.
			
 
				-        ~AutoString() {
			
 
				-            delete [] m_ptr;
			
 
				-            m_ptr = NULL;
			
 
				-        }
			
 
				-
			
 
				-        // Delete owned pointer and assign new one.
			
 
				-        void operator=(const char * p) {
			
 
				-            if (p != m_ptr) 
			
 
				-            {
			
 
				-                delete [] m_ptr;
			
 
				-                m_ptr = p;
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-        // Get pointer.
			
 
				-        const char * ptr() const { return m_ptr; }
			
 
				-        operator const char *() const { return m_ptr; }
			
 
				-
			
 
				-        // Relinquish ownership of the underlying pointer and returns that pointer.
			
 
				-        const char * release() {
			
 
				-            const char * tmp = m_ptr;
			
 
				-            m_ptr = NULL;
			
 
				-            return tmp;
			
 
				-        }
			
 
				-
			
 
				-        // comparison operators.
			
 
				-        friend bool operator == (const AutoString & ap, const char * const p) {
			
 
				-            return (ap.ptr() == p);
			
 
				-        }
			
 
				-        friend bool operator != (const AutoString & ap, const char * const p) {
			
 
				-            return (ap.ptr() != p);
			
 
				-        }
			
 
				-        friend bool operator == (const char * const p, const AutoString & ap) {
			
 
				-            return (ap.ptr() == p);
			
 
				-        }
			
 
				-        friend bool operator != (const char * const p, const AutoString & ap) {
			
 
				-            return (ap.ptr() != p);
			
 
				-        }
			
 
				-
			
 
				-    private:
			
 
				-        const char * m_ptr;
			
 
				-    };
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_STRING_H
			
--- a/3rdparty/nvtt/nvcore/utils.h
+++ b/3rdparty/nvtt/nvcore/utils.h
@@ -1,281 +0,0 @@
 
				-// This code is in the public domain -- Ignacio Castaño <[email protected]>
			
 
				-
			
 
				-#ifndef NV_CORE_UTILS_H
			
 
				-#define NV_CORE_UTILS_H
			
 
				-
			
 
				-#include "debug.h" // nvdebugcheck
			
 
				-
			
 
				-#include <new> // for placement new
			
 
				-
			
 
				-
			
 
				-// Just in case. Grrr.
			
 
				-#undef min
			
 
				-#undef max
			
 
				-
			
 
				-#define NV_INT8_MIN    (-128)
			
 
				-#define NV_INT8_MAX    127
			
 
				-#define NV_UINT8_MAX    255
			
 
				-#define NV_INT16_MIN    (-32767-1)
			
 
				-#define NV_INT16_MAX    32767
			
 
				-#define NV_UINT16_MAX   0xffff
			
 
				-#define NV_INT32_MIN    (-2147483647-1)
			
 
				-#define NV_INT32_MAX    2147483647
			
 
				-#define NV_UINT32_MAX   0xffffffff
			
 
				-#define NV_INT64_MAX    POSH_I64(9223372036854775807)
			
 
				-#define NV_INT64_MIN    (-POSH_I64(9223372036854775807)-1)
			
 
				-#define NV_UINT64_MAX   POSH_U64(0xffffffffffffffff)
			
 
				-
			
 
				-#define NV_HALF_MAX     65504.0F
			
 
				-#define NV_FLOAT_MAX    3.402823466e+38F
			
 
				-
			
 
				-#define NV_INTEGER_TO_FLOAT_MAX  16777217     // Largest integer such that it and all smaller integers can be stored in a 32bit float.
			
 
				-
			
 
				-
			
 
				-namespace nv
			
 
				-{
			
 
				-    // Less error prone than casting. From CB:
			
 
				-    // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
			
 
				-
			
 
				-    // These intentionally look like casts.
			
 
				-
			
 
				-    // uint32 casts:
			
 
				-    template <typename T> inline uint32 U32(T x) { return x; }
			
 
				-    template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
			
 
				-    template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
			
 
				-    //template <> inline uint32 U32<uint32>(uint32 x) { return x; }
			
 
				-    template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; }
			
 
				-    //template <> inline uint32 U32<uint16>(uint16 x) { return x; }
			
 
				-    template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
			
 
				-    //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
			
 
				-    template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
			
 
				-
			
 
				-    // int32 casts:
			
 
				-    template <typename T> inline int32 I32(T x) { return x; }
			
 
				-    template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
			
 
				-    template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_UINT32_MAX); return (int32)x; }
			
 
				-    template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
			
 
				-    //template <> inline int32 I32<int32>(int32 x) { return x; }
			
 
				-    //template <> inline int32 I32<uint16>(uint16 x) { return x; }
			
 
				-    //template <> inline int32 I32<int16>(int16 x) { return x; }
			
 
				-    //template <> inline int32 I32<uint8>(uint8 x) { return x; }
			
 
				-    //template <> inline int32 I32<int8>(int8 x) { return x; }
			
 
				-
			
 
				-    // uint16 casts:
			
 
				-    template <typename T> inline uint16 U16(T x) { return x; }
			
 
				-    template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
			
 
				-    template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
			
 
				-    template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
			
 
				-    template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
			
 
				-    //template <> inline uint16 U16<uint16>(uint16 x) { return x; }
			
 
				-    template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; }
			
 
				-    //template <> inline uint16 U16<uint8>(uint8 x) { return x; }
			
 
				-    template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
			
 
				-
			
 
				-    // int16 casts:
			
 
				-    template <typename T> inline int16 I16(T x) { return x; }
			
 
				-    template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
			
 
				-    template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
			
 
				-    template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
			
 
				-    template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
			
 
				-    template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
			
 
				-    //template <> inline int16 I16<int16>(int16 x) { return x; }
			
 
				-    //template <> inline int16 I16<uint8>(uint8 x) { return x; }
			
 
				-    //template <> inline int16 I16<int8>(int8 x) { return x; }
			
 
				-
			
 
				-    // uint8 casts:
			
 
				-    template <typename T> inline uint8 U8(T x) { return x; }
			
 
				-    template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
			
 
				-    //template <> inline uint8 U8<uint8>(uint8 x) { return x; }
			
 
				-    template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; }
			
 
				-    //template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
			
 
				-
			
 
				-    // int8 casts:
			
 
				-    template <typename T> inline int8 I8(T x) { return x; }
			
 
				-    template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
			
 
				-    template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
			
 
				-    //template <> inline int8 I8<int8>(int8 x) { return x; }
			
 
				-
			
 
				-    // float casts:
			
 
				-    template <typename T> inline float F32(T x) { return x; }
			
 
				-    template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
			
 
				-    template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
			
 
				-    template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
			
 
				-    template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
			
 
				-    // The compiler should not complain about these conversions:
			
 
				-    //template <> inline float F32<uint16>(uint16 x) { nvDebugCheck(return (float)x; }
			
 
				-    //template <> inline float F32<int16>(int16 x) { nvDebugCheck(return (float)x; }
			
 
				-    //template <> inline float F32<uint8>(uint8 x) { nvDebugCheck(return (float)x; }
			
 
				-    //template <> inline float F32<int8>(int8 x) { nvDebugCheck(return (float)x; }
			
 
				-
			
 
				-
			
 
				-    /// Swap two values.
			
 
				-    template <typename T> 
			
 
				-    inline void swap(T & a, T & b)
			
 
				-    {
			
 
				-        T temp(a);
			
 
				-        a = b; 
			
 
				-        b = temp;
			
 
				-    }
			
 
				-
			
 
				-    /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
			
 
				-    template <typename T> 
			
 
				-    //inline const T & max(const T & a, const T & b)
			
 
				-    inline T max(const T & a, const T & b)
			
 
				-    {
			
 
				-        return (b < a) ? a : b;
			
 
				-    }
			
 
				-
			
 
				-	/// Return the maximum of the four arguments.
			
 
				-	template <typename T> 
			
 
				-	//inline const T & max4(const T & a, const T & b, const T & c)
			
 
				-	inline T max4(const T & a, const T & b, const T & c, const T & d)
			
 
				-	{
			
 
				-		return max(max(a, b), max(c, d));
			
 
				-	}
			
 
				-
			
 
				-    /// Return the maximum of the three arguments.
			
 
				-    template <typename T> 
			
 
				-    //inline const T & max3(const T & a, const T & b, const T & c)
			
 
				-    inline T max3(const T & a, const T & b, const T & c)
			
 
				-    {
			
 
				-        return max(a, max(b, c));
			
 
				-    }
			
 
				-
			
 
				-    /// Return the minimum of two values.
			
 
				-    template <typename T> 
			
 
				-    //inline const T & min(const T & a, const T & b)
			
 
				-    inline T min(const T & a, const T & b)
			
 
				-    {
			
 
				-        return (a < b) ? a : b;
			
 
				-    }
			
 
				-
			
 
				-    /// Return the maximum of the three arguments.
			
 
				-    template <typename T> 
			
 
				-    //inline const T & min3(const T & a, const T & b, const T & c)
			
 
				-    inline T min3(const T & a, const T & b, const T & c)
			
 
				-    {
			
 
				-        return min(a, min(b, c));
			
 
				-    }
			
 
				-
			
 
				-    /// Clamp between two values.
			
 
				-    template <typename T> 
			
 
				-    //inline const T & clamp(const T & x, const T & a, const T & b)
			
 
				-    inline T clamp(const T & x, const T & a, const T & b)
			
 
				-    {
			
 
				-        return min(max(x, a), b);
			
 
				-    }
			
 
				-
			
 
				-    /** Return the next power of two. 
			
 
				-    * @see http://graphics.stanford.edu/~seander/bithacks.html
			
 
				-    * @warning Behaviour for 0 is undefined.
			
 
				-    * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
			
 
				-    * @note nextPowerOfTwo(x) = 2 << log2(x-1)
			
 
				-    */
			
 
				-    inline uint nextPowerOfTwo( uint x )
			
 
				-    {
			
 
				-        nvDebugCheck( x != 0 );
			
 
				-#if 1	// On modern CPUs this is supposed to be as fast as using the bsr instruction.
			
 
				-        x--;
			
 
				-        x |= x >> 1;
			
 
				-        x |= x >> 2;
			
 
				-        x |= x >> 4;
			
 
				-        x |= x >> 8;
			
 
				-        x |= x >> 16;
			
 
				-        return x+1;	
			
 
				-#else
			
 
				-        uint p = 1;
			
 
				-        while( x > p ) {
			
 
				-            p += p;
			
 
				-        }
			
 
				-        return p;
			
 
				-#endif
			
 
				-    }
			
 
				-
			
 
				-    /// Return true if @a n is a power of two.
			
 
				-    inline bool isPowerOfTwo( uint n )
			
 
				-    {
			
 
				-        return (n & (n-1)) == 0;
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-    // @@ Move this to utils?
			
 
				-    /// Delete all the elements of a container.
			
 
				-    template <typename T>
			
 
				-    void deleteAll(T & container)
			
 
				-    {
			
 
				-        for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
			
 
				-        {
			
 
				-            delete container[i];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-
			
 
				-    // @@ Specialize these methods for numeric, pointer, and pod types.
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void construct_range(T * restrict ptr, uint new_size, uint old_size) {
			
 
				-        for (uint i = old_size; i < new_size; i++) {
			
 
				-            new(ptr+i) T; // placement new
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
			
 
				-        for (uint i = old_size; i < new_size; i++) {
			
 
				-            new(ptr+i) T(elem); // placement new
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
			
 
				-        for (uint i = old_size; i < new_size; i++) {
			
 
				-            new(ptr+i) T(src[i]); // placement new
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
			
 
				-        for (uint i = new_size; i < old_size; i++) {
			
 
				-            (ptr+i)->~T(); // Explicit call to the destructor
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void fill(T * restrict dst, uint count, const T & value) {
			
 
				-        for (uint i = 0; i < count; i++) {
			
 
				-            dst[i] = value;
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    void copy_range(T * restrict dst, const T * restrict src, uint count) {
			
 
				-        for (uint i = 0; i < count; i++) {
			
 
				-            dst[i] = src[i];
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    template <typename T>
			
 
				-    bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
			
 
				-        for (uint i = begin; i < end; i++) {
			
 
				-            if (ptr[i] == element) {
			
 
				-                if (index != NULL) *index = i;
			
 
				-                return true;
			
 
				-            }
			
 
				-        }
			
 
				-        return false;
			
 
				-    }
			
 
				-
			
 
				-} // nv namespace
			
 
				-
			
 
				-#endif // NV_CORE_UTILS_H